winex11.drv: Refcount the vulkan surface window.
[wine.git] / dlls / urlmon / uri.c
blob8de3161a7bf493e581f0ab13167aa9471396e0be
/*
 * Copyright 2010 Jacek Caban for CodeWeavers
 * Copyright 2010 Thomas Mullaly
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
20 #include <limits.h>
22 #include "urlmon_main.h"
23 #include "wine/debug.h"
25 #define NO_SHLWAPI_REG
26 #include "shlwapi.h"
28 #include "strsafe.h"
/* NOTE(review): judging by the names these modify how a URI is displayed;
 * the code that consumes them is outside this part of the file. */
#define URI_DISPLAY_NO_ABSOLUTE_URI         0x1
#define URI_DISPLAY_NO_DEFAULT_PORT_AUTH    0x2

/* Bits of the 'extras' argument taken by the parse_* helpers.
 * ALLOW_NULL_TERM_SCHEME lets parse_scheme_name() accept a scheme that is
 * terminated by the end of the string instead of a ':'.
 * NOTE(review): the remaining bits are consumed outside this part of the
 * file; their names suggest similar relaxations for other components. */
#define ALLOW_NULL_TERM_SCHEME          0x01
#define ALLOW_NULL_TERM_USER_NAME       0x02
#define ALLOW_NULL_TERM_PASSWORD        0x04
#define ALLOW_BRACKETLESS_IP_LITERAL    0x08
#define SKIP_IP_FUTURE_CHECK            0x10
#define IGNORE_PORT_DELIMITER           0x20

/* NOTE(review): presumably options for building the raw URI string - confirm. */
#define RAW_URI_FORCE_PORT_DISP     0x1
#define RAW_URI_CONVERT_TO_DOS_PATH 0x2

/* NOTE(review): presumably an option for URI combining - confirm. */
#define COMBINE_URI_FORCE_FLAG_USE  0x1
45 WINE_DEFAULT_DEBUG_CHANNEL(urlmon);
47 static const IID IID_IUriObj = {0x4b364760,0x9f51,0x11df,{0x98,0x1c,0x08,0x00,0x20,0x0c,0x9a,0x66}};
49 typedef struct {
50 IUri IUri_iface;
51 IUriBuilderFactory IUriBuilderFactory_iface;
52 IPersistStream IPersistStream_iface;
53 IMarshal IMarshal_iface;
55 LONG ref;
57 BSTR raw_uri;
59 /* Information about the canonicalized URI's buffer. */
60 WCHAR *canon_uri;
61 DWORD canon_size;
62 DWORD canon_len;
63 BOOL display_modifiers;
64 DWORD create_flags;
66 INT scheme_start;
67 DWORD scheme_len;
68 URL_SCHEME scheme_type;
70 INT userinfo_start;
71 DWORD userinfo_len;
72 INT userinfo_split;
74 INT host_start;
75 DWORD host_len;
76 Uri_HOST_TYPE host_type;
78 INT port_offset;
79 DWORD port;
80 BOOL has_port;
82 INT authority_start;
83 DWORD authority_len;
85 INT domain_offset;
87 INT path_start;
88 DWORD path_len;
89 INT extension_offset;
91 INT query_start;
92 DWORD query_len;
94 INT fragment_start;
95 DWORD fragment_len;
96 } Uri;
98 typedef struct {
99 IUriBuilder IUriBuilder_iface;
100 LONG ref;
102 Uri *uri;
103 DWORD modified_props;
105 WCHAR *fragment;
106 DWORD fragment_len;
108 WCHAR *host;
109 DWORD host_len;
111 WCHAR *password;
112 DWORD password_len;
114 WCHAR *path;
115 DWORD path_len;
117 BOOL has_port;
118 DWORD port;
120 WCHAR *query;
121 DWORD query_len;
123 WCHAR *scheme;
124 DWORD scheme_len;
126 WCHAR *username;
127 DWORD username_len;
128 } UriBuilder;
130 typedef struct {
131 const WCHAR *str;
132 DWORD len;
133 } h16;
135 typedef struct {
136 /* IPv6 addresses can hold up to 8 h16 components. */
137 h16 components[8];
138 DWORD h16_count;
140 /* An IPv6 can have 1 elision ("::"). */
141 const WCHAR *elision;
143 /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. */
144 const WCHAR *ipv4;
145 DWORD ipv4_len;
147 INT components_size;
148 INT elision_size;
149 } ipv6_address;
151 typedef struct {
152 BSTR uri;
154 BOOL is_relative;
155 BOOL is_opaque;
156 BOOL has_implicit_scheme;
157 BOOL has_implicit_ip;
158 UINT implicit_ipv4;
159 BOOL must_have_path;
161 const WCHAR *scheme;
162 DWORD scheme_len;
163 URL_SCHEME scheme_type;
165 const WCHAR *username;
166 DWORD username_len;
168 const WCHAR *password;
169 DWORD password_len;
171 const WCHAR *host;
172 DWORD host_len;
173 Uri_HOST_TYPE host_type;
175 BOOL has_ipv6;
176 ipv6_address ipv6_address;
178 BOOL has_port;
179 const WCHAR *port;
180 DWORD port_len;
181 DWORD port_value;
183 const WCHAR *path;
184 DWORD path_len;
186 const WCHAR *query;
187 DWORD query_len;
189 const WCHAR *fragment;
190 DWORD fragment_len;
191 } parse_data;
/* Upper-case hexadecimal digits, indexed by nibble value (see pct_encode_val). */
static const CHAR hexDigits[] = "0123456789" "ABCDEF";
195 /* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7. */
196 static const struct {
197 URL_SCHEME scheme;
198 WCHAR scheme_name[16];
199 } recognized_schemes[] = {
200 {URL_SCHEME_FTP, {'f','t','p',0}},
201 {URL_SCHEME_HTTP, {'h','t','t','p',0}},
202 {URL_SCHEME_GOPHER, {'g','o','p','h','e','r',0}},
203 {URL_SCHEME_MAILTO, {'m','a','i','l','t','o',0}},
204 {URL_SCHEME_NEWS, {'n','e','w','s',0}},
205 {URL_SCHEME_NNTP, {'n','n','t','p',0}},
206 {URL_SCHEME_TELNET, {'t','e','l','n','e','t',0}},
207 {URL_SCHEME_WAIS, {'w','a','i','s',0}},
208 {URL_SCHEME_FILE, {'f','i','l','e',0}},
209 {URL_SCHEME_MK, {'m','k',0}},
210 {URL_SCHEME_HTTPS, {'h','t','t','p','s',0}},
211 {URL_SCHEME_SHELL, {'s','h','e','l','l',0}},
212 {URL_SCHEME_SNEWS, {'s','n','e','w','s',0}},
213 {URL_SCHEME_LOCAL, {'l','o','c','a','l',0}},
214 {URL_SCHEME_JAVASCRIPT, {'j','a','v','a','s','c','r','i','p','t',0}},
215 {URL_SCHEME_VBSCRIPT, {'v','b','s','c','r','i','p','t',0}},
216 {URL_SCHEME_ABOUT, {'a','b','o','u','t',0}},
217 {URL_SCHEME_RES, {'r','e','s',0}},
218 {URL_SCHEME_MSSHELLROOTED, {'m','s','-','s','h','e','l','l','-','r','o','o','t','e','d',0}},
219 {URL_SCHEME_MSSHELLIDLIST, {'m','s','-','s','h','e','l','l','-','i','d','l','i','s','t',0}},
220 {URL_SCHEME_MSHELP, {'h','c','p',0}},
221 {URL_SCHEME_WILDCARD, {'*',0}}
224 /* List of default ports Windows recognizes. */
225 static const struct {
226 URL_SCHEME scheme;
227 USHORT port;
228 } default_ports[] = {
229 {URL_SCHEME_FTP, 21},
230 {URL_SCHEME_HTTP, 80},
231 {URL_SCHEME_GOPHER, 70},
232 {URL_SCHEME_NNTP, 119},
233 {URL_SCHEME_TELNET, 23},
234 {URL_SCHEME_WAIS, 210},
235 {URL_SCHEME_HTTPS, 443},
238 /* List of 3-character top level domain names Windows seems to recognize.
239 * There might be more, but, these are the only ones I've found so far.
241 static const struct {
242 WCHAR tld_name[4];
243 } recognized_tlds[] = {
244 {{'c','o','m',0}},
245 {{'e','d','u',0}},
246 {{'g','o','v',0}},
247 {{'i','n','t',0}},
248 {{'m','i','l',0}},
249 {{'n','e','t',0}},
250 {{'o','r','g',0}}
253 static Uri *get_uri_obj(IUri *uri)
255 Uri *ret;
256 HRESULT hres;
258 hres = IUri_QueryInterface(uri, &IID_IUriObj, (void**)&ret);
259 return SUCCEEDED(hres) ? ret : NULL;
/* TRUE for the ASCII letters a-z and A-Z. */
static inline BOOL is_alpha(WCHAR val) {
    if(val >= 'a' && val <= 'z')
        return TRUE;

    return (val >= 'A' && val <= 'Z');
}
/* TRUE for the ASCII digits 0-9. */
static inline BOOL is_num(WCHAR val) {
    if(val < '0')
        return FALSE;

    return (val <= '9');
}
270 static inline BOOL is_drive_path(const WCHAR *str) {
271 return (is_alpha(str[0]) && (str[1] == ':' || str[1] == '|'));
/* TRUE if 'str' starts with two backslashes (UNC path prefix). */
static inline BOOL is_unc_path(const WCHAR *str) {
    if(str[0] != '\\')
        return FALSE;

    return (str[1] == '\\');
}
/* TRUE for the characters '>', '<' and '"'. */
static inline BOOL is_forbidden_dos_path_char(WCHAR val) {
    switch(val) {
    case '>':
    case '<':
    case '\"':
        return TRUE;
    default:
        return FALSE;
    }
}
282 /* A URI is implicitly a file path if it begins with
283 * a drive letter (e.g. X:) or starts with "\\" (UNC path).
285 static inline BOOL is_implicit_file_path(const WCHAR *str) {
286 return (is_unc_path(str) || (is_alpha(str[0]) && str[1] == ':'));
/* Checks if the URI is a hierarchical URI. A hierarchical
 * URI is one that has "//" after the scheme.
 *
 * On success *ptr is moved past the "//" and TRUE is returned; otherwise
 * *ptr is left where it was and FALSE is returned.
 */
static BOOL check_hierarchical(const WCHAR **ptr) {
    const WCHAR *cur = *ptr;

    if(cur[0] != '/' || cur[1] != '/')
        return FALSE;

    *ptr = cur + 2;
    return TRUE;
}
308 /* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" */
309 static inline BOOL is_unreserved(WCHAR val) {
310 return (is_alpha(val) || is_num(val) || val == '-' || val == '.' ||
311 val == '_' || val == '~');
/* sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
 *            / "*" / "+" / "," / ";" / "="
 */
static inline BOOL is_subdelim(WCHAR val) {
    switch(val) {
    case '!': case '$': case '&':
    case '\'': case '(': case ')':
    case '*': case '+': case ',':
    case ';': case '=':
        return TRUE;
    default:
        return FALSE;
    }
}
/* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" */
static inline BOOL is_gendelim(WCHAR val) {
    switch(val) {
    case ':': case '/': case '?':
    case '#': case '[': case ']':
    case '@':
        return TRUE;
    default:
        return FALSE;
    }
}
/* Characters that delimit the end of the authority
 * section of a URI. Sometimes a '\\' is considered
 * an authority delimiter.
 *
 * '#', '/', '?' and the terminating NUL always count; a backslash counts
 * only when 'acceptSlash' is non-zero.
 */
static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) {
    switch(val) {
    case '#':
    case '/':
    case '?':
    case '\0':
        return TRUE;
    case '\\':
        return acceptSlash ? TRUE : FALSE;
    default:
        return FALSE;
    }
}
340 /* reserved = gen-delims / sub-delims */
341 static inline BOOL is_reserved(WCHAR val) {
342 return (is_subdelim(val) || is_gendelim(val));
/* TRUE for 0-9, a-f and A-F. */
static inline BOOL is_hexdigit(WCHAR val) {
    if(val >= '0' && val <= '9')
        return TRUE;
    if(val >= 'a' && val <= 'f')
        return TRUE;

    return (val >= 'A' && val <= 'F');
}
351 static inline BOOL is_path_delim(URL_SCHEME scheme, WCHAR val) {
352 return (!val || (val == '#' && scheme != URL_SCHEME_FILE) || val == '?');
/* TRUE for a forward slash or a backslash. */
static inline BOOL is_slash(WCHAR c)
{
    switch(c) {
    case '/':
    case '\\':
        return TRUE;
    default:
        return FALSE;
    }
}
/* TRUE if 'c' is below 0x80. */
static inline BOOL is_ascii(WCHAR c)
{
    if(c >= 0x80)
        return FALSE;

    return TRUE;
}
365 static BOOL is_default_port(URL_SCHEME scheme, DWORD port) {
366 DWORD i;
368 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) {
369 if(default_ports[i].scheme == scheme && default_ports[i].port)
370 return TRUE;
373 return FALSE;
376 /* List of schemes types Windows seems to expect to be hierarchical. */
377 static inline BOOL is_hierarchical_scheme(URL_SCHEME type) {
378 return(type == URL_SCHEME_HTTP || type == URL_SCHEME_FTP ||
379 type == URL_SCHEME_GOPHER || type == URL_SCHEME_NNTP ||
380 type == URL_SCHEME_TELNET || type == URL_SCHEME_WAIS ||
381 type == URL_SCHEME_FILE || type == URL_SCHEME_HTTPS ||
382 type == URL_SCHEME_RES);
385 /* Checks if 'flags' contains an invalid combination of Uri_CREATE flags. */
386 static inline BOOL has_invalid_flag_combination(DWORD flags) {
387 return((flags & Uri_CREATE_DECODE_EXTRA_INFO && flags & Uri_CREATE_NO_DECODE_EXTRA_INFO) ||
388 (flags & Uri_CREATE_CANONICALIZE && flags & Uri_CREATE_NO_CANONICALIZE) ||
389 (flags & Uri_CREATE_CRACK_UNKNOWN_SCHEMES && flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES) ||
390 (flags & Uri_CREATE_PRE_PROCESS_HTML_URI && flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI) ||
391 (flags & Uri_CREATE_IE_SETTINGS && flags & Uri_CREATE_NO_IE_SETTINGS));
394 /* Applies each default Uri_CREATE flags to 'flags' if it
395 * doesn't cause a flag conflict.
397 static void apply_default_flags(DWORD *flags) {
398 if(!(*flags & Uri_CREATE_NO_CANONICALIZE))
399 *flags |= Uri_CREATE_CANONICALIZE;
400 if(!(*flags & Uri_CREATE_NO_DECODE_EXTRA_INFO))
401 *flags |= Uri_CREATE_DECODE_EXTRA_INFO;
402 if(!(*flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES))
403 *flags |= Uri_CREATE_CRACK_UNKNOWN_SCHEMES;
404 if(!(*flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI))
405 *flags |= Uri_CREATE_PRE_PROCESS_HTML_URI;
406 if(!(*flags & Uri_CREATE_IE_SETTINGS))
407 *flags |= Uri_CREATE_NO_IE_SETTINGS;
410 /* Determines if the URI is hierarchical using the information already parsed into
411 * data and using the current location of parsing in the URI string.
413 * Windows considers a URI hierarchical if one of the following is true:
414 * A.) It's a wildcard scheme.
415 * B.) It's an implicit file scheme.
416 * C.) It's a known hierarchical scheme and it has two '\\' after the scheme name.
417 * (the '\\' will be converted into "//" during canonicalization).
418 * D.) "//" appears after the scheme name (or at the beginning if no scheme is given).
420 static inline BOOL is_hierarchical_uri(const WCHAR **ptr, const parse_data *data) {
421 const WCHAR *start = *ptr;
423 if(data->scheme_type == URL_SCHEME_WILDCARD)
424 return TRUE;
425 else if(data->scheme_type == URL_SCHEME_FILE && data->has_implicit_scheme)
426 return TRUE;
427 else if(is_hierarchical_scheme(data->scheme_type) && (*ptr)[0] == '\\' && (*ptr)[1] == '\\') {
428 *ptr += 2;
429 return TRUE;
430 } else if(data->scheme_type != URL_SCHEME_MAILTO && check_hierarchical(ptr))
431 return TRUE;
433 *ptr = start;
434 return FALSE;
437 /* Computes the size of the given IPv6 address.
438 * Each h16 component is 16 bits. If there is an IPv4 address, it's
439 * 32 bits. If there's an elision it can be 16 to 128 bits, depending
440 * on the number of other components.
442 * Modeled after google-url's CheckIPv6ComponentsSize function
444 static void compute_ipv6_comps_size(ipv6_address *address) {
445 address->components_size = address->h16_count * 2;
447 if(address->ipv4)
448 /* IPv4 address is 4 bytes. */
449 address->components_size += 4;
451 if(address->elision) {
452 /* An elision can be anywhere from 2 bytes up to 16 bytes.
453 * Its size depends on the size of the h16 and IPv4 components.
455 address->elision_size = 16 - address->components_size;
456 if(address->elision_size < 2)
457 address->elision_size = 2;
458 } else
459 address->elision_size = 0;
/* Taken from dlls/jscript/lex.c
 *
 * Returns the value (0-15) of the hexadecimal digit 'val', or -1 if 'val'
 * is not a hexadecimal digit.
 */
static int hex_to_int(WCHAR val) {
    if(val >= '0' && val <= '9')
        return val - '0';

    if(val >= 'a' && val <= 'f')
        return 10 + (val - 'a');

    if(val >= 'A' && val <= 'F')
        return 10 + (val - 'A');

    return -1;
}
474 /* Helper function for converting a percent encoded string
475 * representation of a WCHAR value into its actual WCHAR value. If
476 * the two characters following the '%' aren't valid hex values then
477 * this function returns the NULL character.
479 * E.g.
480 * "%2E" will result in '.' being returned by this function.
482 static WCHAR decode_pct_val(const WCHAR *ptr) {
483 WCHAR ret = '\0';
485 if(*ptr == '%' && is_hexdigit(*(ptr + 1)) && is_hexdigit(*(ptr + 2))) {
486 INT a = hex_to_int(*(ptr + 1));
487 INT b = hex_to_int(*(ptr + 2));
489 ret = a << 4;
490 ret += b;
493 return ret;
496 /* Helper function for percent encoding a given character
497 * and storing the encoded value into a given buffer (dest).
499 * It's up to the calling function to ensure that there is
500 * at least enough space in 'dest' for the percent encoded
501 * value to be stored (so dest + 3 spaces available).
503 static inline void pct_encode_val(WCHAR val, WCHAR *dest) {
504 dest[0] = '%';
505 dest[1] = hexDigits[(val >> 4) & 0xf];
506 dest[2] = hexDigits[val & 0xf];
509 /* Attempts to parse the domain name from the host.
511 * This function also includes the Top-level Domain (TLD) name
512 * of the host when it tries to find the domain name. If it finds
513 * a valid domain name it will assign 'domain_start' the offset
514 * into 'host' where the domain name starts.
516 * It's implied that if there is a domain name its range is:
517 * [host+domain_start, host+host_len).
519 void find_domain_name(const WCHAR *host, DWORD host_len,
520 INT *domain_start) {
521 const WCHAR *last_tld, *sec_last_tld, *end;
523 end = host+host_len-1;
525 *domain_start = -1;
527 /* There has to be at least enough room for a '.' followed by a
528 * 3-character TLD for a domain to even exist in the host name.
530 if(host_len < 4)
531 return;
533 last_tld = memrchrW(host, '.', host_len);
534 if(!last_tld)
535 /* http://hostname -> has no domain name. */
536 return;
538 sec_last_tld = memrchrW(host, '.', last_tld-host);
539 if(!sec_last_tld) {
540 /* If the '.' is at the beginning of the host there
541 * has to be at least 3 characters in the TLD for it
542 * to be valid.
543 * Ex: .com -> .com as the domain name.
544 * .co -> has no domain name.
546 if(last_tld-host == 0) {
547 if(end-(last_tld-1) < 3)
548 return;
549 } else if(last_tld-host == 3) {
550 DWORD i;
552 /* If there are three characters in front of last_tld and
553 * they are on the list of recognized TLDs, then this
554 * host doesn't have a domain (since the host only contains
555 * a TLD name.
556 * Ex: edu.uk -> has no domain name.
557 * foo.uk -> foo.uk as the domain name.
559 for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
560 if(!StrCmpNIW(host, recognized_tlds[i].tld_name, 3))
561 return;
563 } else if(last_tld-host < 3)
564 /* Anything less than 3 characters is considered part
565 * of the TLD name.
566 * Ex: ak.uk -> Has no domain name.
568 return;
570 /* Otherwise the domain name is the whole host name. */
571 *domain_start = 0;
572 } else if(end+1-last_tld > 3) {
573 /* If the last_tld has more than 3 characters, then it's automatically
574 * considered the TLD of the domain name.
575 * Ex: www.winehq.org.uk.test -> uk.test as the domain name.
577 *domain_start = (sec_last_tld+1)-host;
578 } else if(last_tld - (sec_last_tld+1) < 4) {
579 DWORD i;
580 /* If the sec_last_tld is 3 characters long it HAS to be on the list of
581 * recognized to still be considered part of the TLD name, otherwise
582 * it's considered the domain name.
583 * Ex: www.google.com.uk -> google.com.uk as the domain name.
584 * www.google.foo.uk -> foo.uk as the domain name.
586 if(last_tld - (sec_last_tld+1) == 3) {
587 for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
588 if(!StrCmpNIW(sec_last_tld+1, recognized_tlds[i].tld_name, 3)) {
589 const WCHAR *domain = memrchrW(host, '.', sec_last_tld-host);
591 if(!domain)
592 *domain_start = 0;
593 else
594 *domain_start = (domain+1) - host;
595 TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
596 (host+host_len)-(host+*domain_start)));
597 return;
601 *domain_start = (sec_last_tld+1)-host;
602 } else {
603 /* Since the sec_last_tld is less than 3 characters it's considered
604 * part of the TLD.
605 * Ex: www.google.fo.uk -> google.fo.uk as the domain name.
607 const WCHAR *domain = memrchrW(host, '.', sec_last_tld-host);
609 if(!domain)
610 *domain_start = 0;
611 else
612 *domain_start = (domain+1) - host;
614 } else {
615 /* The second to last TLD has more than 3 characters making it
616 * the domain name.
617 * Ex: www.google.test.us -> test.us as the domain name.
619 *domain_start = (sec_last_tld+1)-host;
622 TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
623 (host+host_len)-(host+*domain_start)));
626 /* Removes the dot segments from a hierarchical URIs path component. This
627 * function performs the removal in place.
629 * This function returns the new length of the path string.
631 static DWORD remove_dot_segments(WCHAR *path, DWORD path_len) {
632 WCHAR *out = path;
633 const WCHAR *in = out;
634 const WCHAR *end = out + path_len;
635 DWORD len;
637 while(in < end) {
638 /* Move the first path segment in the input buffer to the end of
639 * the output buffer, and any subsequent characters up to, including
640 * the next "/" character (if any) or the end of the input buffer.
642 while(in < end && !is_slash(*in))
643 *out++ = *in++;
644 if(in == end)
645 break;
646 *out++ = *in++;
648 while(in < end) {
649 if(*in != '.')
650 break;
652 /* Handle ending "/." */
653 if(in + 1 == end) {
654 ++in;
655 break;
658 /* Handle "/./" */
659 if(is_slash(in[1])) {
660 in += 2;
661 continue;
664 /* If we don't have "/../" or ending "/.." */
665 if(in[1] != '.' || (in + 2 != end && !is_slash(in[2])))
666 break;
668 /* Find the slash preceding out pointer and move out pointer to it */
669 if(out > path+1 && is_slash(*--out))
670 --out;
671 while(out > path && !is_slash(*(--out)));
672 if(is_slash(*out))
673 ++out;
674 in += 2;
675 if(in != end)
676 ++in;
680 len = out - path;
681 TRACE("(%p %d): Path after dot segments removed %s len=%d\n", path, path_len,
682 debugstr_wn(path, len), len);
683 return len;
/* Attempts to find the file extension in a given path.
 *
 * Scans the last path segment (everything after the last '/' or '\\')
 * backwards for a '.'.
 *
 * Returns the offset of that '.' into 'path', or -1 if the last segment
 * contains no '.' (this includes an empty path).
 *
 * The scan is index based so that no pointer before the start of 'path'
 * is ever formed, which matters in particular when path_len is 0.
 */
static INT find_file_extension(const WCHAR *path, DWORD path_len) {
    DWORD i;

    for(i = path_len; i > 0; --i) {
        const WCHAR c = path[i-1];

        if(c == '/' || c == '\\')
            break;
        if(c == '.')
            return i-1;
    }

    return -1;
}
698 /* Computes the location where the elision should occur in the IPv6
699 * address using the numerical values of each component stored in
700 * 'values'. If the address shouldn't contain an elision then 'index'
701 * is assigned -1 as its value. Otherwise 'index' will contain the
702 * starting index (into values) where the elision should be, and 'count'
703 * will contain the number of cells the elision covers.
705 * NOTES:
706 * Windows will expand an elision if the elision only represents one h16
707 * component of the address.
709 * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
711 * If the IPv6 address contains an IPv4 address, the IPv4 address is also
712 * considered for being included as part of an elision if all its components
713 * are zeros.
715 * Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::]
717 static void compute_elision_location(const ipv6_address *address, const USHORT values[8],
718 INT *index, DWORD *count) {
719 DWORD i, max_len, cur_len;
720 INT max_index, cur_index;
722 max_len = cur_len = 0;
723 max_index = cur_index = -1;
724 for(i = 0; i < 8; ++i) {
725 BOOL check_ipv4 = (address->ipv4 && i == 6);
726 BOOL is_end = (check_ipv4 || i == 7);
728 if(check_ipv4) {
729 /* Check if the IPv4 address contains only zeros. */
730 if(values[i] == 0 && values[i+1] == 0) {
731 if(cur_index == -1)
732 cur_index = i;
734 cur_len += 2;
735 ++i;
737 } else if(values[i] == 0) {
738 if(cur_index == -1)
739 cur_index = i;
741 ++cur_len;
744 if(is_end || values[i] != 0) {
745 /* We only consider it for an elision if it's
746 * more than 1 component long.
748 if(cur_len > 1 && cur_len > max_len) {
749 /* Found the new elision location. */
750 max_len = cur_len;
751 max_index = cur_index;
754 /* Reset the current range for the next range of zeros. */
755 cur_index = -1;
756 cur_len = 0;
760 *index = max_index;
761 *count = max_len;
764 /* Removes all the leading and trailing white spaces or
765 * control characters from the URI and removes all control
766 * characters inside of the URI string.
768 static BSTR pre_process_uri(LPCWSTR uri) {
769 const WCHAR *start, *end, *ptr;
770 WCHAR *ptr2;
771 DWORD len;
772 BSTR ret;
774 start = uri;
775 /* Skip leading controls and whitespace. */
776 while(*start && (iscntrlW(*start) || isspaceW(*start))) ++start;
778 /* URI consisted only of control/whitespace. */
779 if(!*start)
780 return SysAllocStringLen(NULL, 0);
782 end = start + strlenW(start);
783 while(--end > start && (iscntrlW(*end) || isspaceW(*end)));
785 len = ++end - start;
786 for(ptr = start; ptr < end; ptr++) {
787 if(iscntrlW(*ptr))
788 len--;
791 ret = SysAllocStringLen(NULL, len);
792 if(!ret)
793 return NULL;
795 for(ptr = start, ptr2=ret; ptr < end; ptr++) {
796 if(!iscntrlW(*ptr))
797 *ptr2++ = *ptr;
800 return ret;
/* Converts the specified IPv4 address into an uint value.
 *
 * Each '.' separated decimal component contributes 8 bits, the first
 * component ending up in the most significant position.
 *
 * This function assumes that the IPv4 address has already been validated.
 */
static UINT ipv4toui(const WCHAR *ip, DWORD len) {
    UINT address = 0;
    DWORD octet = 0;
    DWORD i;

    for(i = 0; i < len; ++i) {
        if(ip[i] != '.') {
            octet = octet*10 + (ip[i]-'0');
            continue;
        }

        /* End of a component: shift it into the result. */
        address <<= 8;
        address += octet;
        octet = 0;
    }

    /* The last component isn't followed by a '.'. */
    address <<= 8;
    address += octet;

    return address;
}
827 /* Converts an IPv4 address in numerical form into its fully qualified
828 * string form. This function returns the number of characters written
829 * to 'dest'. If 'dest' is NULL this function will return the number of
830 * characters that would have been written.
832 * It's up to the caller to ensure there's enough space in 'dest' for the
833 * address.
835 static DWORD ui2ipv4(WCHAR *dest, UINT address) {
836 static const WCHAR formatW[] =
837 {'%','u','.','%','u','.','%','u','.','%','u',0};
838 DWORD ret = 0;
839 UCHAR digits[4];
841 digits[0] = (address >> 24) & 0xff;
842 digits[1] = (address >> 16) & 0xff;
843 digits[2] = (address >> 8) & 0xff;
844 digits[3] = address & 0xff;
846 if(!dest) {
847 WCHAR tmp[16];
848 ret = sprintfW(tmp, formatW, digits[0], digits[1], digits[2], digits[3]);
849 } else
850 ret = sprintfW(dest, formatW, digits[0], digits[1], digits[2], digits[3]);
852 return ret;
855 static DWORD ui2str(WCHAR *dest, UINT value) {
856 static const WCHAR formatW[] = {'%','u',0};
857 DWORD ret = 0;
859 if(!dest) {
860 WCHAR tmp[11];
861 ret = sprintfW(tmp, formatW, value);
862 } else
863 ret = sprintfW(dest, formatW, value);
865 return ret;
868 /* Converts a h16 component (from an IPv6 address) into its
869 * numerical value.
871 * This function assumes that the h16 component has already been validated.
873 static USHORT h16tous(h16 component) {
874 DWORD i;
875 USHORT ret = 0;
877 for(i = 0; i < component.len; ++i) {
878 ret <<= 4;
879 ret += hex_to_int(component.str[i]);
882 return ret;
885 /* Converts an IPv6 address into its 128 bits (16 bytes) numerical value.
887 * This function assumes that the ipv6_address has already been validated.
889 static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) {
890 DWORD i, cur_component = 0;
891 BOOL already_passed_elision = FALSE;
893 for(i = 0; i < address->h16_count; ++i) {
894 if(address->elision) {
895 if(address->components[i].str > address->elision && !already_passed_elision) {
896 /* Means we just passed the elision and need to add its values to
897 * 'number' before we do anything else.
899 INT j;
900 for(j = 0; j < address->elision_size; j+=2)
901 number[cur_component++] = 0;
903 already_passed_elision = TRUE;
907 number[cur_component++] = h16tous(address->components[i]);
910 /* Case when the elision appears after the h16 components. */
911 if(!already_passed_elision && address->elision) {
912 INT j;
913 for(j = 0; j < address->elision_size; j+=2)
914 number[cur_component++] = 0;
917 if(address->ipv4) {
918 UINT value = ipv4toui(address->ipv4, address->ipv4_len);
920 if(cur_component != 6) {
921 ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component);
922 return FALSE;
925 number[cur_component++] = (value >> 16) & 0xffff;
926 number[cur_component] = value & 0xffff;
929 return TRUE;
932 /* Checks if the characters pointed to by 'ptr' are
933 * a percent encoded data octet.
935 * pct-encoded = "%" HEXDIG HEXDIG
937 static BOOL check_pct_encoded(const WCHAR **ptr) {
938 const WCHAR *start = *ptr;
940 if(**ptr != '%')
941 return FALSE;
943 ++(*ptr);
944 if(!is_hexdigit(**ptr)) {
945 *ptr = start;
946 return FALSE;
949 ++(*ptr);
950 if(!is_hexdigit(**ptr)) {
951 *ptr = start;
952 return FALSE;
955 ++(*ptr);
956 return TRUE;
/* dec-octet   = DIGIT                 ; 0-9
 *             / %x31-39 DIGIT         ; 10-99
 *             / "1" 2DIGIT            ; 100-199
 *             / "2" %x30-34 DIGIT     ; 200-249
 *             / "25" %x30-35          ; 250-255
 *
 * Consumes up to three decimal digits at *ptr. Leading zeros are accepted,
 * and digits beyond the third are left for the caller.
 *
 * Returns TRUE and advances *ptr past the digits consumed if there is at
 * least one digit and the value is at most 255. Otherwise returns FALSE
 * and leaves *ptr unchanged.
 *
 * The range check is done on the numeric value. Comparing the digits
 * individually (first >= '2', second >= '5', third >= '5') gets it wrong:
 * it rejects the valid octet 255 and accepts values such as 260 or 300.
 */
static BOOL check_dec_octet(const WCHAR **ptr) {
    const WCHAR *cur = *ptr;
    DWORD value = 0;
    DWORD digits = 0;

    while(digits < 3 && *cur >= '0' && *cur <= '9') {
        value = value*10 + (*cur - '0');
        ++cur;
        ++digits;
    }

    /* A dec-octet must be at least 1 digit long. */
    if(!digits)
        return FALSE;

    /* Anything > 255 isn't a valid IP dec-octet. */
    if(value > 255)
        return FALSE;

    *ptr = cur;
    return TRUE;
}
1000 /* Checks if there is an implicit IPv4 address in the host component of the URI.
1001 * The max value of an implicit IPv4 address is UINT_MAX.
1003 * Ex:
1004 * "234567" would be considered an implicit IPv4 address.
1006 static BOOL check_implicit_ipv4(const WCHAR **ptr, UINT *val) {
1007 const WCHAR *start = *ptr;
1008 ULONGLONG ret = 0;
1009 *val = 0;
1011 while(is_num(**ptr)) {
1012 ret = ret*10 + (**ptr - '0');
1014 if(ret > UINT_MAX) {
1015 *ptr = start;
1016 return FALSE;
1018 ++(*ptr);
1021 if(*ptr == start)
1022 return FALSE;
1024 *val = ret;
1025 return TRUE;
1028 /* Checks if the string contains an IPv4 address.
1030 * This function has a strict mode or a non-strict mode of operation
1031 * When 'strict' is set to FALSE this function will return TRUE if
1032 * the string contains at least 'dec-octet "." dec-octet' since partial
1033 * IPv4 addresses will be normalized out into full IPv4 addresses. When
1034 * 'strict' is set this function expects there to be a full IPv4 address.
1036 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
1038 static BOOL check_ipv4address(const WCHAR **ptr, BOOL strict) {
1039 const WCHAR *start = *ptr;
1041 if(!check_dec_octet(ptr)) {
1042 *ptr = start;
1043 return FALSE;
1046 if(**ptr != '.') {
1047 *ptr = start;
1048 return FALSE;
1051 ++(*ptr);
1052 if(!check_dec_octet(ptr)) {
1053 *ptr = start;
1054 return FALSE;
1057 if(**ptr != '.') {
1058 if(strict) {
1059 *ptr = start;
1060 return FALSE;
1061 } else
1062 return TRUE;
1065 ++(*ptr);
1066 if(!check_dec_octet(ptr)) {
1067 *ptr = start;
1068 return FALSE;
1071 if(**ptr != '.') {
1072 if(strict) {
1073 *ptr = start;
1074 return FALSE;
1075 } else
1076 return TRUE;
1079 ++(*ptr);
1080 if(!check_dec_octet(ptr)) {
1081 *ptr = start;
1082 return FALSE;
1085 /* Found a four digit ip address. */
1086 return TRUE;
1088 /* Tries to parse the scheme name of the URI.
1090 * scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3896.
1091 * NOTE: Windows accepts a number as the first character of a scheme.
1093 static BOOL parse_scheme_name(const WCHAR **ptr, parse_data *data, DWORD extras) {
1094 const WCHAR *start = *ptr;
1096 data->scheme = NULL;
1097 data->scheme_len = 0;
1099 while(**ptr) {
1100 if(**ptr == '*' && *ptr == start) {
1101 /* Might have found a wildcard scheme. If it is the next
1102 * char has to be a ':' for it to be a valid URI
1104 ++(*ptr);
1105 break;
1106 } else if(!is_num(**ptr) && !is_alpha(**ptr) && **ptr != '+' &&
1107 **ptr != '-' && **ptr != '.')
1108 break;
1110 (*ptr)++;
1113 if(*ptr == start)
1114 return FALSE;
1116 /* Schemes must end with a ':' */
1117 if(**ptr != ':' && !((extras & ALLOW_NULL_TERM_SCHEME) && !**ptr)) {
1118 *ptr = start;
1119 return FALSE;
1122 data->scheme = start;
1123 data->scheme_len = *ptr - start;
1125 ++(*ptr);
1126 return TRUE;
1129 /* Tries to deduce the corresponding URL_SCHEME for the given URI. Stores
1130 * the deduced URL_SCHEME in data->scheme_type.
1132 static BOOL parse_scheme_type(parse_data *data) {
1133 /* If there's scheme data then see if it's a recognized scheme. */
1134 if(data->scheme && data->scheme_len) {
1135 DWORD i;
1137 for(i = 0; i < sizeof(recognized_schemes)/sizeof(recognized_schemes[0]); ++i) {
1138 if(lstrlenW(recognized_schemes[i].scheme_name) == data->scheme_len) {
1139 /* Has to be a case insensitive compare. */
1140 if(!StrCmpNIW(recognized_schemes[i].scheme_name, data->scheme, data->scheme_len)) {
1141 data->scheme_type = recognized_schemes[i].scheme;
1142 return TRUE;
1147 /* If we get here it means it's not a recognized scheme. */
1148 data->scheme_type = URL_SCHEME_UNKNOWN;
1149 return TRUE;
1150 } else if(data->is_relative) {
1151 /* Relative URI's have no scheme. */
1152 data->scheme_type = URL_SCHEME_UNKNOWN;
1153 return TRUE;
1154 } else {
1155 /* Should never reach here! what happened... */
1156 FIXME("(%p): Unable to determine scheme type for URI %s\n", data, debugstr_w(data->uri));
1157 return FALSE;
1161 /* Tries to parse (or deduce) the scheme_name of a URI. If it can't
1162 * parse a scheme from the URI it will try to deduce the scheme_name and scheme_type
1163 * using the flags specified in 'flags' (if any). Flags that affect how this function
1164 * operates are the Uri_CREATE_ALLOW_* flags.
1166 * All parsed/deduced information will be stored in 'data' when the function returns.
1168 * Returns TRUE if it was able to successfully parse the information.
1170 static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1171 static const WCHAR fileW[] = {'f','i','l','e',0};
1172 static const WCHAR wildcardW[] = {'*',0};
1174 /* First check to see if the uri could implicitly be a file path. */
1175 if(is_implicit_file_path(*ptr)) {
1176 if(flags & Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME) {
1177 data->scheme = fileW;
1178 data->scheme_len = lstrlenW(fileW);
1179 data->has_implicit_scheme = TRUE;
1181 TRACE("(%p %p %x): URI is an implicit file path.\n", ptr, data, flags);
1182 } else {
1183 /* Windows does not consider anything that can implicitly be a file
1184 * path to be a valid URI if the ALLOW_IMPLICIT_FILE_SCHEME flag is not set...
1186 TRACE("(%p %p %x): URI is implicitly a file path, but, the ALLOW_IMPLICIT_FILE_SCHEME flag wasn't set.\n",
1187 ptr, data, flags);
1188 return FALSE;
1190 } else if(!parse_scheme_name(ptr, data, extras)) {
1191 /* No scheme was found, this means it could be:
1192 * a) an implicit Wildcard scheme
1193 * b) a relative URI
1194 * c) an invalid URI.
1196 if(flags & Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME) {
1197 data->scheme = wildcardW;
1198 data->scheme_len = lstrlenW(wildcardW);
1199 data->has_implicit_scheme = TRUE;
1201 TRACE("(%p %p %x): URI is an implicit wildcard scheme.\n", ptr, data, flags);
1202 } else if (flags & Uri_CREATE_ALLOW_RELATIVE) {
1203 data->is_relative = TRUE;
1204 TRACE("(%p %p %x): URI is relative.\n", ptr, data, flags);
1205 } else {
1206 TRACE("(%p %p %x): Malformed URI found. Unable to deduce scheme name.\n", ptr, data, flags);
1207 return FALSE;
1211 if(!data->is_relative)
1212 TRACE("(%p %p %x): Found scheme=%s scheme_len=%d\n", ptr, data, flags,
1213 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len);
1215 if(!parse_scheme_type(data))
1216 return FALSE;
1218 TRACE("(%p %p %x): Assigned %d as the URL_SCHEME.\n", ptr, data, flags, data->scheme_type);
1219 return TRUE;
1222 static BOOL parse_username(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1223 data->username = *ptr;
1225 while(**ptr != ':' && **ptr != '@') {
1226 if(**ptr == '%') {
1227 if(!check_pct_encoded(ptr)) {
1228 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1229 *ptr = data->username;
1230 data->username = NULL;
1231 return FALSE;
1233 } else
1234 continue;
1235 } else if(extras & ALLOW_NULL_TERM_USER_NAME && !**ptr)
1236 break;
1237 else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) {
1238 *ptr = data->username;
1239 data->username = NULL;
1240 return FALSE;
1243 ++(*ptr);
1246 data->username_len = *ptr - data->username;
1247 return TRUE;
1250 static BOOL parse_password(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1251 data->password = *ptr;
1253 while(**ptr != '@') {
1254 if(**ptr == '%') {
1255 if(!check_pct_encoded(ptr)) {
1256 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1257 *ptr = data->password;
1258 data->password = NULL;
1259 return FALSE;
1261 } else
1262 continue;
1263 } else if(extras & ALLOW_NULL_TERM_PASSWORD && !**ptr)
1264 break;
1265 else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) {
1266 *ptr = data->password;
1267 data->password = NULL;
1268 return FALSE;
1271 ++(*ptr);
1274 data->password_len = *ptr - data->password;
1275 return TRUE;
1278 /* Parses the userinfo part of the URI (if it exists). The userinfo field of
1279 * a URI can consist of "username:password@", or just "username@".
1281 * RFC def:
1282 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
1284 * NOTES:
1285 * 1) If there is more than one ':' in the userinfo part of the URI Windows
1286 * uses the first occurrence of ':' to delimit the username and password
1287 * components.
1289 * ex:
1290 * ftp://user:pass:word@winehq.org
1292 * would yield "user" as the username and "pass:word" as the password.
1294 * 2) Windows allows any character to appear in the "userinfo" part of
1295 * a URI, as long as it's not an authority delimiter character set.
1297 static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) {
1298 const WCHAR *start = *ptr;
1300 if(!parse_username(ptr, data, flags, 0)) {
1301 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
1302 return;
1305 if(**ptr == ':') {
1306 ++(*ptr);
1307 if(!parse_password(ptr, data, flags, 0)) {
1308 *ptr = start;
1309 data->username = NULL;
1310 data->username_len = 0;
1311 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
1312 return;
1316 if(**ptr != '@') {
1317 *ptr = start;
1318 data->username = NULL;
1319 data->username_len = 0;
1320 data->password = NULL;
1321 data->password_len = 0;
1323 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
1324 return;
1327 if(data->username)
1328 TRACE("(%p %p %x): Found username %s len=%d.\n", ptr, data, flags,
1329 debugstr_wn(data->username, data->username_len), data->username_len);
1331 if(data->password)
1332 TRACE("(%p %p %x): Found password %s len=%d.\n", ptr, data, flags,
1333 debugstr_wn(data->password, data->password_len), data->password_len);
1335 ++(*ptr);
1338 /* Attempts to parse a port from the URI.
1340 * NOTES:
1341 * Windows seems to have a cap on what the maximum value
1342 * for a port can be. The max value is USHORT_MAX.
1344 * port = *DIGIT
1346 static BOOL parse_port(const WCHAR **ptr, parse_data *data, DWORD flags) {
1347 UINT port = 0;
1348 data->port = *ptr;
1350 while(!is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) {
1351 if(!is_num(**ptr)) {
1352 *ptr = data->port;
1353 data->port = NULL;
1354 return FALSE;
1357 port = port*10 + (**ptr-'0');
1359 if(port > USHRT_MAX) {
1360 *ptr = data->port;
1361 data->port = NULL;
1362 return FALSE;
1365 ++(*ptr);
1368 data->has_port = TRUE;
1369 data->port_value = port;
1370 data->port_len = *ptr - data->port;
1372 TRACE("(%p %p %x): Found port %s len=%d value=%u\n", ptr, data, flags,
1373 debugstr_wn(data->port, data->port_len), data->port_len, data->port_value);
1374 return TRUE;
1377 /* Attempts to parse a IPv4 address from the URI.
1379 * NOTES:
1380 * Windows normalizes IPv4 addresses, This means there are three
1381 * possibilities for the URI to contain an IPv4 address.
1382 * 1) A well formed address (ex. 192.2.2.2).
1383 * 2) A partially formed address. For example "192.0" would
1384 * normalize to "192.0.0.0" during canonicalization.
1385 * 3) An implicit IPv4 address. For example "256" would
1386 * normalize to "0.0.1.0" during canonicalization. Also
1387 * note that the maximum value for an implicit IP address
1388 * is UINT_MAX, if the value in the URI exceeds this then
1389 * it is not considered an IPv4 address.
1391 static BOOL parse_ipv4address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1392 const BOOL is_unknown = data->scheme_type == URL_SCHEME_UNKNOWN;
1393 data->host = *ptr;
1395 if(!check_ipv4address(ptr, FALSE)) {
1396 if(!check_implicit_ipv4(ptr, &data->implicit_ipv4)) {
1397 TRACE("(%p %p %x): URI didn't contain anything looking like an IPv4 address.\n",
1398 ptr, data, flags);
1399 *ptr = data->host;
1400 data->host = NULL;
1401 return FALSE;
1402 } else
1403 data->has_implicit_ip = TRUE;
1406 data->host_len = *ptr - data->host;
1407 data->host_type = Uri_HOST_IPV4;
1409 /* Check if what we found is the only part of the host name (if it isn't
1410 * we don't have an IPv4 address).
1412 if(**ptr == ':') {
1413 ++(*ptr);
1414 if(!parse_port(ptr, data, flags)) {
1415 *ptr = data->host;
1416 data->host = NULL;
1417 return FALSE;
1419 } else if(!is_auth_delim(**ptr, !is_unknown)) {
1420 /* Found more data which belongs to the host, so this isn't an IPv4. */
1421 *ptr = data->host;
1422 data->host = NULL;
1423 data->has_implicit_ip = FALSE;
1424 return FALSE;
1427 TRACE("(%p %p %x): IPv4 address found. host=%s host_len=%d host_type=%d\n",
1428 ptr, data, flags, debugstr_wn(data->host, data->host_len),
1429 data->host_len, data->host_type);
1430 return TRUE;
1433 /* Attempts to parse the reg-name from the URI.
1435 * Because of the way Windows handles ':' this function also
1436 * handles parsing the port.
1438 * reg-name = *( unreserved / pct-encoded / sub-delims )
1440 * NOTE:
1441 * Windows allows everything, but, the characters in "auth_delims" and ':'
1442 * to appear in a reg-name, unless it's an unknown scheme type then ':' is
1443 * allowed to appear (even if a valid port isn't after it).
1445 * Windows doesn't like host names which start with '[' and end with ']'
1446 * and don't contain a valid IP literal address in between them.
1448 * On Windows if a '[' is encountered in the host name the ':' no longer
1449 * counts as a delimiter until you reach the next ']' or an "authority delimiter".
1451 * A reg-name CAN be empty.
1453 static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1454 const BOOL has_start_bracket = **ptr == '[';
1455 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1456 const BOOL is_res = data->scheme_type == URL_SCHEME_RES;
1457 BOOL inside_brackets = has_start_bracket;
1459 /* res URIs don't have ports. */
1460 BOOL ignore_col = (extras & IGNORE_PORT_DELIMITER) || is_res;
1462 /* We have to be careful with file schemes. */
1463 if(data->scheme_type == URL_SCHEME_FILE) {
1464 /* This is because an implicit file scheme could be "C:\\test" and it
1465 * would trick this function into thinking the host is "C", when after
1466 * canonicalization the host would end up being an empty string. A drive
1467 * path can also have a '|' instead of a ':' after the drive letter.
1469 if(is_drive_path(*ptr)) {
1470 /* Regular old drive paths have no host type (or host name). */
1471 data->host_type = Uri_HOST_UNKNOWN;
1472 data->host = *ptr;
1473 data->host_len = 0;
1474 return TRUE;
1475 } else if(is_unc_path(*ptr))
1476 /* Skip past the "\\" of a UNC path. */
1477 *ptr += 2;
1480 data->host = *ptr;
1482 /* For res URIs, everything before the first '/' is
1483 * considered the host.
1485 while((!is_res && !is_auth_delim(**ptr, known_scheme)) ||
1486 (is_res && **ptr && **ptr != '/')) {
1487 if(**ptr == ':' && !ignore_col) {
1488 /* We can ignore ':' if we are inside brackets.*/
1489 if(!inside_brackets) {
1490 const WCHAR *tmp = (*ptr)++;
1492 /* Attempt to parse the port. */
1493 if(!parse_port(ptr, data, flags)) {
1494 /* Windows expects there to be a valid port for known scheme types. */
1495 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1496 *ptr = data->host;
1497 data->host = NULL;
1498 TRACE("(%p %p %x %x): Expected valid port\n", ptr, data, flags, extras);
1499 return FALSE;
1500 } else
1501 /* Windows gives up on trying to parse a port when it
1502 * encounters an invalid port.
1504 ignore_col = TRUE;
1505 } else {
1506 data->host_len = tmp - data->host;
1507 break;
1510 } else if(**ptr == '%' && (known_scheme && !is_res)) {
1511 /* Has to be a legit % encoded value. */
1512 if(!check_pct_encoded(ptr)) {
1513 *ptr = data->host;
1514 data->host = NULL;
1515 return FALSE;
1516 } else
1517 continue;
1518 } else if(is_res && is_forbidden_dos_path_char(**ptr)) {
1519 *ptr = data->host;
1520 data->host = NULL;
1521 return FALSE;
1522 } else if(**ptr == ']')
1523 inside_brackets = FALSE;
1524 else if(**ptr == '[')
1525 inside_brackets = TRUE;
1527 ++(*ptr);
1530 if(has_start_bracket) {
1531 /* Make sure the last character of the host wasn't a ']'. */
1532 if(*(*ptr-1) == ']') {
1533 TRACE("(%p %p %x %x): Expected an IP literal inside of the host\n",
1534 ptr, data, flags, extras);
1535 *ptr = data->host;
1536 data->host = NULL;
1537 return FALSE;
1541 /* Don't overwrite our length if we found a port earlier. */
1542 if(!data->port)
1543 data->host_len = *ptr - data->host;
1545 /* If the host is empty, then it's an unknown host type. */
1546 if(data->host_len == 0 || is_res)
1547 data->host_type = Uri_HOST_UNKNOWN;
1548 else
1549 data->host_type = Uri_HOST_DNS;
1551 TRACE("(%p %p %x %x): Parsed reg-name. host=%s len=%d\n", ptr, data, flags, extras,
1552 debugstr_wn(data->host, data->host_len), data->host_len);
1553 return TRUE;
1556 /* Attempts to parse an IPv6 address out of the URI.
1558 * IPv6address = 6( h16 ":" ) ls32
1559 * / "::" 5( h16 ":" ) ls32
1560 * / [ h16 ] "::" 4( h16 ":" ) ls32
1561 * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1562 * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1563 * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1564 * / [ *4( h16 ":" ) h16 ] "::" ls32
1565 * / [ *5( h16 ":" ) h16 ] "::" h16
1566 * / [ *6( h16 ":" ) h16 ] "::"
1568 * ls32 = ( h16 ":" h16 ) / IPv4address
1569 * ; least-significant 32 bits of address.
1571 * h16 = 1*4HEXDIG
1572 * ; 16 bits of address represented in hexadecimal.
1574 * Modeled after google-url's 'DoParseIPv6' function.
1576 static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1577 const WCHAR *start, *cur_start;
1578 ipv6_address ip;
1580 start = cur_start = *ptr;
1581 memset(&ip, 0, sizeof(ipv6_address));
1583 for(;; ++(*ptr)) {
1584 /* Check if we're on the last character of the host. */
1585 BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)
1586 || **ptr == ']');
1588 BOOL is_split = (**ptr == ':');
1589 BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':');
1591 /* Check if we're at the end of a component, or
1592 * if we're at the end of the IPv6 address.
1594 if(is_split || is_end) {
1595 DWORD cur_len = 0;
1597 cur_len = *ptr - cur_start;
1599 /* h16 can't have a length > 4. */
1600 if(cur_len > 4) {
1601 *ptr = start;
1603 TRACE("(%p %p %x): h16 component to long.\n",
1604 ptr, data, flags);
1605 return FALSE;
1608 if(cur_len == 0) {
1609 /* An h16 component can't have the length of 0 unless
1610 * the elision is at the beginning of the address, or
1611 * at the end of the address.
1613 if(!((*ptr == start && is_elision) ||
1614 (is_end && (*ptr-2) == ip.elision))) {
1615 *ptr = start;
1616 TRACE("(%p %p %x): IPv6 component cannot have a length of 0.\n",
1617 ptr, data, flags);
1618 return FALSE;
1622 if(cur_len > 0) {
1623 /* An IPv6 address can have no more than 8 h16 components. */
1624 if(ip.h16_count >= 8) {
1625 *ptr = start;
1626 TRACE("(%p %p %x): Not a IPv6 address, too many h16 components.\n",
1627 ptr, data, flags);
1628 return FALSE;
1631 ip.components[ip.h16_count].str = cur_start;
1632 ip.components[ip.h16_count].len = cur_len;
1634 TRACE("(%p %p %x): Found h16 component %s, len=%d, h16_count=%d\n",
1635 ptr, data, flags, debugstr_wn(cur_start, cur_len), cur_len,
1636 ip.h16_count);
1637 ++ip.h16_count;
1641 if(is_end)
1642 break;
1644 if(is_elision) {
1645 /* A IPv6 address can only have 1 elision ('::'). */
1646 if(ip.elision) {
1647 *ptr = start;
1649 TRACE("(%p %p %x): IPv6 address cannot have 2 elisions.\n",
1650 ptr, data, flags);
1651 return FALSE;
1654 ip.elision = *ptr;
1655 ++(*ptr);
1658 if(is_split)
1659 cur_start = *ptr+1;
1660 else {
1661 if(!check_ipv4address(ptr, TRUE)) {
1662 if(!is_hexdigit(**ptr)) {
1663 /* Not a valid character for an IPv6 address. */
1664 *ptr = start;
1665 return FALSE;
1667 } else {
1668 /* Found an IPv4 address. */
1669 ip.ipv4 = cur_start;
1670 ip.ipv4_len = *ptr - cur_start;
1672 TRACE("(%p %p %x): Found an attached IPv4 address %s len=%d.\n",
1673 ptr, data, flags, debugstr_wn(ip.ipv4, ip.ipv4_len),
1674 ip.ipv4_len);
1676 /* IPv4 addresses can only appear at the end of a IPv6. */
1677 break;
1682 compute_ipv6_comps_size(&ip);
1684 /* Make sure the IPv6 address adds up to 16 bytes. */
1685 if(ip.components_size + ip.elision_size != 16) {
1686 *ptr = start;
1687 TRACE("(%p %p %x): Invalid IPv6 address, did not add up to 16 bytes.\n",
1688 ptr, data, flags);
1689 return FALSE;
1692 if(ip.elision_size == 2) {
1693 /* For some reason on Windows if an elision that represents
1694 * only one h16 component is encountered at the very begin or
1695 * end of an IPv6 address, Windows does not consider it a
1696 * valid IPv6 address.
1698 * Ex: [::2:3:4:5:6:7] is not valid, even though the sum
1699 * of all the components == 128bits.
1701 if(ip.elision < ip.components[0].str ||
1702 ip.elision > ip.components[ip.h16_count-1].str) {
1703 *ptr = start;
1704 TRACE("(%p %p %x): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n",
1705 ptr, data, flags);
1706 return FALSE;
1710 data->host_type = Uri_HOST_IPV6;
1711 data->has_ipv6 = TRUE;
1712 data->ipv6_address = ip;
1714 TRACE("(%p %p %x): Found valid IPv6 literal %s len=%d\n",
1715 ptr, data, flags, debugstr_wn(start, *ptr-start),
1716 (int)(*ptr-start));
1717 return TRUE;
1720 /* IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) */
1721 static BOOL parse_ipvfuture(const WCHAR **ptr, parse_data *data, DWORD flags) {
1722 const WCHAR *start = *ptr;
1724 /* IPvFuture has to start with a 'v' or 'V'. */
1725 if(**ptr != 'v' && **ptr != 'V')
1726 return FALSE;
1728 /* Following the v there must be at least 1 hex digit. */
1729 ++(*ptr);
1730 if(!is_hexdigit(**ptr)) {
1731 *ptr = start;
1732 return FALSE;
1735 ++(*ptr);
1736 while(is_hexdigit(**ptr))
1737 ++(*ptr);
1739 /* End of the hexdigit sequence must be a '.' */
1740 if(**ptr != '.') {
1741 *ptr = start;
1742 return FALSE;
1745 ++(*ptr);
1746 if(!is_unreserved(**ptr) && !is_subdelim(**ptr) && **ptr != ':') {
1747 *ptr = start;
1748 return FALSE;
1751 ++(*ptr);
1752 while(is_unreserved(**ptr) || is_subdelim(**ptr) || **ptr == ':')
1753 ++(*ptr);
1755 data->host_type = Uri_HOST_UNKNOWN;
1757 TRACE("(%p %p %x): Parsed IPvFuture address %s len=%d\n", ptr, data, flags,
1758 debugstr_wn(start, *ptr-start), (int)(*ptr-start));
1760 return TRUE;
1763 /* IP-literal = "[" ( IPv6address / IPvFuture ) "]" */
1764 static BOOL parse_ip_literal(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1765 data->host = *ptr;
1767 if(**ptr != '[' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) {
1768 data->host = NULL;
1769 return FALSE;
1770 } else if(**ptr == '[')
1771 ++(*ptr);
1773 if(!parse_ipv6address(ptr, data, flags)) {
1774 if(extras & SKIP_IP_FUTURE_CHECK || !parse_ipvfuture(ptr, data, flags)) {
1775 *ptr = data->host;
1776 data->host = NULL;
1777 return FALSE;
1781 if(**ptr != ']' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) {
1782 *ptr = data->host;
1783 data->host = NULL;
1784 return FALSE;
1785 } else if(!**ptr && extras & ALLOW_BRACKETLESS_IP_LITERAL) {
1786 /* The IP literal didn't contain brackets and was followed by
1787 * a NULL terminator, so no reason to even check the port.
1789 data->host_len = *ptr - data->host;
1790 return TRUE;
1793 ++(*ptr);
1794 if(**ptr == ':') {
1795 ++(*ptr);
1796 /* If a valid port is not found, then let it trickle down to
1797 * parse_reg_name.
1799 if(!parse_port(ptr, data, flags)) {
1800 *ptr = data->host;
1801 data->host = NULL;
1802 return FALSE;
1804 } else
1805 data->host_len = *ptr - data->host;
1807 return TRUE;
1810 /* Parses the host information from the URI.
1812 * host = IP-literal / IPv4address / reg-name
1814 static BOOL parse_host(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1815 if(!parse_ip_literal(ptr, data, flags, extras)) {
1816 if(!parse_ipv4address(ptr, data, flags)) {
1817 if(!parse_reg_name(ptr, data, flags, extras)) {
1818 TRACE("(%p %p %x %x): Malformed URI, Unknown host type.\n",
1819 ptr, data, flags, extras);
1820 return FALSE;
1825 return TRUE;
1828 /* Parses the authority information from the URI.
1830 * authority = [ userinfo "@" ] host [ ":" port ]
1832 static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) {
1833 parse_userinfo(ptr, data, flags);
1835 /* Parsing the port will happen during one of the host parsing
1836 * routines (if the URI has a port).
1838 if(!parse_host(ptr, data, flags, 0))
1839 return FALSE;
1841 return TRUE;
1844 /* Attempts to parse the path information of a hierarchical URI. */
1845 static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) {
1846 const WCHAR *start = *ptr;
1847 static const WCHAR slash[] = {'/',0};
1848 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
1850 if(is_path_delim(data->scheme_type, **ptr)) {
1851 if(data->scheme_type == URL_SCHEME_WILDCARD && !data->must_have_path) {
1852 data->path = NULL;
1853 data->path_len = 0;
1854 } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
1855 /* If the path component is empty, then a '/' is added. */
1856 data->path = slash;
1857 data->path_len = 1;
1859 } else {
1860 while(!is_path_delim(data->scheme_type, **ptr)) {
1861 if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN && !is_file) {
1862 if(!check_pct_encoded(ptr)) {
1863 *ptr = start;
1864 return FALSE;
1865 } else
1866 continue;
1867 } else if(is_forbidden_dos_path_char(**ptr) && is_file &&
1868 (flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
1869 /* File schemes with USE_DOS_PATH set aren't allowed to have
1870 * a '<' or '>' or '\"' appear in them.
1872 *ptr = start;
1873 return FALSE;
1874 } else if(**ptr == '\\') {
1875 /* Not allowed to have a backslash if NO_CANONICALIZE is set
1876 * and the scheme is known type (but not a file scheme).
1878 if(flags & Uri_CREATE_NO_CANONICALIZE) {
1879 if(data->scheme_type != URL_SCHEME_FILE &&
1880 data->scheme_type != URL_SCHEME_UNKNOWN) {
1881 *ptr = start;
1882 return FALSE;
1887 ++(*ptr);
1890 /* The only time a URI doesn't have a path is when
1891 * the NO_CANONICALIZE flag is set and the raw URI
1892 * didn't contain one.
1894 if(*ptr == start) {
1895 data->path = NULL;
1896 data->path_len = 0;
1897 } else {
1898 data->path = start;
1899 data->path_len = *ptr - start;
1903 if(data->path)
1904 TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags,
1905 debugstr_wn(data->path, data->path_len), data->path_len);
1906 else
1907 TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags);
1909 return TRUE;
1912 /* Parses the path of an opaque URI (much less strict than the parser
1913 * for a hierarchical URI).
1915 * NOTE:
1916 * Windows allows invalid % encoded data to appear in opaque URI paths
1917 * for unknown scheme types.
1919 * File schemes with USE_DOS_PATH set aren't allowed to have '<', '>', or '\"'
1920 * appear in them.
1922 static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags) {
1923 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1924 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
1925 const BOOL is_mailto = data->scheme_type == URL_SCHEME_MAILTO;
1927 if (is_mailto && (*ptr)[0] == '/' && (*ptr)[1] == '/')
1929 if ((*ptr)[2]) data->path = *ptr + 2;
1930 else data->path = NULL;
1932 else
1933 data->path = *ptr;
1935 while(!is_path_delim(data->scheme_type, **ptr)) {
1936 if(**ptr == '%' && known_scheme) {
1937 if(!check_pct_encoded(ptr)) {
1938 *ptr = data->path;
1939 data->path = NULL;
1940 return FALSE;
1941 } else
1942 continue;
1943 } else if(is_forbidden_dos_path_char(**ptr) && is_file &&
1944 (flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
1945 *ptr = data->path;
1946 data->path = NULL;
1947 return FALSE;
1950 ++(*ptr);
1953 if (data->path) data->path_len = *ptr - data->path;
1954 TRACE("(%p %p %x): Parsed opaque URI path %s len=%d\n", ptr, data, flags,
1955 debugstr_wn(data->path, data->path_len), data->path_len);
1956 return TRUE;
1959 /* Determines how the URI should be parsed after the scheme information.
1961 * If the scheme is followed by "//", then it is treated as a hierarchical URI
1962 * which then the authority and path information will be parsed out. Otherwise, the
1963 * URI will be treated as an opaque URI which the authority information is not parsed
1964 * out.
1966 * RFC 3896 definition of hier-part:
1968 * hier-part = "//" authority path-abempty
1969 * / path-absolute
1970 * / path-rootless
1971 * / path-empty
1973 * MSDN opaque URI definition:
1974 * scheme ":" path [ "#" fragment ]
1976 * NOTES:
1977 * If the URI is of an unknown scheme type and has a "//" following the scheme then it
1978 * is treated as a hierarchical URI, but, if the CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is
1979 * set then it is considered an opaque URI regardless of what follows the scheme information
1980 * (per MSDN documentation).
1982 static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
1983 const WCHAR *start = *ptr;
1985 data->must_have_path = FALSE;
1987 /* For javascript: URIs, simply set everything as a path */
1988 if(data->scheme_type == URL_SCHEME_JAVASCRIPT) {
1989 data->path = *ptr;
1990 data->path_len = strlenW(*ptr);
1991 data->is_opaque = TRUE;
1992 *ptr += data->path_len;
1993 return TRUE;
1996 /* Checks if the authority information needs to be parsed. */
1997 if(is_hierarchical_uri(ptr, data)) {
1998 /* Only treat it as a hierarchical URI if the scheme_type is known or
1999 * the Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is not set.
2001 if(data->scheme_type != URL_SCHEME_UNKNOWN ||
2002 !(flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) {
2003 TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags);
2004 data->is_opaque = FALSE;
2006 if(data->scheme_type == URL_SCHEME_WILDCARD && !data->has_implicit_scheme) {
2007 if(**ptr == '/' && *(*ptr+1) == '/') {
2008 data->must_have_path = TRUE;
2009 *ptr += 2;
2013 /* TODO: Handle hierarchical URI's, parse authority then parse the path. */
2014 if(!parse_authority(ptr, data, flags))
2015 return FALSE;
2017 return parse_path_hierarchical(ptr, data, flags);
2018 } else
2019 /* Reset ptr to its starting position so opaque path parsing
2020 * begins at the correct location.
2022 *ptr = start;
2025 /* If it reaches here, then the URI will be treated as an opaque
2026 * URI.
2029 TRACE("(%p %p %x): Treating URI as an opaque URI.\n", ptr, data, flags);
2031 data->is_opaque = TRUE;
2032 if(!parse_path_opaque(ptr, data, flags))
2033 return FALSE;
2035 return TRUE;
2038 /* Attempts to parse the query string from the URI.
2040 * NOTES:
2041 * If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded
2042 * data is allowed to appear in the query string. For unknown scheme types
2043 * invalid percent encoded data is allowed to appear regardless.
2045 static BOOL parse_query(const WCHAR **ptr, parse_data *data, DWORD flags) {
2046 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2048 if(**ptr != '?') {
2049 TRACE("(%p %p %x): URI didn't contain a query string.\n", ptr, data, flags);
2050 return TRUE;
2053 data->query = *ptr;
2055 ++(*ptr);
2056 while(**ptr && **ptr != '#') {
2057 if(**ptr == '%' && known_scheme &&
2058 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2059 if(!check_pct_encoded(ptr)) {
2060 *ptr = data->query;
2061 data->query = NULL;
2062 return FALSE;
2063 } else
2064 continue;
2067 ++(*ptr);
2070 data->query_len = *ptr - data->query;
2072 TRACE("(%p %p %x): Parsed query string %s len=%d\n", ptr, data, flags,
2073 debugstr_wn(data->query, data->query_len), data->query_len);
2074 return TRUE;
2077 /* Attempts to parse the fragment from the URI.
2079 * NOTES:
2080 * If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded
2081 * data is allowed to appear in the query string. For unknown scheme types
2082 * invalid percent encoded data is allowed to appear regardless.
2084 static BOOL parse_fragment(const WCHAR **ptr, parse_data *data, DWORD flags) {
2085 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2087 if(**ptr != '#') {
2088 TRACE("(%p %p %x): URI didn't contain a fragment.\n", ptr, data, flags);
2089 return TRUE;
2092 data->fragment = *ptr;
2094 ++(*ptr);
2095 while(**ptr) {
2096 if(**ptr == '%' && known_scheme &&
2097 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2098 if(!check_pct_encoded(ptr)) {
2099 *ptr = data->fragment;
2100 data->fragment = NULL;
2101 return FALSE;
2102 } else
2103 continue;
2106 ++(*ptr);
2109 data->fragment_len = *ptr - data->fragment;
2111 TRACE("(%p %p %x): Parsed fragment %s len=%d\n", ptr, data, flags,
2112 debugstr_wn(data->fragment, data->fragment_len), data->fragment_len);
2113 return TRUE;
2116 /* Parses and validates the components of the specified by data->uri
2117 * and stores the information it parses into 'data'.
2119 * Returns TRUE if it successfully parsed the URI. False otherwise.
2121 static BOOL parse_uri(parse_data *data, DWORD flags) {
2122 const WCHAR *ptr;
2123 const WCHAR **pptr;
2125 ptr = data->uri;
2126 pptr = &ptr;
2128 TRACE("(%p %x): BEGINNING TO PARSE URI %s.\n", data, flags, debugstr_w(data->uri));
2130 if(!parse_scheme(pptr, data, flags, 0))
2131 return FALSE;
2133 if(!parse_hierpart(pptr, data, flags))
2134 return FALSE;
2136 if(!parse_query(pptr, data, flags))
2137 return FALSE;
2139 if(!parse_fragment(pptr, data, flags))
2140 return FALSE;
2142 TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags);
2143 return TRUE;
2146 static BOOL canonicalize_username(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2147 const WCHAR *ptr;
2149 if(!data->username) {
2150 uri->userinfo_start = -1;
2151 return TRUE;
2154 uri->userinfo_start = uri->canon_len;
2155 for(ptr = data->username; ptr < data->username+data->username_len; ++ptr) {
2156 if(*ptr == '%') {
2157 /* Only decode % encoded values for known scheme types. */
2158 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
2159 /* See if the value really needs decoding. */
2160 WCHAR val = decode_pct_val(ptr);
2161 if(is_unreserved(val)) {
2162 if(!computeOnly)
2163 uri->canon_uri[uri->canon_len] = val;
2165 ++uri->canon_len;
2167 /* Move pass the hex characters. */
2168 ptr += 2;
2169 continue;
2172 } else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') {
2173 /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag
2174 * is NOT set.
2176 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
2177 if(!computeOnly)
2178 pct_encode_val(*ptr, uri->canon_uri + uri->canon_len);
2180 uri->canon_len += 3;
2181 continue;
2185 if(!computeOnly)
2186 /* Nothing special, so just copy the character over. */
2187 uri->canon_uri[uri->canon_len] = *ptr;
2188 ++uri->canon_len;
2191 return TRUE;
2194 static BOOL canonicalize_password(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2195 const WCHAR *ptr;
2197 if(!data->password) {
2198 uri->userinfo_split = -1;
2199 return TRUE;
2202 if(uri->userinfo_start == -1)
2203 /* Has a password, but, doesn't have a username. */
2204 uri->userinfo_start = uri->canon_len;
2206 uri->userinfo_split = uri->canon_len - uri->userinfo_start;
2208 /* Add the ':' to the userinfo component. */
2209 if(!computeOnly)
2210 uri->canon_uri[uri->canon_len] = ':';
2211 ++uri->canon_len;
2213 for(ptr = data->password; ptr < data->password+data->password_len; ++ptr) {
2214 if(*ptr == '%') {
2215 /* Only decode % encoded values for known scheme types. */
2216 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
2217 /* See if the value really needs decoding. */
2218 WCHAR val = decode_pct_val(ptr);
2219 if(is_unreserved(val)) {
2220 if(!computeOnly)
2221 uri->canon_uri[uri->canon_len] = val;
2223 ++uri->canon_len;
2225 /* Move pass the hex characters. */
2226 ptr += 2;
2227 continue;
2230 } else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') {
2231 /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag
2232 * is NOT set.
2234 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
2235 if(!computeOnly)
2236 pct_encode_val(*ptr, uri->canon_uri + uri->canon_len);
2238 uri->canon_len += 3;
2239 continue;
2243 if(!computeOnly)
2244 /* Nothing special, so just copy the character over. */
2245 uri->canon_uri[uri->canon_len] = *ptr;
2246 ++uri->canon_len;
2249 return TRUE;
2252 /* Canonicalizes the userinfo of the URI represented by the parse_data.
2254 * Canonicalization of the userinfo is a simple process. If there are any percent
2255 * encoded characters that fall in the "unreserved" character set, they are decoded
2256 * to their actual value. If a character is not in the "unreserved" or "reserved" sets
2257 * then it is percent encoded. Other than that the characters are copied over without
2258 * change.
2260 static BOOL canonicalize_userinfo(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2261 uri->userinfo_start = uri->userinfo_split = -1;
2262 uri->userinfo_len = 0;
2264 if(!data->username && !data->password)
2265 /* URI doesn't have userinfo, so nothing to do here. */
2266 return TRUE;
2268 if(!canonicalize_username(data, uri, flags, computeOnly))
2269 return FALSE;
2271 if(!canonicalize_password(data, uri, flags, computeOnly))
2272 return FALSE;
2274 uri->userinfo_len = uri->canon_len - uri->userinfo_start;
2275 if(!computeOnly)
2276 TRACE("(%p %p %x %d): Canonicalized userinfo, userinfo_start=%d, userinfo=%s, userinfo_split=%d userinfo_len=%d.\n",
2277 data, uri, flags, computeOnly, uri->userinfo_start, debugstr_wn(uri->canon_uri + uri->userinfo_start, uri->userinfo_len),
2278 uri->userinfo_split, uri->userinfo_len);
2280 /* Now insert the '@' after the userinfo. */
2281 if(!computeOnly)
2282 uri->canon_uri[uri->canon_len] = '@';
2283 ++uri->canon_len;
2285 return TRUE;
2288 /* Attempts to canonicalize a reg_name.
2290 * Things that happen:
2291 * 1) If Uri_CREATE_NO_CANONICALIZE flag is not set, then the reg_name is
2292 * lower cased. Unless it's an unknown scheme type, which case it's
2293 * no lower cased regardless.
2295 * 2) Unreserved % encoded characters are decoded for known
2296 * scheme types.
2298 * 3) Forbidden characters are % encoded as long as
2299 * Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS flag is not set and
2300 * it isn't an unknown scheme type.
2302 * 4) If it's a file scheme and the host is "localhost" it's removed.
2304 * 5) If it's a file scheme and Uri_CREATE_FILE_USE_DOS_PATH is set,
2305 * then the UNC path characters are added before the host name.
2307 static BOOL canonicalize_reg_name(const parse_data *data, Uri *uri,
2308 DWORD flags, BOOL computeOnly) {
2309 static const WCHAR localhostW[] =
2310 {'l','o','c','a','l','h','o','s','t',0};
2311 const WCHAR *ptr;
2312 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2314 if(data->scheme_type == URL_SCHEME_FILE &&
2315 data->host_len == lstrlenW(localhostW)) {
2316 if(!StrCmpNIW(data->host, localhostW, data->host_len)) {
2317 uri->host_start = -1;
2318 uri->host_len = 0;
2319 uri->host_type = Uri_HOST_UNKNOWN;
2320 return TRUE;
2324 if(data->scheme_type == URL_SCHEME_FILE && flags & Uri_CREATE_FILE_USE_DOS_PATH) {
2325 if(!computeOnly) {
2326 uri->canon_uri[uri->canon_len] = '\\';
2327 uri->canon_uri[uri->canon_len+1] = '\\';
2329 uri->canon_len += 2;
2330 uri->authority_start = uri->canon_len;
2333 uri->host_start = uri->canon_len;
2335 for(ptr = data->host; ptr < data->host+data->host_len; ++ptr) {
2336 if(*ptr == '%' && known_scheme) {
2337 WCHAR val = decode_pct_val(ptr);
2338 if(is_unreserved(val)) {
2339 /* If NO_CANONICALIZE is not set, then windows lower cases the
2340 * decoded value.
2342 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && isupperW(val)) {
2343 if(!computeOnly)
2344 uri->canon_uri[uri->canon_len] = tolowerW(val);
2345 } else {
2346 if(!computeOnly)
2347 uri->canon_uri[uri->canon_len] = val;
2349 ++uri->canon_len;
2351 /* Skip past the % encoded character. */
2352 ptr += 2;
2353 continue;
2354 } else {
2355 /* Just copy the % over. */
2356 if(!computeOnly)
2357 uri->canon_uri[uri->canon_len] = *ptr;
2358 ++uri->canon_len;
2360 } else if(*ptr == '\\') {
2361 /* Only unknown scheme types could have made it here with a '\\' in the host name. */
2362 if(!computeOnly)
2363 uri->canon_uri[uri->canon_len] = *ptr;
2364 ++uri->canon_len;
2365 } else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && is_ascii(*ptr) &&
2366 !is_unreserved(*ptr) && !is_reserved(*ptr) && known_scheme) {
2367 if(!computeOnly) {
2368 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2370 /* The percent encoded value gets lower cased also. */
2371 if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
2372 uri->canon_uri[uri->canon_len+1] = tolowerW(uri->canon_uri[uri->canon_len+1]);
2373 uri->canon_uri[uri->canon_len+2] = tolowerW(uri->canon_uri[uri->canon_len+2]);
2377 uri->canon_len += 3;
2378 } else {
2379 if(!computeOnly) {
2380 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && known_scheme)
2381 uri->canon_uri[uri->canon_len] = tolowerW(*ptr);
2382 else
2383 uri->canon_uri[uri->canon_len] = *ptr;
2386 ++uri->canon_len;
2390 uri->host_len = uri->canon_len - uri->host_start;
2392 if(!computeOnly)
2393 TRACE("(%p %p %x %d): Canonicalize reg_name=%s len=%d\n", data, uri, flags,
2394 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2395 uri->host_len);
2397 if(!computeOnly)
2398 find_domain_name(uri->canon_uri+uri->host_start, uri->host_len,
2399 &(uri->domain_offset));
2401 return TRUE;
2404 /* Attempts to canonicalize an implicit IPv4 address. */
2405 static BOOL canonicalize_implicit_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2406 uri->host_start = uri->canon_len;
2408 TRACE("%u\n", data->implicit_ipv4);
2409 /* For unknown scheme types Windows doesn't convert
2410 * the value into an IP address, but it still considers
2411 * it an IPv4 address.
2413 if(data->scheme_type == URL_SCHEME_UNKNOWN) {
2414 if(!computeOnly)
2415 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2416 uri->canon_len += data->host_len;
2417 } else {
2418 if(!computeOnly)
2419 uri->canon_len += ui2ipv4(uri->canon_uri+uri->canon_len, data->implicit_ipv4);
2420 else
2421 uri->canon_len += ui2ipv4(NULL, data->implicit_ipv4);
2424 uri->host_len = uri->canon_len - uri->host_start;
2425 uri->host_type = Uri_HOST_IPV4;
2427 if(!computeOnly)
2428 TRACE("%p %p %x %d): Canonicalized implicit IP address=%s len=%d\n",
2429 data, uri, flags, computeOnly,
2430 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2431 uri->host_len);
2433 return TRUE;
2436 /* Attempts to canonicalize an IPv4 address.
2438 * If the parse_data represents a URI that has an implicit IPv4 address
2439 * (ex. http://256/, this function will convert 256 into 0.0.1.0). If
2440 * the implicit IP address exceeds the value of UINT_MAX (maximum value
2441 * for an IPv4 address) it's canonicalized as if it were a reg-name.
2443 * If the parse_data contains a partial or full IPv4 address it normalizes it.
2444 * A partial IPv4 address is something like "192.0" and would be normalized to
2445 * "192.0.0.0". With a full (or partial) IPv4 address like "192.002.01.003" would
2446 * be normalized to "192.2.1.3".
2448 * NOTES:
2449 * Windows ONLY normalizes IPv4 address for known scheme types (one that isn't
2450 * URL_SCHEME_UNKNOWN). For unknown scheme types, it simply copies the data from
2451 * the original URI into the canonicalized URI, but, it still recognizes URI's
2452 * host type as HOST_IPV4.
2454 static BOOL canonicalize_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2455 if(data->has_implicit_ip)
2456 return canonicalize_implicit_ipv4address(data, uri, flags, computeOnly);
2457 else {
2458 uri->host_start = uri->canon_len;
2460 /* Windows only normalizes for known scheme types. */
2461 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
2462 /* parse_data contains a partial or full IPv4 address, so normalize it. */
2463 DWORD i, octetDigitCount = 0, octetCount = 0;
2464 BOOL octetHasDigit = FALSE;
2466 for(i = 0; i < data->host_len; ++i) {
2467 if(data->host[i] == '0' && !octetHasDigit) {
2468 /* Can ignore leading zeros if:
2469 * 1) It isn't the last digit of the octet.
2470 * 2) i+1 != data->host_len
2471 * 3) i+1 != '.'
2473 if(octetDigitCount == 2 ||
2474 i+1 == data->host_len ||
2475 data->host[i+1] == '.') {
2476 if(!computeOnly)
2477 uri->canon_uri[uri->canon_len] = data->host[i];
2478 ++uri->canon_len;
2479 TRACE("Adding zero\n");
2481 } else if(data->host[i] == '.') {
2482 if(!computeOnly)
2483 uri->canon_uri[uri->canon_len] = data->host[i];
2484 ++uri->canon_len;
2486 octetDigitCount = 0;
2487 octetHasDigit = FALSE;
2488 ++octetCount;
2489 } else {
2490 if(!computeOnly)
2491 uri->canon_uri[uri->canon_len] = data->host[i];
2492 ++uri->canon_len;
2494 ++octetDigitCount;
2495 octetHasDigit = TRUE;
2499 /* Make sure the canonicalized IP address has 4 dec-octets.
2500 * If doesn't add "0" ones until there is 4;
2502 for( ; octetCount < 3; ++octetCount) {
2503 if(!computeOnly) {
2504 uri->canon_uri[uri->canon_len] = '.';
2505 uri->canon_uri[uri->canon_len+1] = '0';
2508 uri->canon_len += 2;
2510 } else {
2511 /* Windows doesn't normalize addresses in unknown schemes. */
2512 if(!computeOnly)
2513 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2514 uri->canon_len += data->host_len;
2517 uri->host_len = uri->canon_len - uri->host_start;
2518 if(!computeOnly)
2519 TRACE("(%p %p %x %d): Canonicalized IPv4 address, ip=%s len=%d\n",
2520 data, uri, flags, computeOnly,
2521 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2522 uri->host_len);
2525 return TRUE;
2528 /* Attempts to canonicalize the IPv6 address of the URI.
2530 * Multiple things happen during the canonicalization of an IPv6 address:
2531 * 1) Any leading zero's in a h16 component are removed.
2532 * Ex: [0001:0022::] -> [1:22::]
2534 * 2) The longest sequence of zero h16 components are compressed
2535 * into a "::" (elision). If there's a tie, the first is chosen.
2537 * Ex: [0:0:0:0:1:6:7:8] -> [::1:6:7:8]
2538 * [0:0:0:0:1:2::] -> [::1:2:0:0]
2539 * [0:0:1:2:0:0:7:8] -> [::1:2:0:0:7:8]
2541 * 3) If an IPv4 address is attached to the IPv6 address, it's
2542 * also normalized.
2543 * Ex: [::001.002.022.000] -> [::1.2.22.0]
2545 * 4) If an elision is present, but, only represents one h16 component
2546 * it's expanded.
2548 * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
2550 * 5) If the IPv6 address contains an IPv4 address and there exists
2551 * at least 1 non-zero h16 component the IPv4 address is converted
2552 * into two h16 components, otherwise it's normalized and kept as is.
2554 * Ex: [::192.200.003.4] -> [::192.200.3.4]
2555 * [ffff::192.200.003.4] -> [ffff::c0c8:3041]
2557 * NOTE:
2558 * For unknown scheme types Windows simply copies the address over without any
2559 * changes.
2561 * IPv4 address can be included in an elision if all its components are 0's.
2563 static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri,
2564 DWORD flags, BOOL computeOnly) {
2565 uri->host_start = uri->canon_len;
2567 if(data->scheme_type == URL_SCHEME_UNKNOWN) {
2568 if(!computeOnly)
2569 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2570 uri->canon_len += data->host_len;
2571 } else {
2572 USHORT values[8];
2573 INT elision_start;
2574 DWORD i, elision_len;
2576 if(!ipv6_to_number(&(data->ipv6_address), values)) {
2577 TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n",
2578 data, uri, flags, computeOnly);
2579 return FALSE;
2582 if(!computeOnly)
2583 uri->canon_uri[uri->canon_len] = '[';
2584 ++uri->canon_len;
2586 /* Find where the elision should occur (if any). */
2587 compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len);
2589 TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags,
2590 computeOnly, elision_start, elision_len);
2592 for(i = 0; i < 8; ++i) {
2593 BOOL in_elision = (elision_start > -1 && i >= elision_start &&
2594 i < elision_start+elision_len);
2595 BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision &&
2596 data->ipv6_address.h16_count == 0);
2598 if(i == elision_start) {
2599 if(!computeOnly) {
2600 uri->canon_uri[uri->canon_len] = ':';
2601 uri->canon_uri[uri->canon_len+1] = ':';
2603 uri->canon_len += 2;
2606 /* We can ignore the current component if we're in the elision. */
2607 if(in_elision)
2608 continue;
2610 /* We only add a ':' if we're not at i == 0, or when we're at
2611 * the very end of elision range since the ':' colon was handled
2612 * earlier. Otherwise we would end up with ":::" after elision.
2614 if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) {
2615 if(!computeOnly)
2616 uri->canon_uri[uri->canon_len] = ':';
2617 ++uri->canon_len;
2620 if(do_ipv4) {
2621 UINT val;
2622 DWORD len;
2624 /* Combine the two parts of the IPv4 address values. */
2625 val = values[i];
2626 val <<= 16;
2627 val += values[i+1];
2629 if(!computeOnly)
2630 len = ui2ipv4(uri->canon_uri+uri->canon_len, val);
2631 else
2632 len = ui2ipv4(NULL, val);
2634 uri->canon_len += len;
2635 ++i;
2636 } else {
2637 /* Write a regular h16 component to the URI. */
2639 /* Short circuit for the trivial case. */
2640 if(values[i] == 0) {
2641 if(!computeOnly)
2642 uri->canon_uri[uri->canon_len] = '0';
2643 ++uri->canon_len;
2644 } else {
2645 static const WCHAR formatW[] = {'%','x',0};
2647 if(!computeOnly)
2648 uri->canon_len += sprintfW(uri->canon_uri+uri->canon_len,
2649 formatW, values[i]);
2650 else {
2651 WCHAR tmp[5];
2652 uri->canon_len += sprintfW(tmp, formatW, values[i]);
2658 /* Add the closing ']'. */
2659 if(!computeOnly)
2660 uri->canon_uri[uri->canon_len] = ']';
2661 ++uri->canon_len;
2664 uri->host_len = uri->canon_len - uri->host_start;
2666 if(!computeOnly)
2667 TRACE("(%p %p %x %d): Canonicalized IPv6 address %s, len=%d\n", data, uri, flags,
2668 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2669 uri->host_len);
2671 return TRUE;
2674 /* Attempts to canonicalize the host of the URI (if any). */
2675 static BOOL canonicalize_host(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2676 uri->host_start = -1;
2677 uri->host_len = 0;
2678 uri->domain_offset = -1;
2680 if(data->host) {
2681 switch(data->host_type) {
2682 case Uri_HOST_DNS:
2683 uri->host_type = Uri_HOST_DNS;
2684 if(!canonicalize_reg_name(data, uri, flags, computeOnly))
2685 return FALSE;
2687 break;
2688 case Uri_HOST_IPV4:
2689 uri->host_type = Uri_HOST_IPV4;
2690 if(!canonicalize_ipv4address(data, uri, flags, computeOnly))
2691 return FALSE;
2693 break;
2694 case Uri_HOST_IPV6:
2695 if(!canonicalize_ipv6address(data, uri, flags, computeOnly))
2696 return FALSE;
2698 uri->host_type = Uri_HOST_IPV6;
2699 break;
2700 case Uri_HOST_UNKNOWN:
2701 if(data->host_len > 0 || data->scheme_type != URL_SCHEME_FILE) {
2702 uri->host_start = uri->canon_len;
2704 /* Nothing happens to unknown host types. */
2705 if(!computeOnly)
2706 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2707 uri->canon_len += data->host_len;
2708 uri->host_len = data->host_len;
2711 uri->host_type = Uri_HOST_UNKNOWN;
2712 break;
2713 default:
2714 FIXME("(%p %p %x %d): Canonicalization for host type %d not supported.\n", data,
2715 uri, flags, computeOnly, data->host_type);
2716 return FALSE;
2720 return TRUE;
2723 static BOOL canonicalize_port(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2724 BOOL has_default_port = FALSE;
2725 USHORT default_port = 0;
2726 DWORD i;
2728 uri->port_offset = -1;
2730 /* Check if the scheme has a default port. */
2731 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) {
2732 if(default_ports[i].scheme == data->scheme_type) {
2733 has_default_port = TRUE;
2734 default_port = default_ports[i].port;
2735 break;
2739 uri->has_port = data->has_port || has_default_port;
2741 /* Possible cases:
2742 * 1) Has a port which is the default port.
2743 * 2) Has a port (not the default).
2744 * 3) Doesn't have a port, but, scheme has a default port.
2745 * 4) No port.
2747 if(has_default_port && data->has_port && data->port_value == default_port) {
2748 /* If it's the default port and this flag isn't set, don't do anything. */
2749 if(flags & Uri_CREATE_NO_CANONICALIZE) {
2750 uri->port_offset = uri->canon_len-uri->authority_start;
2751 if(!computeOnly)
2752 uri->canon_uri[uri->canon_len] = ':';
2753 ++uri->canon_len;
2755 if(data->port) {
2756 /* Copy the original port over. */
2757 if(!computeOnly)
2758 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR));
2759 uri->canon_len += data->port_len;
2760 } else {
2761 if(!computeOnly)
2762 uri->canon_len += ui2str(uri->canon_uri+uri->canon_len, data->port_value);
2763 else
2764 uri->canon_len += ui2str(NULL, data->port_value);
2768 uri->port = default_port;
2769 } else if(data->has_port) {
2770 uri->port_offset = uri->canon_len-uri->authority_start;
2771 if(!computeOnly)
2772 uri->canon_uri[uri->canon_len] = ':';
2773 ++uri->canon_len;
2775 if(flags & Uri_CREATE_NO_CANONICALIZE && data->port) {
2776 /* Copy the original over without changes. */
2777 if(!computeOnly)
2778 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR));
2779 uri->canon_len += data->port_len;
2780 } else {
2781 if(!computeOnly)
2782 uri->canon_len += ui2str(uri->canon_uri+uri->canon_len, data->port_value);
2783 else
2784 uri->canon_len += ui2str(NULL, data->port_value);
2787 uri->port = data->port_value;
2788 } else if(has_default_port)
2789 uri->port = default_port;
2791 return TRUE;
2794 /* Canonicalizes the authority of the URI represented by the parse_data. */
2795 static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2796 uri->authority_start = uri->canon_len;
2797 uri->authority_len = 0;
2799 if(!canonicalize_userinfo(data, uri, flags, computeOnly))
2800 return FALSE;
2802 if(!canonicalize_host(data, uri, flags, computeOnly))
2803 return FALSE;
2805 if(!canonicalize_port(data, uri, flags, computeOnly))
2806 return FALSE;
2808 if(uri->host_start != -1 || (data->is_relative && (data->password || data->username)))
2809 uri->authority_len = uri->canon_len - uri->authority_start;
2810 else
2811 uri->authority_start = -1;
2813 return TRUE;
2816 /* Attempts to canonicalize the path of a hierarchical URI.
2818 * Things that happen:
2819 * 1). Forbidden characters are percent encoded, unless the NO_ENCODE_FORBIDDEN
2820 * flag is set or it's a file URI. Forbidden characters are always encoded
2821 * for file schemes regardless and forbidden characters are never encoded
2822 * for unknown scheme types.
2824 * 2). For known scheme types '\\' are changed to '/'.
2826 * 3). Percent encoded, unreserved characters are decoded to their actual values.
2827 * Unless the scheme type is unknown. For file schemes any percent encoded
2828 * character in the unreserved or reserved set is decoded.
2830 * 4). For File schemes if the path is starts with a drive letter and doesn't
2831 * start with a '/' then one is appended.
2832 * Ex: file://c:/test.mp3 -> file:///c:/test.mp3
2834 * 5). Dot segments are removed from the path for all scheme types
2835 * unless NO_CANONICALIZE flag is set. Dot segments aren't removed
2836 * for wildcard scheme types.
2838 * NOTES:
2839 * file://c:/test%20test -> file:///c:/test%2520test
2840 * file://c:/test%3Etest -> file:///c:/test%253Etest
2841 * if Uri_CREATE_FILE_USE_DOS_PATH is not set:
2842 * file:///c:/test%20test -> file:///c:/test%20test
2843 * file:///c:/test%test -> file:///c:/test%25test
2845 static DWORD canonicalize_path_hierarchical(const WCHAR *path, DWORD path_len, URL_SCHEME scheme_type, BOOL has_host, DWORD flags,
2846 BOOL is_implicit_scheme, WCHAR *ret_path) {
2847 const BOOL known_scheme = scheme_type != URL_SCHEME_UNKNOWN;
2848 const BOOL is_file = scheme_type == URL_SCHEME_FILE;
2849 const BOOL is_res = scheme_type == URL_SCHEME_RES;
2850 const WCHAR *ptr;
2851 BOOL escape_pct = FALSE;
2852 DWORD len = 0;
2854 if(!path)
2855 return 0;
2857 ptr = path;
2859 if(is_file && !has_host) {
2860 /* Check if a '/' needs to be appended for the file scheme. */
2861 if(path_len > 1 && is_drive_path(ptr) && !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
2862 if(ret_path)
2863 ret_path[len] = '/';
2864 len++;
2865 escape_pct = TRUE;
2866 } else if(*ptr == '/') {
2867 if(!(flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
2868 /* Copy the extra '/' over. */
2869 if(ret_path)
2870 ret_path[len] = '/';
2871 len++;
2873 ++ptr;
2876 if(is_drive_path(ptr)) {
2877 if(ret_path) {
2878 ret_path[len] = *ptr;
2879 /* If there's a '|' after the drive letter, convert it to a ':'. */
2880 ret_path[len+1] = ':';
2882 ptr += 2;
2883 len += 2;
2887 if(!is_file && *path && *path != '/') {
2888 /* Prepend a '/' to the path if it doesn't have one. */
2889 if(ret_path)
2890 ret_path[len] = '/';
2891 len++;
2894 for(; ptr < path+path_len; ++ptr) {
2895 BOOL do_default_action = TRUE;
2897 if(*ptr == '%' && !is_res) {
2898 const WCHAR *tmp = ptr;
2899 WCHAR val;
2901 /* Check if the % represents a valid encoded char, or if it needs encoding. */
2902 BOOL force_encode = !check_pct_encoded(&tmp) && is_file && !(flags&Uri_CREATE_FILE_USE_DOS_PATH);
2903 val = decode_pct_val(ptr);
2905 if(force_encode || escape_pct) {
2906 /* Escape the percent sign in the file URI. */
2907 if(ret_path)
2908 pct_encode_val(*ptr, ret_path+len);
2909 len += 3;
2910 do_default_action = FALSE;
2911 } else if((is_unreserved(val) && known_scheme) ||
2912 (is_file && !is_implicit_scheme && (is_unreserved(val) || is_reserved(val) ||
2913 (val && flags&Uri_CREATE_FILE_USE_DOS_PATH && !is_forbidden_dos_path_char(val))))) {
2914 if(ret_path)
2915 ret_path[len] = val;
2916 len++;
2918 ptr += 2;
2919 continue;
2921 } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
2922 /* Convert the '/' back to a '\\'. */
2923 if(ret_path)
2924 ret_path[len] = '\\';
2925 len++;
2926 do_default_action = FALSE;
2927 } else if(*ptr == '\\' && known_scheme) {
2928 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) {
2929 /* Convert '\\' into a '/'. */
2930 if(ret_path)
2931 ret_path[len] = '/';
2932 len++;
2933 do_default_action = FALSE;
2935 } else if(known_scheme && !is_res && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
2936 (!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) {
2937 if(!is_file || !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
2938 /* Escape the forbidden character. */
2939 if(ret_path)
2940 pct_encode_val(*ptr, ret_path+len);
2941 len += 3;
2942 do_default_action = FALSE;
2946 if(do_default_action) {
2947 if(ret_path)
2948 ret_path[len] = *ptr;
2949 len++;
2953 /* Removing the dot segments only happens when it's not in
2954 * computeOnly mode and it's not a wildcard scheme. File schemes
2955 * with USE_DOS_PATH set don't get dot segments removed.
2957 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) &&
2958 scheme_type != URL_SCHEME_WILDCARD) {
2959 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && ret_path) {
2960 /* Remove the dot segments (if any) and reset everything to the new
2961 * correct length.
2963 len = remove_dot_segments(ret_path, len);
2967 if(ret_path)
2968 TRACE("Canonicalized path %s len=%d\n", debugstr_wn(ret_path, len), len);
2969 return len;
2972 /* Attempts to canonicalize the path for an opaque URI.
2974 * For known scheme types:
2975 * 1) forbidden characters are percent encoded if
2976 * NO_ENCODE_FORBIDDEN_CHARACTERS isn't set.
2978 * 2) Percent encoded, unreserved characters are decoded
2979 * to their actual values, for known scheme types.
2981 * 3) '\\' are changed to '/' for known scheme types
2982 * except for mailto schemes.
2984 * 4) For file schemes, if USE_DOS_PATH is set all '/'
2985 * are converted to backslashes.
2987 * 5) For file schemes, if USE_DOS_PATH isn't set all '\'
2988 * are converted to forward slashes.
2990 static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2991 const WCHAR *ptr;
2992 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2993 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
2994 const BOOL is_mk = data->scheme_type == URL_SCHEME_MK;
2996 if(!data->path) {
2997 uri->path_start = -1;
2998 uri->path_len = 0;
2999 return TRUE;
3002 uri->path_start = uri->canon_len;
3004 if(is_mk){
3005 /* hijack this flag for SCHEME_MK to tell the function when to start
3006 * converting slashes */
3007 flags |= Uri_CREATE_FILE_USE_DOS_PATH;
3010 /* For javascript: URIs, simply copy path part without any canonicalization */
3011 if(data->scheme_type == URL_SCHEME_JAVASCRIPT) {
3012 if(!computeOnly)
3013 memcpy(uri->canon_uri+uri->canon_len, data->path, data->path_len*sizeof(WCHAR));
3014 uri->path_len = data->path_len;
3015 uri->canon_len += data->path_len;
3016 return TRUE;
3019 /* Windows doesn't allow a "//" to appear after the scheme
3020 * of a URI, if it's an opaque URI.
3022 if(data->scheme && *(data->path) == '/' && *(data->path+1) == '/') {
3023 /* So it inserts a "/." before the "//" if it exists. */
3024 if(!computeOnly) {
3025 uri->canon_uri[uri->canon_len] = '/';
3026 uri->canon_uri[uri->canon_len+1] = '.';
3029 uri->canon_len += 2;
3032 for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) {
3033 BOOL do_default_action = TRUE;
3035 if(*ptr == '%' && known_scheme) {
3036 WCHAR val = decode_pct_val(ptr);
3038 if(is_unreserved(val)) {
3039 if(!computeOnly)
3040 uri->canon_uri[uri->canon_len] = val;
3041 ++uri->canon_len;
3043 ptr += 2;
3044 continue;
3046 } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
3047 if(!computeOnly)
3048 uri->canon_uri[uri->canon_len] = '\\';
3049 ++uri->canon_len;
3050 do_default_action = FALSE;
3051 } else if(*ptr == '\\') {
3052 if((data->is_relative || is_mk || is_file) && !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
3053 /* Convert to a '/'. */
3054 if(!computeOnly)
3055 uri->canon_uri[uri->canon_len] = '/';
3056 ++uri->canon_len;
3057 do_default_action = FALSE;
3059 } else if(is_mk && *ptr == ':' && ptr + 1 < data->path + data->path_len && *(ptr + 1) == ':') {
3060 flags &= ~Uri_CREATE_FILE_USE_DOS_PATH;
3061 } else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
3062 !(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
3063 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) {
3064 if(!computeOnly)
3065 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
3066 uri->canon_len += 3;
3067 do_default_action = FALSE;
3071 if(do_default_action) {
3072 if(!computeOnly)
3073 uri->canon_uri[uri->canon_len] = *ptr;
3074 ++uri->canon_len;
3078 if(is_mk && !computeOnly && !(flags & Uri_CREATE_NO_CANONICALIZE)) {
3079 DWORD new_len = remove_dot_segments(uri->canon_uri + uri->path_start,
3080 uri->canon_len - uri->path_start);
3081 uri->canon_len = uri->path_start + new_len;
3084 uri->path_len = uri->canon_len - uri->path_start;
3086 if(!computeOnly)
3087 TRACE("(%p %p %x %d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly,
3088 debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len);
3089 return TRUE;
3092 /* Determines how the URI represented by the parse_data should be canonicalized.
3094 * Essentially, if the parse_data represents an hierarchical URI then it calls
3095 * canonicalize_authority and the canonicalization functions for the path. If the
3096 * URI is opaque it canonicalizes the path of the URI.
3098 static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
3099 if(!data->is_opaque || (data->is_relative && (data->password || data->username))) {
3100 /* "//" is only added for non-wildcard scheme types.
3102 * A "//" is only added to a relative URI if it has a
3103 * host or port component (this only happens if a IUriBuilder
3104 * is generating an IUri).
3106 if((data->is_relative && (data->host || data->has_port)) ||
3107 (!data->is_relative && data->scheme_type != URL_SCHEME_WILDCARD)) {
3108 if(data->scheme_type == URL_SCHEME_WILDCARD)
3109 FIXME("Here\n");
3111 if(!computeOnly) {
3112 INT pos = uri->canon_len;
3114 uri->canon_uri[pos] = '/';
3115 uri->canon_uri[pos+1] = '/';
3117 uri->canon_len += 2;
3120 if(!canonicalize_authority(data, uri, flags, computeOnly))
3121 return FALSE;
3123 if(data->is_relative && (data->password || data->username)) {
3124 if(!canonicalize_path_opaque(data, uri, flags, computeOnly))
3125 return FALSE;
3126 } else {
3127 if(!computeOnly)
3128 uri->path_start = uri->canon_len;
3129 uri->path_len = canonicalize_path_hierarchical(data->path, data->path_len, data->scheme_type, data->host_len != 0,
3130 flags, data->has_implicit_scheme, computeOnly ? NULL : uri->canon_uri+uri->canon_len);
3131 uri->canon_len += uri->path_len;
3132 if(!computeOnly && !uri->path_len)
3133 uri->path_start = -1;
3135 } else {
3136 /* Opaque URI's don't have an authority. */
3137 uri->userinfo_start = uri->userinfo_split = -1;
3138 uri->userinfo_len = 0;
3139 uri->host_start = -1;
3140 uri->host_len = 0;
3141 uri->host_type = Uri_HOST_UNKNOWN;
3142 uri->has_port = FALSE;
3143 uri->authority_start = -1;
3144 uri->authority_len = 0;
3145 uri->domain_offset = -1;
3146 uri->port_offset = -1;
3148 if(is_hierarchical_scheme(data->scheme_type)) {
3149 DWORD i;
3151 /* Absolute URIs aren't displayed for known scheme types
3152 * which should be hierarchical URIs.
3154 uri->display_modifiers |= URI_DISPLAY_NO_ABSOLUTE_URI;
3156 /* Windows also sets the port for these (if they have one). */
3157 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) {
3158 if(data->scheme_type == default_ports[i].scheme) {
3159 uri->has_port = TRUE;
3160 uri->port = default_ports[i].port;
3161 break;
3166 if(!canonicalize_path_opaque(data, uri, flags, computeOnly))
3167 return FALSE;
3170 if(uri->path_start > -1 && !computeOnly)
3171 /* Finding file extensions happens for both types of URIs. */
3172 uri->extension_offset = find_file_extension(uri->canon_uri+uri->path_start, uri->path_len);
3173 else
3174 uri->extension_offset = -1;
3176 return TRUE;
3179 /* Attempts to canonicalize the query string of the URI.
3181 * Things that happen:
3182 * 1) For known scheme types forbidden characters
3183 * are percent encoded, unless the NO_DECODE_EXTRA_INFO flag is set
3184 * or NO_ENCODE_FORBIDDEN_CHARACTERS is set.
3186 * 2) For known scheme types, percent encoded, unreserved characters
3187 * are decoded as long as the NO_DECODE_EXTRA_INFO flag isn't set.
3189 static BOOL canonicalize_query(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
3190 const WCHAR *ptr, *end;
3191 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
3193 if(!data->query) {
3194 uri->query_start = -1;
3195 uri->query_len = 0;
3196 return TRUE;
3199 uri->query_start = uri->canon_len;
3201 end = data->query+data->query_len;
3202 for(ptr = data->query; ptr < end; ++ptr) {
3203 if(*ptr == '%') {
3204 if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
3205 WCHAR val = decode_pct_val(ptr);
3206 if(is_unreserved(val)) {
3207 if(!computeOnly)
3208 uri->canon_uri[uri->canon_len] = val;
3209 ++uri->canon_len;
3211 ptr += 2;
3212 continue;
3215 } else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
3216 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
3217 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
3218 if(!computeOnly)
3219 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
3220 uri->canon_len += 3;
3221 continue;
3225 if(!computeOnly)
3226 uri->canon_uri[uri->canon_len] = *ptr;
3227 ++uri->canon_len;
3230 uri->query_len = uri->canon_len - uri->query_start;
3232 if(!computeOnly)
3233 TRACE("(%p %p %x %d): Canonicalized query string %s len=%d\n", data, uri, flags,
3234 computeOnly, debugstr_wn(uri->canon_uri+uri->query_start, uri->query_len),
3235 uri->query_len);
3236 return TRUE;
3239 static BOOL canonicalize_fragment(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
3240 const WCHAR *ptr, *end;
3241 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
3243 if(!data->fragment) {
3244 uri->fragment_start = -1;
3245 uri->fragment_len = 0;
3246 return TRUE;
3249 uri->fragment_start = uri->canon_len;
3251 end = data->fragment + data->fragment_len;
3252 for(ptr = data->fragment; ptr < end; ++ptr) {
3253 if(*ptr == '%') {
3254 if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
3255 WCHAR val = decode_pct_val(ptr);
3256 if(is_unreserved(val)) {
3257 if(!computeOnly)
3258 uri->canon_uri[uri->canon_len] = val;
3259 ++uri->canon_len;
3261 ptr += 2;
3262 continue;
3265 } else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
3266 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
3267 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
3268 if(!computeOnly)
3269 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
3270 uri->canon_len += 3;
3271 continue;
3275 if(!computeOnly)
3276 uri->canon_uri[uri->canon_len] = *ptr;
3277 ++uri->canon_len;
3280 uri->fragment_len = uri->canon_len - uri->fragment_start;
3282 if(!computeOnly)
3283 TRACE("(%p %p %x %d): Canonicalized fragment %s len=%d\n", data, uri, flags,
3284 computeOnly, debugstr_wn(uri->canon_uri+uri->fragment_start, uri->fragment_len),
3285 uri->fragment_len);
3286 return TRUE;
3289 /* Canonicalizes the scheme information specified in the parse_data using the specified flags. */
3290 static BOOL canonicalize_scheme(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
3291 uri->scheme_start = -1;
3292 uri->scheme_len = 0;
3294 if(!data->scheme) {
3295 /* The only type of URI that doesn't have to have a scheme is a relative
3296 * URI.
3298 if(!data->is_relative) {
3299 FIXME("(%p %p %x): Unable to determine the scheme type of %s.\n", data,
3300 uri, flags, debugstr_w(data->uri));
3301 return FALSE;
3303 } else {
3304 if(!computeOnly) {
3305 DWORD i;
3306 INT pos = uri->canon_len;
3308 for(i = 0; i < data->scheme_len; ++i) {
3309 /* Scheme name must be lower case after canonicalization. */
3310 uri->canon_uri[i + pos] = tolowerW(data->scheme[i]);
3313 uri->canon_uri[i + pos] = ':';
3314 uri->scheme_start = pos;
3316 TRACE("(%p %p %x): Canonicalized scheme=%s, len=%d.\n", data, uri, flags,
3317 debugstr_wn(uri->canon_uri+uri->scheme_start, data->scheme_len), data->scheme_len);
3320 /* This happens in both computation modes. */
3321 uri->canon_len += data->scheme_len + 1;
3322 uri->scheme_len = data->scheme_len;
3324 return TRUE;
3327 /* Computes what the length of the URI specified by the parse_data will be
3328 * after canonicalization occurs using the specified flags.
3330 * This function will return a non-zero value indicating the length of the canonicalized
3331 * URI, or -1 on error.
3333 static int compute_canonicalized_length(const parse_data *data, DWORD flags) {
3334 Uri uri;
3336 memset(&uri, 0, sizeof(Uri));
3338 TRACE("(%p %x): Beginning to compute canonicalized length for URI %s\n", data, flags,
3339 debugstr_w(data->uri));
3341 if(!canonicalize_scheme(data, &uri, flags, TRUE)) {
3342 ERR("(%p %x): Failed to compute URI scheme length.\n", data, flags);
3343 return -1;
3346 if(!canonicalize_hierpart(data, &uri, flags, TRUE)) {
3347 ERR("(%p %x): Failed to compute URI hierpart length.\n", data, flags);
3348 return -1;
3351 if(!canonicalize_query(data, &uri, flags, TRUE)) {
3352 ERR("(%p %x): Failed to compute query string length.\n", data, flags);
3353 return -1;
3356 if(!canonicalize_fragment(data, &uri, flags, TRUE)) {
3357 ERR("(%p %x): Failed to compute fragment length.\n", data, flags);
3358 return -1;
3361 TRACE("(%p %x): Finished computing canonicalized URI length. length=%d\n", data, flags, uri.canon_len);
3363 return uri.canon_len;
3366 /* Canonicalizes the URI data specified in the parse_data, using the given flags. If the
3367 * canonicalization succeeds it will store all the canonicalization information
3368 * in the pointer to the Uri.
3370 * To canonicalize a URI this function first computes what the length of the URI
3371 * specified by the parse_data will be. Once this is done it will then perform the actual
3372 * canonicalization of the URI.
3374 static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) {
3375 INT len;
3377 uri->canon_uri = NULL;
3378 uri->canon_size = uri->canon_len = 0;
3380 TRACE("(%p %p %x): beginning to canonicalize URI %s.\n", data, uri, flags, debugstr_w(data->uri));
3382 /* First try to compute the length of the URI. */
3383 len = compute_canonicalized_length(data, flags);
3384 if(len == -1) {
3385 ERR("(%p %p %x): Could not compute the canonicalized length of %s.\n", data, uri, flags,
3386 debugstr_w(data->uri));
3387 return E_INVALIDARG;
3390 uri->canon_uri = heap_alloc((len+1)*sizeof(WCHAR));
3391 if(!uri->canon_uri)
3392 return E_OUTOFMEMORY;
3394 uri->canon_size = len;
3395 if(!canonicalize_scheme(data, uri, flags, FALSE)) {
3396 ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags);
3397 return E_INVALIDARG;
3399 uri->scheme_type = data->scheme_type;
3401 if(!canonicalize_hierpart(data, uri, flags, FALSE)) {
3402 ERR("(%p %p %x): Unable to canonicalize the heirpart of the URI\n", data, uri, flags);
3403 return E_INVALIDARG;
3406 if(!canonicalize_query(data, uri, flags, FALSE)) {
3407 ERR("(%p %p %x): Unable to canonicalize query string of the URI.\n",
3408 data, uri, flags);
3409 return E_INVALIDARG;
3412 if(!canonicalize_fragment(data, uri, flags, FALSE)) {
3413 ERR("(%p %p %x): Unable to canonicalize fragment of the URI.\n",
3414 data, uri, flags);
3415 return E_INVALIDARG;
3418 /* There's a possibility we didn't use all the space we allocated
3419 * earlier.
3421 if(uri->canon_len < uri->canon_size) {
3422 /* This happens if the URI is hierarchical and dot
3423 * segments were removed from its path.
3425 WCHAR *tmp = heap_realloc(uri->canon_uri, (uri->canon_len+1)*sizeof(WCHAR));
3426 if(!tmp)
3427 return E_OUTOFMEMORY;
3429 uri->canon_uri = tmp;
3430 uri->canon_size = uri->canon_len;
3433 uri->canon_uri[uri->canon_len] = '\0';
3434 TRACE("(%p %p %x): finished canonicalizing the URI. uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri));
3436 return S_OK;
3439 static HRESULT get_builder_component(LPWSTR *component, DWORD *component_len,
3440 LPCWSTR source, DWORD source_len,
3441 LPCWSTR *output, DWORD *output_len)
3443 if(!output_len) {
3444 if(output)
3445 *output = NULL;
3446 return E_POINTER;
3449 if(!output) {
3450 *output_len = 0;
3451 return E_POINTER;
3454 if(!(*component) && source) {
3455 /* Allocate 'component', and copy the contents from 'source'
3456 * into the new allocation.
3458 *component = heap_alloc((source_len+1)*sizeof(WCHAR));
3459 if(!(*component))
3460 return E_OUTOFMEMORY;
3462 memcpy(*component, source, source_len*sizeof(WCHAR));
3463 (*component)[source_len] = '\0';
3464 *component_len = source_len;
3467 *output = *component;
3468 *output_len = *component_len;
3469 return *output ? S_OK : S_FALSE;
3472 /* Allocates 'component' and copies the string from 'new_value' into 'component'.
3473 * If 'prefix' is set and 'new_value' isn't NULL, then it checks if 'new_value'
3474 * starts with 'prefix'. If it doesn't then 'prefix' is prepended to 'component'.
3476 * If everything is successful, then will set 'success_flag' in 'flags'.
3478 static HRESULT set_builder_component(LPWSTR *component, DWORD *component_len, LPCWSTR new_value,
3479 WCHAR prefix, DWORD *flags, DWORD success_flag)
3481 heap_free(*component);
3483 if(!new_value) {
3484 *component = NULL;
3485 *component_len = 0;
3486 } else {
3487 BOOL add_prefix = FALSE;
3488 DWORD len = lstrlenW(new_value);
3489 DWORD pos = 0;
3491 if(prefix && *new_value != prefix) {
3492 add_prefix = TRUE;
3493 *component = heap_alloc((len+2)*sizeof(WCHAR));
3494 } else
3495 *component = heap_alloc((len+1)*sizeof(WCHAR));
3497 if(!(*component))
3498 return E_OUTOFMEMORY;
3500 if(add_prefix)
3501 (*component)[pos++] = prefix;
3503 memcpy(*component+pos, new_value, (len+1)*sizeof(WCHAR));
3504 *component_len = len+pos;
3507 *flags |= success_flag;
3508 return S_OK;
3511 static void reset_builder(UriBuilder *builder) {
3512 if(builder->uri)
3513 IUri_Release(&builder->uri->IUri_iface);
3514 builder->uri = NULL;
3516 heap_free(builder->fragment);
3517 builder->fragment = NULL;
3518 builder->fragment_len = 0;
3520 heap_free(builder->host);
3521 builder->host = NULL;
3522 builder->host_len = 0;
3524 heap_free(builder->password);
3525 builder->password = NULL;
3526 builder->password_len = 0;
3528 heap_free(builder->path);
3529 builder->path = NULL;
3530 builder->path_len = 0;
3532 heap_free(builder->query);
3533 builder->query = NULL;
3534 builder->query_len = 0;
3536 heap_free(builder->scheme);
3537 builder->scheme = NULL;
3538 builder->scheme_len = 0;
3540 heap_free(builder->username);
3541 builder->username = NULL;
3542 builder->username_len = 0;
3544 builder->has_port = FALSE;
3545 builder->port = 0;
3546 builder->modified_props = 0;
3549 static HRESULT validate_scheme_name(const UriBuilder *builder, parse_data *data, DWORD flags) {
3550 const WCHAR *component;
3551 const WCHAR *ptr;
3552 const WCHAR **pptr;
3553 DWORD expected_len;
3555 if(builder->scheme) {
3556 ptr = builder->scheme;
3557 expected_len = builder->scheme_len;
3558 } else if(builder->uri && builder->uri->scheme_start > -1) {
3559 ptr = builder->uri->canon_uri+builder->uri->scheme_start;
3560 expected_len = builder->uri->scheme_len;
3561 } else {
3562 static const WCHAR nullW[] = {0};
3563 ptr = nullW;
3564 expected_len = 0;
3567 component = ptr;
3568 pptr = &ptr;
3569 if(parse_scheme(pptr, data, flags, ALLOW_NULL_TERM_SCHEME) &&
3570 data->scheme_len == expected_len) {
3571 if(data->scheme)
3572 TRACE("(%p %p %x): Found valid scheme component %s len=%d.\n", builder, data, flags,
3573 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len);
3574 } else {
3575 TRACE("(%p %p %x): Invalid scheme component found %s.\n", builder, data, flags,
3576 debugstr_wn(component, expected_len));
3577 return INET_E_INVALID_URL;
3580 return S_OK;
3583 static HRESULT validate_username(const UriBuilder *builder, parse_data *data, DWORD flags) {
3584 const WCHAR *ptr;
3585 const WCHAR **pptr;
3586 DWORD expected_len;
3588 if(builder->username) {
3589 ptr = builder->username;
3590 expected_len = builder->username_len;
3591 } else if(!(builder->modified_props & Uri_HAS_USER_NAME) && builder->uri &&
3592 builder->uri->userinfo_start > -1 && builder->uri->userinfo_split != 0) {
3593 /* Just use the username from the base Uri. */
3594 data->username = builder->uri->canon_uri+builder->uri->userinfo_start;
3595 data->username_len = (builder->uri->userinfo_split > -1) ?
3596 builder->uri->userinfo_split : builder->uri->userinfo_len;
3597 ptr = NULL;
3598 } else {
3599 ptr = NULL;
3600 expected_len = 0;
3603 if(ptr) {
3604 const WCHAR *component = ptr;
3605 pptr = &ptr;
3606 if(parse_username(pptr, data, flags, ALLOW_NULL_TERM_USER_NAME) &&
3607 data->username_len == expected_len)
3608 TRACE("(%p %p %x): Found valid username component %s len=%d.\n", builder, data, flags,
3609 debugstr_wn(data->username, data->username_len), data->username_len);
3610 else {
3611 TRACE("(%p %p %x): Invalid username component found %s.\n", builder, data, flags,
3612 debugstr_wn(component, expected_len));
3613 return INET_E_INVALID_URL;
3617 return S_OK;
3620 static HRESULT validate_password(const UriBuilder *builder, parse_data *data, DWORD flags) {
3621 const WCHAR *ptr;
3622 const WCHAR **pptr;
3623 DWORD expected_len;
3625 if(builder->password) {
3626 ptr = builder->password;
3627 expected_len = builder->password_len;
3628 } else if(!(builder->modified_props & Uri_HAS_PASSWORD) && builder->uri &&
3629 builder->uri->userinfo_split > -1) {
3630 data->password = builder->uri->canon_uri+builder->uri->userinfo_start+builder->uri->userinfo_split+1;
3631 data->password_len = builder->uri->userinfo_len-builder->uri->userinfo_split-1;
3632 ptr = NULL;
3633 } else {
3634 ptr = NULL;
3635 expected_len = 0;
3638 if(ptr) {
3639 const WCHAR *component = ptr;
3640 pptr = &ptr;
3641 if(parse_password(pptr, data, flags, ALLOW_NULL_TERM_PASSWORD) &&
3642 data->password_len == expected_len)
3643 TRACE("(%p %p %x): Found valid password component %s len=%d.\n", builder, data, flags,
3644 debugstr_wn(data->password, data->password_len), data->password_len);
3645 else {
3646 TRACE("(%p %p %x): Invalid password component found %s.\n", builder, data, flags,
3647 debugstr_wn(component, expected_len));
3648 return INET_E_INVALID_URL;
3652 return S_OK;
3655 static HRESULT validate_userinfo(const UriBuilder *builder, parse_data *data, DWORD flags) {
3656 HRESULT hr;
3658 hr = validate_username(builder, data, flags);
3659 if(FAILED(hr))
3660 return hr;
3662 hr = validate_password(builder, data, flags);
3663 if(FAILED(hr))
3664 return hr;
3666 return S_OK;
3669 static HRESULT validate_host(const UriBuilder *builder, parse_data *data, DWORD flags) {
3670 const WCHAR *ptr;
3671 const WCHAR **pptr;
3672 DWORD expected_len;
3674 if(builder->host) {
3675 ptr = builder->host;
3676 expected_len = builder->host_len;
3677 } else if(!(builder->modified_props & Uri_HAS_HOST) && builder->uri && builder->uri->host_start > -1) {
3678 ptr = builder->uri->canon_uri + builder->uri->host_start;
3679 expected_len = builder->uri->host_len;
3680 } else
3681 ptr = NULL;
3683 if(ptr) {
3684 const WCHAR *component = ptr;
3685 DWORD extras = ALLOW_BRACKETLESS_IP_LITERAL|IGNORE_PORT_DELIMITER|SKIP_IP_FUTURE_CHECK;
3686 pptr = &ptr;
3688 if(parse_host(pptr, data, flags, extras) && data->host_len == expected_len)
3689 TRACE("(%p %p %x): Found valid host name %s len=%d type=%d.\n", builder, data, flags,
3690 debugstr_wn(data->host, data->host_len), data->host_len, data->host_type);
3691 else {
3692 TRACE("(%p %p %x): Invalid host name found %s.\n", builder, data, flags,
3693 debugstr_wn(component, expected_len));
3694 return INET_E_INVALID_URL;
3698 return S_OK;
3701 static void setup_port(const UriBuilder *builder, parse_data *data, DWORD flags) {
3702 if(builder->modified_props & Uri_HAS_PORT) {
3703 if(builder->has_port) {
3704 data->has_port = TRUE;
3705 data->port_value = builder->port;
3707 } else if(builder->uri && builder->uri->has_port) {
3708 data->has_port = TRUE;
3709 data->port_value = builder->uri->port;
3712 if(data->has_port)
3713 TRACE("(%p %p %x): Using %u as port for IUri.\n", builder, data, flags, data->port_value);
3716 static HRESULT validate_path(const UriBuilder *builder, parse_data *data, DWORD flags) {
3717 const WCHAR *ptr = NULL;
3718 const WCHAR *component;
3719 const WCHAR **pptr;
3720 DWORD expected_len;
3721 BOOL check_len = TRUE;
3722 BOOL valid = FALSE;
3724 if(builder->path) {
3725 ptr = builder->path;
3726 expected_len = builder->path_len;
3727 } else if(!(builder->modified_props & Uri_HAS_PATH) &&
3728 builder->uri && builder->uri->path_start > -1) {
3729 ptr = builder->uri->canon_uri+builder->uri->path_start;
3730 expected_len = builder->uri->path_len;
3731 } else {
3732 static const WCHAR nullW[] = {0};
3733 ptr = nullW;
3734 check_len = FALSE;
3735 expected_len = -1;
3738 component = ptr;
3739 pptr = &ptr;
3741 /* How the path is validated depends on what type of
3742 * URI it is.
3744 valid = data->is_opaque ?
3745 parse_path_opaque(pptr, data, flags) : parse_path_hierarchical(pptr, data, flags);
3747 if(!valid || (check_len && expected_len != data->path_len)) {
3748 TRACE("(%p %p %x): Invalid path component %s.\n", builder, data, flags,
3749 debugstr_wn(component, expected_len) );
3750 return INET_E_INVALID_URL;
3753 TRACE("(%p %p %x): Valid path component %s len=%d.\n", builder, data, flags,
3754 debugstr_wn(data->path, data->path_len), data->path_len);
3756 return S_OK;
3759 static HRESULT validate_query(const UriBuilder *builder, parse_data *data, DWORD flags) {
3760 const WCHAR *ptr = NULL;
3761 const WCHAR **pptr;
3762 DWORD expected_len;
3764 if(builder->query) {
3765 ptr = builder->query;
3766 expected_len = builder->query_len;
3767 } else if(!(builder->modified_props & Uri_HAS_QUERY) && builder->uri &&
3768 builder->uri->query_start > -1) {
3769 ptr = builder->uri->canon_uri+builder->uri->query_start;
3770 expected_len = builder->uri->query_len;
3773 if(ptr) {
3774 const WCHAR *component = ptr;
3775 pptr = &ptr;
3777 if(parse_query(pptr, data, flags) && expected_len == data->query_len)
3778 TRACE("(%p %p %x): Valid query component %s len=%d.\n", builder, data, flags,
3779 debugstr_wn(data->query, data->query_len), data->query_len);
3780 else {
3781 TRACE("(%p %p %x): Invalid query component %s.\n", builder, data, flags,
3782 debugstr_wn(component, expected_len));
3783 return INET_E_INVALID_URL;
3787 return S_OK;
3790 static HRESULT validate_fragment(const UriBuilder *builder, parse_data *data, DWORD flags) {
3791 const WCHAR *ptr = NULL;
3792 const WCHAR **pptr;
3793 DWORD expected_len;
3795 if(builder->fragment) {
3796 ptr = builder->fragment;
3797 expected_len = builder->fragment_len;
3798 } else if(!(builder->modified_props & Uri_HAS_FRAGMENT) && builder->uri &&
3799 builder->uri->fragment_start > -1) {
3800 ptr = builder->uri->canon_uri+builder->uri->fragment_start;
3801 expected_len = builder->uri->fragment_len;
3804 if(ptr) {
3805 const WCHAR *component = ptr;
3806 pptr = &ptr;
3808 if(parse_fragment(pptr, data, flags) && expected_len == data->fragment_len)
3809 TRACE("(%p %p %x): Valid fragment component %s len=%d.\n", builder, data, flags,
3810 debugstr_wn(data->fragment, data->fragment_len), data->fragment_len);
3811 else {
3812 TRACE("(%p %p %x): Invalid fragment component %s.\n", builder, data, flags,
3813 debugstr_wn(component, expected_len));
3814 return INET_E_INVALID_URL;
3818 return S_OK;
3821 static HRESULT validate_components(const UriBuilder *builder, parse_data *data, DWORD flags) {
3822 HRESULT hr;
3824 memset(data, 0, sizeof(parse_data));
3826 TRACE("(%p %p %x): Beginning to validate builder components.\n", builder, data, flags);
3828 hr = validate_scheme_name(builder, data, flags);
3829 if(FAILED(hr))
3830 return hr;
3832 /* Extra validation for file schemes. */
3833 if(data->scheme_type == URL_SCHEME_FILE) {
3834 if((builder->password || (builder->uri && builder->uri->userinfo_split > -1)) ||
3835 (builder->username || (builder->uri && builder->uri->userinfo_start > -1))) {
3836 TRACE("(%p %p %x): File schemes can't contain a username or password.\n",
3837 builder, data, flags);
3838 return INET_E_INVALID_URL;
3842 hr = validate_userinfo(builder, data, flags);
3843 if(FAILED(hr))
3844 return hr;
3846 hr = validate_host(builder, data, flags);
3847 if(FAILED(hr))
3848 return hr;
3850 setup_port(builder, data, flags);
3852 /* The URI is opaque if it doesn't have an authority component. */
3853 if(!data->is_relative)
3854 data->is_opaque = !data->username && !data->password && !data->host && !data->has_port
3855 && data->scheme_type != URL_SCHEME_FILE;
3856 else
3857 data->is_opaque = !data->host && !data->has_port;
3859 hr = validate_path(builder, data, flags);
3860 if(FAILED(hr))
3861 return hr;
3863 hr = validate_query(builder, data, flags);
3864 if(FAILED(hr))
3865 return hr;
3867 hr = validate_fragment(builder, data, flags);
3868 if(FAILED(hr))
3869 return hr;
3871 TRACE("(%p %p %x): Finished validating builder components.\n", builder, data, flags);
3873 return S_OK;
3876 static HRESULT compare_file_paths(const Uri *a, const Uri *b, BOOL *ret)
3878 WCHAR *canon_path_a, *canon_path_b;
3879 DWORD len_a, len_b;
3881 if(!a->path_len) {
3882 *ret = !b->path_len;
3883 return S_OK;
3886 if(!b->path_len) {
3887 *ret = FALSE;
3888 return S_OK;
3891 /* Fast path */
3892 if(a->path_len == b->path_len && !memicmpW(a->canon_uri+a->path_start, b->canon_uri+b->path_start, a->path_len)) {
3893 *ret = TRUE;
3894 return S_OK;
3897 len_a = canonicalize_path_hierarchical(a->canon_uri+a->path_start, a->path_len, a->scheme_type, FALSE, 0, FALSE, NULL);
3898 len_b = canonicalize_path_hierarchical(b->canon_uri+b->path_start, b->path_len, b->scheme_type, FALSE, 0, FALSE, NULL);
3900 canon_path_a = heap_alloc(len_a*sizeof(WCHAR));
3901 if(!canon_path_a)
3902 return E_OUTOFMEMORY;
3903 canon_path_b = heap_alloc(len_b*sizeof(WCHAR));
3904 if(!canon_path_b) {
3905 heap_free(canon_path_a);
3906 return E_OUTOFMEMORY;
3909 len_a = canonicalize_path_hierarchical(a->canon_uri+a->path_start, a->path_len, a->scheme_type, FALSE, 0, FALSE, canon_path_a);
3910 len_b = canonicalize_path_hierarchical(b->canon_uri+b->path_start, b->path_len, b->scheme_type, FALSE, 0, FALSE, canon_path_b);
3912 *ret = len_a == len_b && !memicmpW(canon_path_a, canon_path_b, len_a);
3914 heap_free(canon_path_a);
3915 heap_free(canon_path_b);
3916 return S_OK;
3919 /* Checks if the two Uri's are logically equivalent. It's a simple
3920 * comparison, since they are both of type Uri, and it can access
3921 * the properties of each Uri directly without the need to go
3922 * through the "IUri_Get*" interface calls.
3924 static HRESULT compare_uris(const Uri *a, const Uri *b, BOOL *ret) {
3925 const BOOL known_scheme = a->scheme_type != URL_SCHEME_UNKNOWN;
3926 const BOOL are_hierarchical = a->authority_start > -1 && b->authority_start > -1;
3927 HRESULT hres;
3929 *ret = FALSE;
3931 if(a->scheme_type != b->scheme_type)
3932 return S_OK;
3934 /* Only compare the scheme names (if any) if their unknown scheme types. */
3935 if(!known_scheme) {
3936 if((a->scheme_start > -1 && b->scheme_start > -1) &&
3937 (a->scheme_len == b->scheme_len)) {
3938 /* Make sure the schemes are the same. */
3939 if(StrCmpNW(a->canon_uri+a->scheme_start, b->canon_uri+b->scheme_start, a->scheme_len))
3940 return S_OK;
3941 } else if(a->scheme_len != b->scheme_len)
3942 /* One of the Uri's has a scheme name, while the other doesn't. */
3943 return S_OK;
3946 /* If they have a userinfo component, perform case sensitive compare. */
3947 if((a->userinfo_start > -1 && b->userinfo_start > -1) &&
3948 (a->userinfo_len == b->userinfo_len)) {
3949 if(StrCmpNW(a->canon_uri+a->userinfo_start, b->canon_uri+b->userinfo_start, a->userinfo_len))
3950 return S_OK;
3951 } else if(a->userinfo_len != b->userinfo_len)
3952 /* One of the Uri's had a userinfo, while the other one doesn't. */
3953 return S_OK;
3955 /* Check if they have a host name. */
3956 if((a->host_start > -1 && b->host_start > -1) &&
3957 (a->host_len == b->host_len)) {
3958 /* Perform a case insensitive compare if they are a known scheme type. */
3959 if(known_scheme) {
3960 if(StrCmpNIW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len))
3961 return S_OK;
3962 } else if(StrCmpNW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len))
3963 return S_OK;
3964 } else if(a->host_len != b->host_len)
3965 /* One of the Uri's had a host, while the other one didn't. */
3966 return S_OK;
3968 if(a->has_port && b->has_port) {
3969 if(a->port != b->port)
3970 return S_OK;
3971 } else if(a->has_port || b->has_port)
3972 /* One had a port, while the other one didn't. */
3973 return S_OK;
3975 /* Windows is weird with how it handles paths. For example
3976 * One URI could be "http://google.com" (after canonicalization)
3977 * and one could be "http://google.com/" and the IsEqual function
3978 * would still evaluate to TRUE, but, only if they are both hierarchical
3979 * URIs.
3981 if(a->scheme_type == URL_SCHEME_FILE) {
3982 BOOL cmp;
3984 hres = compare_file_paths(a, b, &cmp);
3985 if(FAILED(hres) || !cmp)
3986 return hres;
3987 } else if((a->path_start > -1 && b->path_start > -1) &&
3988 (a->path_len == b->path_len)) {
3989 if(StrCmpNW(a->canon_uri+a->path_start, b->canon_uri+b->path_start, a->path_len))
3990 return S_OK;
3991 } else if(are_hierarchical && a->path_len == -1 && b->path_len == 0) {
3992 if(*(a->canon_uri+a->path_start) != '/')
3993 return S_OK;
3994 } else if(are_hierarchical && b->path_len == 1 && a->path_len == 0) {
3995 if(*(b->canon_uri+b->path_start) != '/')
3996 return S_OK;
3997 } else if(a->path_len != b->path_len)
3998 return S_OK;
4000 /* Compare the query strings of the two URIs. */
4001 if((a->query_start > -1 && b->query_start > -1) &&
4002 (a->query_len == b->query_len)) {
4003 if(StrCmpNW(a->canon_uri+a->query_start, b->canon_uri+b->query_start, a->query_len))
4004 return S_OK;
4005 } else if(a->query_len != b->query_len)
4006 return S_OK;
4008 if((a->fragment_start > -1 && b->fragment_start > -1) &&
4009 (a->fragment_len == b->fragment_len)) {
4010 if(StrCmpNW(a->canon_uri+a->fragment_start, b->canon_uri+b->fragment_start, a->fragment_len))
4011 return S_OK;
4012 } else if(a->fragment_len != b->fragment_len)
4013 return S_OK;
4015 /* If we get here, the two URIs are equivalent. */
4016 *ret = TRUE;
4017 return S_OK;
4020 static void convert_to_dos_path(const WCHAR *path, DWORD path_len,
4021 WCHAR *output, DWORD *output_len)
4023 const WCHAR *ptr = path;
4025 if(path_len > 3 && *ptr == '/' && is_drive_path(path+1))
4026 /* Skip over the leading / before the drive path. */
4027 ++ptr;
4029 for(; ptr < path+path_len; ++ptr) {
4030 if(*ptr == '/') {
4031 if(output)
4032 *output++ = '\\';
4033 (*output_len)++;
4034 } else {
4035 if(output)
4036 *output++ = *ptr;
4037 (*output_len)++;
4042 /* Generates a raw uri string using the parse_data. */
4043 static DWORD generate_raw_uri(const parse_data *data, BSTR uri, DWORD flags) {
4044 DWORD length = 0;
4046 if(data->scheme) {
4047 if(uri) {
4048 memcpy(uri, data->scheme, data->scheme_len*sizeof(WCHAR));
4049 uri[data->scheme_len] = ':';
4051 length += data->scheme_len+1;
4054 if(!data->is_opaque) {
4055 /* For the "//" which appears before the authority component. */
4056 if(uri) {
4057 uri[length] = '/';
4058 uri[length+1] = '/';
4060 length += 2;
4062 /* Check if we need to add the "\\" before the host name
4063 * of a UNC server name in a DOS path.
4065 if(flags & RAW_URI_CONVERT_TO_DOS_PATH &&
4066 data->scheme_type == URL_SCHEME_FILE && data->host) {
4067 if(uri) {
4068 uri[length] = '\\';
4069 uri[length+1] = '\\';
4071 length += 2;
4075 if(data->username) {
4076 if(uri)
4077 memcpy(uri+length, data->username, data->username_len*sizeof(WCHAR));
4078 length += data->username_len;
4081 if(data->password) {
4082 if(uri) {
4083 uri[length] = ':';
4084 memcpy(uri+length+1, data->password, data->password_len*sizeof(WCHAR));
4086 length += data->password_len+1;
4089 if(data->password || data->username) {
4090 if(uri)
4091 uri[length] = '@';
4092 ++length;
4095 if(data->host) {
4096 /* IPv6 addresses get the brackets added around them if they don't already
4097 * have them.
4099 const BOOL add_brackets = data->host_type == Uri_HOST_IPV6 && *(data->host) != '[';
4100 if(add_brackets) {
4101 if(uri)
4102 uri[length] = '[';
4103 ++length;
4106 if(uri)
4107 memcpy(uri+length, data->host, data->host_len*sizeof(WCHAR));
4108 length += data->host_len;
4110 if(add_brackets) {
4111 if(uri)
4112 uri[length] = ']';
4113 length++;
4117 if(data->has_port) {
4118 /* The port isn't included in the raw uri if it's the default
4119 * port for the scheme type.
4121 DWORD i;
4122 BOOL is_default = FALSE;
4124 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) {
4125 if(data->scheme_type == default_ports[i].scheme &&
4126 data->port_value == default_ports[i].port)
4127 is_default = TRUE;
4130 if(!is_default || flags & RAW_URI_FORCE_PORT_DISP) {
4131 if(uri)
4132 uri[length] = ':';
4133 ++length;
4135 if(uri)
4136 length += ui2str(uri+length, data->port_value);
4137 else
4138 length += ui2str(NULL, data->port_value);
4142 /* Check if a '/' should be added before the path for hierarchical URIs. */
4143 if(!data->is_opaque && data->path && *(data->path) != '/') {
4144 if(uri)
4145 uri[length] = '/';
4146 ++length;
4149 if(data->path) {
4150 if(!data->is_opaque && data->scheme_type == URL_SCHEME_FILE &&
4151 flags & RAW_URI_CONVERT_TO_DOS_PATH) {
4152 DWORD len = 0;
4154 if(uri)
4155 convert_to_dos_path(data->path, data->path_len, uri+length, &len);
4156 else
4157 convert_to_dos_path(data->path, data->path_len, NULL, &len);
4159 length += len;
4160 } else {
4161 if(uri)
4162 memcpy(uri+length, data->path, data->path_len*sizeof(WCHAR));
4163 length += data->path_len;
4167 if(data->query) {
4168 if(uri)
4169 memcpy(uri+length, data->query, data->query_len*sizeof(WCHAR));
4170 length += data->query_len;
4173 if(data->fragment) {
4174 if(uri)
4175 memcpy(uri+length, data->fragment, data->fragment_len*sizeof(WCHAR));
4176 length += data->fragment_len;
4179 if(uri)
4180 TRACE("(%p %p): Generated raw uri=%s len=%d\n", data, uri, debugstr_wn(uri, length), length);
4181 else
4182 TRACE("(%p %p): Computed raw uri len=%d\n", data, uri, length);
4184 return length;
4187 static HRESULT generate_uri(const UriBuilder *builder, const parse_data *data, Uri *uri, DWORD flags) {
4188 HRESULT hr;
4189 DWORD length = generate_raw_uri(data, NULL, 0);
4190 uri->raw_uri = SysAllocStringLen(NULL, length);
4191 if(!uri->raw_uri)
4192 return E_OUTOFMEMORY;
4194 generate_raw_uri(data, uri->raw_uri, 0);
4196 hr = canonicalize_uri(data, uri, flags);
4197 if(FAILED(hr)) {
4198 if(hr == E_INVALIDARG)
4199 return INET_E_INVALID_URL;
4200 return hr;
4203 uri->create_flags = flags;
4204 return S_OK;
4207 static inline Uri* impl_from_IUri(IUri *iface)
4209 return CONTAINING_RECORD(iface, Uri, IUri_iface);
4212 static inline void destroy_uri_obj(Uri *This)
4214 SysFreeString(This->raw_uri);
4215 heap_free(This->canon_uri);
4216 heap_free(This);
4219 static HRESULT WINAPI Uri_QueryInterface(IUri *iface, REFIID riid, void **ppv)
4221 Uri *This = impl_from_IUri(iface);
4223 if(IsEqualGUID(&IID_IUnknown, riid)) {
4224 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
4225 *ppv = &This->IUri_iface;
4226 }else if(IsEqualGUID(&IID_IUri, riid)) {
4227 TRACE("(%p)->(IID_IUri %p)\n", This, ppv);
4228 *ppv = &This->IUri_iface;
4229 }else if(IsEqualGUID(&IID_IUriBuilderFactory, riid)) {
4230 TRACE("(%p)->(IID_IUriBuilderFactory %p)\n", This, ppv);
4231 *ppv = &This->IUriBuilderFactory_iface;
4232 }else if(IsEqualGUID(&IID_IPersistStream, riid)) {
4233 TRACE("(%p)->(IID_IPersistStream %p)\n", This, ppv);
4234 *ppv = &This->IPersistStream_iface;
4235 }else if(IsEqualGUID(&IID_IMarshal, riid)) {
4236 TRACE("(%p)->(IID_IMarshal %p)\n", This, ppv);
4237 *ppv = &This->IMarshal_iface;
4238 }else if(IsEqualGUID(&IID_IUriObj, riid)) {
4239 TRACE("(%p)->(IID_IUriObj %p)\n", This, ppv);
4240 *ppv = This;
4241 return S_OK;
4242 }else {
4243 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
4244 *ppv = NULL;
4245 return E_NOINTERFACE;
4248 IUnknown_AddRef((IUnknown*)*ppv);
4249 return S_OK;
4252 static ULONG WINAPI Uri_AddRef(IUri *iface)
4254 Uri *This = impl_from_IUri(iface);
4255 LONG ref = InterlockedIncrement(&This->ref);
4257 TRACE("(%p) ref=%d\n", This, ref);
4259 return ref;
4262 static ULONG WINAPI Uri_Release(IUri *iface)
4264 Uri *This = impl_from_IUri(iface);
4265 LONG ref = InterlockedDecrement(&This->ref);
4267 TRACE("(%p) ref=%d\n", This, ref);
4269 if(!ref)
4270 destroy_uri_obj(This);
4272 return ref;
4275 static HRESULT WINAPI Uri_GetPropertyBSTR(IUri *iface, Uri_PROPERTY uriProp, BSTR *pbstrProperty, DWORD dwFlags)
4277 Uri *This = impl_from_IUri(iface);
4278 HRESULT hres;
4279 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pbstrProperty, dwFlags);
4281 if(!This->create_flags)
4282 return E_UNEXPECTED;
4283 if(!pbstrProperty)
4284 return E_POINTER;
4286 if(uriProp > Uri_PROPERTY_STRING_LAST) {
4287 /* It only returns S_FALSE for the ZONE property... */
4288 if(uriProp == Uri_PROPERTY_ZONE) {
4289 *pbstrProperty = SysAllocStringLen(NULL, 0);
4290 if(!(*pbstrProperty))
4291 return E_OUTOFMEMORY;
4292 return S_FALSE;
4295 *pbstrProperty = NULL;
4296 return E_INVALIDARG;
4299 /* Don't have support for flags yet. */
4300 if(dwFlags) {
4301 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
4302 return E_NOTIMPL;
4305 switch(uriProp) {
4306 case Uri_PROPERTY_ABSOLUTE_URI:
4307 if(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI) {
4308 *pbstrProperty = SysAllocStringLen(NULL, 0);
4309 hres = S_FALSE;
4310 } else {
4311 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) {
4312 if(This->userinfo_len == 0) {
4313 /* Don't include the '@' after the userinfo component. */
4314 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-1);
4315 hres = S_OK;
4316 if(*pbstrProperty) {
4317 /* Copy everything before it. */
4318 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR));
4320 /* And everything after it. */
4321 memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+1,
4322 (This->canon_len-This->userinfo_start-1)*sizeof(WCHAR));
4324 } else if(This->userinfo_split == 0 && This->userinfo_len == 1) {
4325 /* Don't include the ":@" */
4326 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-2);
4327 hres = S_OK;
4328 if(*pbstrProperty) {
4329 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR));
4330 memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+2,
4331 (This->canon_len-This->userinfo_start-2)*sizeof(WCHAR));
4333 } else {
4334 *pbstrProperty = SysAllocString(This->canon_uri);
4335 hres = S_OK;
4337 } else {
4338 *pbstrProperty = SysAllocString(This->canon_uri);
4339 hres = S_OK;
4343 if(!(*pbstrProperty))
4344 hres = E_OUTOFMEMORY;
4346 break;
4347 case Uri_PROPERTY_AUTHORITY:
4348 if(This->authority_start > -1) {
4349 if(This->port_offset > -1 && is_default_port(This->scheme_type, This->port) &&
4350 This->display_modifiers & URI_DISPLAY_NO_DEFAULT_PORT_AUTH)
4351 /* Don't include the port in the authority component. */
4352 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->port_offset);
4353 else
4354 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->authority_len);
4355 hres = S_OK;
4356 } else {
4357 *pbstrProperty = SysAllocStringLen(NULL, 0);
4358 hres = S_FALSE;
4361 if(!(*pbstrProperty))
4362 hres = E_OUTOFMEMORY;
4364 break;
4365 case Uri_PROPERTY_DISPLAY_URI:
4366 /* The Display URI contains everything except for the userinfo for known
4367 * scheme types.
4369 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) {
4370 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-This->userinfo_len);
4372 if(*pbstrProperty) {
4373 /* Copy everything before the userinfo over. */
4374 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR));
4375 /* Copy everything after the userinfo over. */
4376 memcpy(*pbstrProperty+This->userinfo_start,
4377 This->canon_uri+This->userinfo_start+This->userinfo_len+1,
4378 (This->canon_len-(This->userinfo_start+This->userinfo_len+1))*sizeof(WCHAR));
4380 } else
4381 *pbstrProperty = SysAllocString(This->canon_uri);
4383 if(!(*pbstrProperty))
4384 hres = E_OUTOFMEMORY;
4385 else
4386 hres = S_OK;
4388 break;
4389 case Uri_PROPERTY_DOMAIN:
4390 if(This->domain_offset > -1) {
4391 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+This->domain_offset,
4392 This->host_len-This->domain_offset);
4393 hres = S_OK;
4394 } else {
4395 *pbstrProperty = SysAllocStringLen(NULL, 0);
4396 hres = S_FALSE;
4399 if(!(*pbstrProperty))
4400 hres = E_OUTOFMEMORY;
4402 break;
4403 case Uri_PROPERTY_EXTENSION:
4404 if(This->extension_offset > -1) {
4405 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start+This->extension_offset,
4406 This->path_len-This->extension_offset);
4407 hres = S_OK;
4408 } else {
4409 *pbstrProperty = SysAllocStringLen(NULL, 0);
4410 hres = S_FALSE;
4413 if(!(*pbstrProperty))
4414 hres = E_OUTOFMEMORY;
4416 break;
4417 case Uri_PROPERTY_FRAGMENT:
4418 if(This->fragment_start > -1) {
4419 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->fragment_start, This->fragment_len);
4420 hres = S_OK;
4421 } else {
4422 *pbstrProperty = SysAllocStringLen(NULL, 0);
4423 hres = S_FALSE;
4426 if(!(*pbstrProperty))
4427 hres = E_OUTOFMEMORY;
4429 break;
4430 case Uri_PROPERTY_HOST:
4431 if(This->host_start > -1) {
4432 /* The '[' and ']' aren't included for IPv6 addresses. */
4433 if(This->host_type == Uri_HOST_IPV6)
4434 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+1, This->host_len-2);
4435 else
4436 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start, This->host_len);
4438 hres = S_OK;
4439 } else {
4440 *pbstrProperty = SysAllocStringLen(NULL, 0);
4441 hres = S_FALSE;
4444 if(!(*pbstrProperty))
4445 hres = E_OUTOFMEMORY;
4447 break;
4448 case Uri_PROPERTY_PASSWORD:
4449 if(This->userinfo_split > -1) {
4450 *pbstrProperty = SysAllocStringLen(
4451 This->canon_uri+This->userinfo_start+This->userinfo_split+1,
4452 This->userinfo_len-This->userinfo_split-1);
4453 hres = S_OK;
4454 } else {
4455 *pbstrProperty = SysAllocStringLen(NULL, 0);
4456 hres = S_FALSE;
4459 if(!(*pbstrProperty))
4460 return E_OUTOFMEMORY;
4462 break;
4463 case Uri_PROPERTY_PATH:
4464 if(This->path_start > -1) {
4465 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len);
4466 hres = S_OK;
4467 } else {
4468 *pbstrProperty = SysAllocStringLen(NULL, 0);
4469 hres = S_FALSE;
4472 if(!(*pbstrProperty))
4473 hres = E_OUTOFMEMORY;
4475 break;
4476 case Uri_PROPERTY_PATH_AND_QUERY:
4477 if(This->path_start > -1) {
4478 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len+This->query_len);
4479 hres = S_OK;
4480 } else if(This->query_start > -1) {
4481 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len);
4482 hres = S_OK;
4483 } else {
4484 *pbstrProperty = SysAllocStringLen(NULL, 0);
4485 hres = S_FALSE;
4488 if(!(*pbstrProperty))
4489 hres = E_OUTOFMEMORY;
4491 break;
4492 case Uri_PROPERTY_QUERY:
4493 if(This->query_start > -1) {
4494 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len);
4495 hres = S_OK;
4496 } else {
4497 *pbstrProperty = SysAllocStringLen(NULL, 0);
4498 hres = S_FALSE;
4501 if(!(*pbstrProperty))
4502 hres = E_OUTOFMEMORY;
4504 break;
4505 case Uri_PROPERTY_RAW_URI:
4506 *pbstrProperty = SysAllocString(This->raw_uri);
4507 if(!(*pbstrProperty))
4508 hres = E_OUTOFMEMORY;
4509 else
4510 hres = S_OK;
4511 break;
4512 case Uri_PROPERTY_SCHEME_NAME:
4513 if(This->scheme_start > -1) {
4514 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->scheme_start, This->scheme_len);
4515 hres = S_OK;
4516 } else {
4517 *pbstrProperty = SysAllocStringLen(NULL, 0);
4518 hres = S_FALSE;
4521 if(!(*pbstrProperty))
4522 hres = E_OUTOFMEMORY;
4524 break;
4525 case Uri_PROPERTY_USER_INFO:
4526 if(This->userinfo_start > -1) {
4527 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->userinfo_start, This->userinfo_len);
4528 hres = S_OK;
4529 } else {
4530 *pbstrProperty = SysAllocStringLen(NULL, 0);
4531 hres = S_FALSE;
4534 if(!(*pbstrProperty))
4535 hres = E_OUTOFMEMORY;
4537 break;
4538 case Uri_PROPERTY_USER_NAME:
4539 if(This->userinfo_start > -1 && This->userinfo_split != 0) {
4540 /* If userinfo_split is set, that means a password exists
4541 * so the username is only from userinfo_start to userinfo_split.
4543 if(This->userinfo_split > -1) {
4544 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_split);
4545 hres = S_OK;
4546 } else {
4547 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_len);
4548 hres = S_OK;
4550 } else {
4551 *pbstrProperty = SysAllocStringLen(NULL, 0);
4552 hres = S_FALSE;
4555 if(!(*pbstrProperty))
4556 return E_OUTOFMEMORY;
4558 break;
4559 default:
4560 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
4561 hres = E_NOTIMPL;
4564 return hres;
4567 static HRESULT WINAPI Uri_GetPropertyLength(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
4569 Uri *This = impl_from_IUri(iface);
4570 HRESULT hres;
4571 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pcchProperty, dwFlags);
4573 if(!This->create_flags)
4574 return E_UNEXPECTED;
4575 if(!pcchProperty)
4576 return E_INVALIDARG;
4578 /* Can only return a length for a property if it's a string. */
4579 if(uriProp > Uri_PROPERTY_STRING_LAST)
4580 return E_INVALIDARG;
4582 /* Don't have support for flags yet. */
4583 if(dwFlags) {
4584 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
4585 return E_NOTIMPL;
4588 switch(uriProp) {
4589 case Uri_PROPERTY_ABSOLUTE_URI:
4590 if(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI) {
4591 *pcchProperty = 0;
4592 hres = S_FALSE;
4593 } else {
4594 if(This->scheme_type != URL_SCHEME_UNKNOWN) {
4595 if(This->userinfo_start > -1 && This->userinfo_len == 0)
4596 /* Don't include the '@' in the length. */
4597 *pcchProperty = This->canon_len-1;
4598 else if(This->userinfo_start > -1 && This->userinfo_len == 1 &&
4599 This->userinfo_split == 0)
4600 /* Don't include the ":@" in the length. */
4601 *pcchProperty = This->canon_len-2;
4602 else
4603 *pcchProperty = This->canon_len;
4604 } else
4605 *pcchProperty = This->canon_len;
4607 hres = S_OK;
4610 break;
4611 case Uri_PROPERTY_AUTHORITY:
4612 if(This->port_offset > -1 &&
4613 This->display_modifiers & URI_DISPLAY_NO_DEFAULT_PORT_AUTH &&
4614 is_default_port(This->scheme_type, This->port))
4615 /* Only count up until the port in the authority. */
4616 *pcchProperty = This->port_offset;
4617 else
4618 *pcchProperty = This->authority_len;
4619 hres = (This->authority_start > -1) ? S_OK : S_FALSE;
4620 break;
4621 case Uri_PROPERTY_DISPLAY_URI:
4622 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1)
4623 *pcchProperty = This->canon_len-This->userinfo_len-1;
4624 else
4625 *pcchProperty = This->canon_len;
4627 hres = S_OK;
4628 break;
4629 case Uri_PROPERTY_DOMAIN:
4630 if(This->domain_offset > -1)
4631 *pcchProperty = This->host_len - This->domain_offset;
4632 else
4633 *pcchProperty = 0;
4635 hres = (This->domain_offset > -1) ? S_OK : S_FALSE;
4636 break;
4637 case Uri_PROPERTY_EXTENSION:
4638 if(This->extension_offset > -1) {
4639 *pcchProperty = This->path_len - This->extension_offset;
4640 hres = S_OK;
4641 } else {
4642 *pcchProperty = 0;
4643 hres = S_FALSE;
4646 break;
4647 case Uri_PROPERTY_FRAGMENT:
4648 *pcchProperty = This->fragment_len;
4649 hres = (This->fragment_start > -1) ? S_OK : S_FALSE;
4650 break;
4651 case Uri_PROPERTY_HOST:
4652 *pcchProperty = This->host_len;
4654 /* '[' and ']' aren't included in the length. */
4655 if(This->host_type == Uri_HOST_IPV6)
4656 *pcchProperty -= 2;
4658 hres = (This->host_start > -1) ? S_OK : S_FALSE;
4659 break;
4660 case Uri_PROPERTY_PASSWORD:
4661 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_len-This->userinfo_split-1 : 0;
4662 hres = (This->userinfo_split > -1) ? S_OK : S_FALSE;
4663 break;
4664 case Uri_PROPERTY_PATH:
4665 *pcchProperty = This->path_len;
4666 hres = (This->path_start > -1) ? S_OK : S_FALSE;
4667 break;
4668 case Uri_PROPERTY_PATH_AND_QUERY:
4669 *pcchProperty = This->path_len+This->query_len;
4670 hres = (This->path_start > -1 || This->query_start > -1) ? S_OK : S_FALSE;
4671 break;
4672 case Uri_PROPERTY_QUERY:
4673 *pcchProperty = This->query_len;
4674 hres = (This->query_start > -1) ? S_OK : S_FALSE;
4675 break;
4676 case Uri_PROPERTY_RAW_URI:
4677 *pcchProperty = SysStringLen(This->raw_uri);
4678 hres = S_OK;
4679 break;
4680 case Uri_PROPERTY_SCHEME_NAME:
4681 *pcchProperty = This->scheme_len;
4682 hres = (This->scheme_start > -1) ? S_OK : S_FALSE;
4683 break;
4684 case Uri_PROPERTY_USER_INFO:
4685 *pcchProperty = This->userinfo_len;
4686 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
4687 break;
4688 case Uri_PROPERTY_USER_NAME:
4689 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_split : This->userinfo_len;
4690 if(This->userinfo_split == 0)
4691 hres = S_FALSE;
4692 else
4693 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
4694 break;
4695 default:
4696 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
4697 hres = E_NOTIMPL;
4700 return hres;
4703 static HRESULT WINAPI Uri_GetPropertyDWORD(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
4705 Uri *This = impl_from_IUri(iface);
4706 HRESULT hres;
4708 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pcchProperty, dwFlags);
4710 if(!This->create_flags)
4711 return E_UNEXPECTED;
4712 if(!pcchProperty)
4713 return E_INVALIDARG;
4715 /* Microsoft's implementation for the ZONE property of a URI seems to be lacking...
4716 * From what I can tell, instead of checking which URLZONE the URI belongs to it
4717 * simply assigns URLZONE_INVALID and returns E_NOTIMPL. This also applies to the GetZone
4718 * function.
4720 if(uriProp == Uri_PROPERTY_ZONE) {
4721 *pcchProperty = URLZONE_INVALID;
4722 return E_NOTIMPL;
4725 if(uriProp < Uri_PROPERTY_DWORD_START) {
4726 *pcchProperty = 0;
4727 return E_INVALIDARG;
4730 switch(uriProp) {
4731 case Uri_PROPERTY_HOST_TYPE:
4732 *pcchProperty = This->host_type;
4733 hres = S_OK;
4734 break;
4735 case Uri_PROPERTY_PORT:
4736 if(!This->has_port) {
4737 *pcchProperty = 0;
4738 hres = S_FALSE;
4739 } else {
4740 *pcchProperty = This->port;
4741 hres = S_OK;
4744 break;
4745 case Uri_PROPERTY_SCHEME:
4746 *pcchProperty = This->scheme_type;
4747 hres = S_OK;
4748 break;
4749 default:
4750 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
4751 hres = E_NOTIMPL;
4754 return hres;
4757 static HRESULT WINAPI Uri_HasProperty(IUri *iface, Uri_PROPERTY uriProp, BOOL *pfHasProperty)
4759 Uri *This = impl_from_IUri(iface);
4761 TRACE("(%p %s)->(%d %p)\n", This, debugstr_w(This->canon_uri), uriProp, pfHasProperty);
4763 if(!pfHasProperty)
4764 return E_INVALIDARG;
4766 switch(uriProp) {
4767 case Uri_PROPERTY_ABSOLUTE_URI:
4768 *pfHasProperty = !(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI);
4769 break;
4770 case Uri_PROPERTY_AUTHORITY:
4771 *pfHasProperty = This->authority_start > -1;
4772 break;
4773 case Uri_PROPERTY_DISPLAY_URI:
4774 *pfHasProperty = TRUE;
4775 break;
4776 case Uri_PROPERTY_DOMAIN:
4777 *pfHasProperty = This->domain_offset > -1;
4778 break;
4779 case Uri_PROPERTY_EXTENSION:
4780 *pfHasProperty = This->extension_offset > -1;
4781 break;
4782 case Uri_PROPERTY_FRAGMENT:
4783 *pfHasProperty = This->fragment_start > -1;
4784 break;
4785 case Uri_PROPERTY_HOST:
4786 *pfHasProperty = This->host_start > -1;
4787 break;
4788 case Uri_PROPERTY_PASSWORD:
4789 *pfHasProperty = This->userinfo_split > -1;
4790 break;
4791 case Uri_PROPERTY_PATH:
4792 *pfHasProperty = This->path_start > -1;
4793 break;
4794 case Uri_PROPERTY_PATH_AND_QUERY:
4795 *pfHasProperty = (This->path_start > -1 || This->query_start > -1);
4796 break;
4797 case Uri_PROPERTY_QUERY:
4798 *pfHasProperty = This->query_start > -1;
4799 break;
4800 case Uri_PROPERTY_RAW_URI:
4801 *pfHasProperty = TRUE;
4802 break;
4803 case Uri_PROPERTY_SCHEME_NAME:
4804 *pfHasProperty = This->scheme_start > -1;
4805 break;
4806 case Uri_PROPERTY_USER_INFO:
4807 *pfHasProperty = This->userinfo_start > -1;
4808 break;
4809 case Uri_PROPERTY_USER_NAME:
4810 if(This->userinfo_split == 0)
4811 *pfHasProperty = FALSE;
4812 else
4813 *pfHasProperty = This->userinfo_start > -1;
4814 break;
4815 case Uri_PROPERTY_HOST_TYPE:
4816 *pfHasProperty = TRUE;
4817 break;
4818 case Uri_PROPERTY_PORT:
4819 *pfHasProperty = This->has_port;
4820 break;
4821 case Uri_PROPERTY_SCHEME:
4822 *pfHasProperty = TRUE;
4823 break;
4824 case Uri_PROPERTY_ZONE:
4825 *pfHasProperty = FALSE;
4826 break;
4827 default:
4828 FIXME("(%p)->(%d %p): Unsupported property type.\n", This, uriProp, pfHasProperty);
4829 return E_NOTIMPL;
4832 return S_OK;
4835 static HRESULT WINAPI Uri_GetAbsoluteUri(IUri *iface, BSTR *pstrAbsoluteUri)
4837 TRACE("(%p)->(%p)\n", iface, pstrAbsoluteUri);
4838 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_ABSOLUTE_URI, pstrAbsoluteUri, 0);
4841 static HRESULT WINAPI Uri_GetAuthority(IUri *iface, BSTR *pstrAuthority)
4843 TRACE("(%p)->(%p)\n", iface, pstrAuthority);
4844 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_AUTHORITY, pstrAuthority, 0);
4847 static HRESULT WINAPI Uri_GetDisplayUri(IUri *iface, BSTR *pstrDisplayUri)
4849 TRACE("(%p)->(%p)\n", iface, pstrDisplayUri);
4850 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_DISPLAY_URI, pstrDisplayUri, 0);
4853 static HRESULT WINAPI Uri_GetDomain(IUri *iface, BSTR *pstrDomain)
4855 TRACE("(%p)->(%p)\n", iface, pstrDomain);
4856 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_DOMAIN, pstrDomain, 0);
4859 static HRESULT WINAPI Uri_GetExtension(IUri *iface, BSTR *pstrExtension)
4861 TRACE("(%p)->(%p)\n", iface, pstrExtension);
4862 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_EXTENSION, pstrExtension, 0);
4865 static HRESULT WINAPI Uri_GetFragment(IUri *iface, BSTR *pstrFragment)
4867 TRACE("(%p)->(%p)\n", iface, pstrFragment);
4868 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_FRAGMENT, pstrFragment, 0);
4871 static HRESULT WINAPI Uri_GetHost(IUri *iface, BSTR *pstrHost)
4873 TRACE("(%p)->(%p)\n", iface, pstrHost);
4874 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_HOST, pstrHost, 0);
4877 static HRESULT WINAPI Uri_GetPassword(IUri *iface, BSTR *pstrPassword)
4879 TRACE("(%p)->(%p)\n", iface, pstrPassword);
4880 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PASSWORD, pstrPassword, 0);
4883 static HRESULT WINAPI Uri_GetPath(IUri *iface, BSTR *pstrPath)
4885 TRACE("(%p)->(%p)\n", iface, pstrPath);
4886 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH, pstrPath, 0);
4889 static HRESULT WINAPI Uri_GetPathAndQuery(IUri *iface, BSTR *pstrPathAndQuery)
4891 TRACE("(%p)->(%p)\n", iface, pstrPathAndQuery);
4892 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH_AND_QUERY, pstrPathAndQuery, 0);
4895 static HRESULT WINAPI Uri_GetQuery(IUri *iface, BSTR *pstrQuery)
4897 TRACE("(%p)->(%p)\n", iface, pstrQuery);
4898 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_QUERY, pstrQuery, 0);
4901 static HRESULT WINAPI Uri_GetRawUri(IUri *iface, BSTR *pstrRawUri)
4903 TRACE("(%p)->(%p)\n", iface, pstrRawUri);
4904 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_RAW_URI, pstrRawUri, 0);
4907 static HRESULT WINAPI Uri_GetSchemeName(IUri *iface, BSTR *pstrSchemeName)
4909 TRACE("(%p)->(%p)\n", iface, pstrSchemeName);
4910 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_SCHEME_NAME, pstrSchemeName, 0);
4913 static HRESULT WINAPI Uri_GetUserInfo(IUri *iface, BSTR *pstrUserInfo)
4915 TRACE("(%p)->(%p)\n", iface, pstrUserInfo);
4916 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_INFO, pstrUserInfo, 0);
4919 static HRESULT WINAPI Uri_GetUserName(IUri *iface, BSTR *pstrUserName)
4921 TRACE("(%p)->(%p)\n", iface, pstrUserName);
4922 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_NAME, pstrUserName, 0);
4925 static HRESULT WINAPI Uri_GetHostType(IUri *iface, DWORD *pdwHostType)
4927 TRACE("(%p)->(%p)\n", iface, pdwHostType);
4928 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_HOST_TYPE, pdwHostType, 0);
4931 static HRESULT WINAPI Uri_GetPort(IUri *iface, DWORD *pdwPort)
4933 TRACE("(%p)->(%p)\n", iface, pdwPort);
4934 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_PORT, pdwPort, 0);
4937 static HRESULT WINAPI Uri_GetScheme(IUri *iface, DWORD *pdwScheme)
4939 TRACE("(%p)->(%p)\n", iface, pdwScheme);
4940 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_SCHEME, pdwScheme, 0);
4943 static HRESULT WINAPI Uri_GetZone(IUri *iface, DWORD *pdwZone)
4945 TRACE("(%p)->(%p)\n", iface, pdwZone);
4946 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_ZONE,pdwZone, 0);
4949 static HRESULT WINAPI Uri_GetProperties(IUri *iface, DWORD *pdwProperties)
4951 Uri *This = impl_from_IUri(iface);
4952 TRACE("(%p %s)->(%p)\n", This, debugstr_w(This->canon_uri), pdwProperties);
4954 if(!This->create_flags)
4955 return E_UNEXPECTED;
4956 if(!pdwProperties)
4957 return E_INVALIDARG;
4959 /* All URIs have these. */
4960 *pdwProperties = Uri_HAS_DISPLAY_URI|Uri_HAS_RAW_URI|Uri_HAS_SCHEME|Uri_HAS_HOST_TYPE;
4962 if(!(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI))
4963 *pdwProperties |= Uri_HAS_ABSOLUTE_URI;
4965 if(This->scheme_start > -1)
4966 *pdwProperties |= Uri_HAS_SCHEME_NAME;
4968 if(This->authority_start > -1) {
4969 *pdwProperties |= Uri_HAS_AUTHORITY;
4970 if(This->userinfo_start > -1) {
4971 *pdwProperties |= Uri_HAS_USER_INFO;
4972 if(This->userinfo_split != 0)
4973 *pdwProperties |= Uri_HAS_USER_NAME;
4975 if(This->userinfo_split > -1)
4976 *pdwProperties |= Uri_HAS_PASSWORD;
4977 if(This->host_start > -1)
4978 *pdwProperties |= Uri_HAS_HOST;
4979 if(This->domain_offset > -1)
4980 *pdwProperties |= Uri_HAS_DOMAIN;
4983 if(This->has_port)
4984 *pdwProperties |= Uri_HAS_PORT;
4985 if(This->path_start > -1)
4986 *pdwProperties |= Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY;
4987 if(This->query_start > -1)
4988 *pdwProperties |= Uri_HAS_QUERY|Uri_HAS_PATH_AND_QUERY;
4990 if(This->extension_offset > -1)
4991 *pdwProperties |= Uri_HAS_EXTENSION;
4993 if(This->fragment_start > -1)
4994 *pdwProperties |= Uri_HAS_FRAGMENT;
4996 return S_OK;
4999 static HRESULT WINAPI Uri_IsEqual(IUri *iface, IUri *pUri, BOOL *pfEqual)
5001 Uri *This = impl_from_IUri(iface);
5002 Uri *other;
5004 TRACE("(%p %s)->(%p %p)\n", This, debugstr_w(This->canon_uri), pUri, pfEqual);
5006 if(!This->create_flags)
5007 return E_UNEXPECTED;
5008 if(!pfEqual)
5009 return E_POINTER;
5011 if(!pUri) {
5012 *pfEqual = FALSE;
5014 /* For some reason Windows returns S_OK here... */
5015 return S_OK;
5018 /* Try to convert it to a Uri (allows for a more simple comparison). */
5019 if(!(other = get_uri_obj(pUri))) {
5020 FIXME("(%p)->(%p %p) No support for unknown IUri's yet.\n", iface, pUri, pfEqual);
5021 return E_NOTIMPL;
5024 TRACE("comparing to %s\n", debugstr_w(other->canon_uri));
5025 return compare_uris(This, other, pfEqual);
5028 static const IUriVtbl UriVtbl = {
5029 Uri_QueryInterface,
5030 Uri_AddRef,
5031 Uri_Release,
5032 Uri_GetPropertyBSTR,
5033 Uri_GetPropertyLength,
5034 Uri_GetPropertyDWORD,
5035 Uri_HasProperty,
5036 Uri_GetAbsoluteUri,
5037 Uri_GetAuthority,
5038 Uri_GetDisplayUri,
5039 Uri_GetDomain,
5040 Uri_GetExtension,
5041 Uri_GetFragment,
5042 Uri_GetHost,
5043 Uri_GetPassword,
5044 Uri_GetPath,
5045 Uri_GetPathAndQuery,
5046 Uri_GetQuery,
5047 Uri_GetRawUri,
5048 Uri_GetSchemeName,
5049 Uri_GetUserInfo,
5050 Uri_GetUserName,
5051 Uri_GetHostType,
5052 Uri_GetPort,
5053 Uri_GetScheme,
5054 Uri_GetZone,
5055 Uri_GetProperties,
5056 Uri_IsEqual
5059 static inline Uri* impl_from_IUriBuilderFactory(IUriBuilderFactory *iface)
5061 return CONTAINING_RECORD(iface, Uri, IUriBuilderFactory_iface);
5064 static HRESULT WINAPI UriBuilderFactory_QueryInterface(IUriBuilderFactory *iface, REFIID riid, void **ppv)
5066 Uri *This = impl_from_IUriBuilderFactory(iface);
5067 return IUri_QueryInterface(&This->IUri_iface, riid, ppv);
5070 static ULONG WINAPI UriBuilderFactory_AddRef(IUriBuilderFactory *iface)
5072 Uri *This = impl_from_IUriBuilderFactory(iface);
5073 return IUri_AddRef(&This->IUri_iface);
5076 static ULONG WINAPI UriBuilderFactory_Release(IUriBuilderFactory *iface)
5078 Uri *This = impl_from_IUriBuilderFactory(iface);
5079 return IUri_Release(&This->IUri_iface);
5082 static HRESULT WINAPI UriBuilderFactory_CreateIUriBuilder(IUriBuilderFactory *iface,
5083 DWORD dwFlags,
5084 DWORD_PTR dwReserved,
5085 IUriBuilder **ppIUriBuilder)
5087 Uri *This = impl_from_IUriBuilderFactory(iface);
5088 TRACE("(%p)->(%08x %08x %p)\n", This, dwFlags, (DWORD)dwReserved, ppIUriBuilder);
5090 if(!ppIUriBuilder)
5091 return E_POINTER;
5093 if(dwFlags || dwReserved) {
5094 *ppIUriBuilder = NULL;
5095 return E_INVALIDARG;
5098 return CreateIUriBuilder(NULL, 0, 0, ppIUriBuilder);
5101 static HRESULT WINAPI UriBuilderFactory_CreateInitializedIUriBuilder(IUriBuilderFactory *iface,
5102 DWORD dwFlags,
5103 DWORD_PTR dwReserved,
5104 IUriBuilder **ppIUriBuilder)
5106 Uri *This = impl_from_IUriBuilderFactory(iface);
5107 TRACE("(%p)->(%08x %08x %p)\n", This, dwFlags, (DWORD)dwReserved, ppIUriBuilder);
5109 if(!ppIUriBuilder)
5110 return E_POINTER;
5112 if(dwFlags || dwReserved) {
5113 *ppIUriBuilder = NULL;
5114 return E_INVALIDARG;
5117 return CreateIUriBuilder(&This->IUri_iface, 0, 0, ppIUriBuilder);
5120 static const IUriBuilderFactoryVtbl UriBuilderFactoryVtbl = {
5121 UriBuilderFactory_QueryInterface,
5122 UriBuilderFactory_AddRef,
5123 UriBuilderFactory_Release,
5124 UriBuilderFactory_CreateIUriBuilder,
5125 UriBuilderFactory_CreateInitializedIUriBuilder
5128 static inline Uri* impl_from_IPersistStream(IPersistStream *iface)
5130 return CONTAINING_RECORD(iface, Uri, IPersistStream_iface);
5133 static HRESULT WINAPI PersistStream_QueryInterface(IPersistStream *iface, REFIID riid, void **ppvObject)
5135 Uri *This = impl_from_IPersistStream(iface);
5136 return IUri_QueryInterface(&This->IUri_iface, riid, ppvObject);
5139 static ULONG WINAPI PersistStream_AddRef(IPersistStream *iface)
5141 Uri *This = impl_from_IPersistStream(iface);
5142 return IUri_AddRef(&This->IUri_iface);
5145 static ULONG WINAPI PersistStream_Release(IPersistStream *iface)
5147 Uri *This = impl_from_IPersistStream(iface);
5148 return IUri_Release(&This->IUri_iface);
5151 static HRESULT WINAPI PersistStream_GetClassID(IPersistStream *iface, CLSID *pClassID)
5153 Uri *This = impl_from_IPersistStream(iface);
5154 TRACE("(%p)->(%p)\n", This, pClassID);
5156 if(!pClassID)
5157 return E_INVALIDARG;
5159 *pClassID = CLSID_CUri;
5160 return S_OK;
5163 static HRESULT WINAPI PersistStream_IsDirty(IPersistStream *iface)
5165 Uri *This = impl_from_IPersistStream(iface);
5166 TRACE("(%p)\n", This);
5167 return S_FALSE;
5170 struct persist_uri {
5171 DWORD size;
5172 DWORD unk1[2];
5173 DWORD create_flags;
5174 DWORD unk2[3];
5175 DWORD fields_no;
5176 BYTE data[1];
5179 static HRESULT WINAPI PersistStream_Load(IPersistStream *iface, IStream *pStm)
5181 Uri *This = impl_from_IPersistStream(iface);
5182 struct persist_uri *data;
5183 parse_data parse;
5184 DWORD size;
5185 HRESULT hr;
5187 TRACE("(%p)->(%p)\n", This, pStm);
5189 if(This->create_flags)
5190 return E_UNEXPECTED;
5191 if(!pStm)
5192 return E_INVALIDARG;
5194 hr = IStream_Read(pStm, &size, sizeof(DWORD), NULL);
5195 if(FAILED(hr))
5196 return hr;
5197 data = heap_alloc(size);
5198 if(!data)
5199 return E_OUTOFMEMORY;
5200 hr = IStream_Read(pStm, data->unk1, size-sizeof(DWORD)-2, NULL);
5201 if(FAILED(hr)) {
5202 heap_free(data);
5203 return hr;
5206 if(size < sizeof(struct persist_uri)) {
5207 heap_free(data);
5208 return S_OK;
5211 if(*(DWORD*)data->data != Uri_PROPERTY_RAW_URI) {
5212 heap_free(data);
5213 ERR("Can't find raw_uri\n");
5214 return E_UNEXPECTED;
5217 This->raw_uri = SysAllocString((WCHAR*)(data->data+sizeof(DWORD)*2));
5218 if(!This->raw_uri) {
5219 heap_free(data);
5220 return E_OUTOFMEMORY;
5222 This->create_flags = data->create_flags;
5223 heap_free(data);
5224 TRACE("%x %s\n", This->create_flags, debugstr_w(This->raw_uri));
5226 memset(&parse, 0, sizeof(parse_data));
5227 parse.uri = This->raw_uri;
5228 if(!parse_uri(&parse, This->create_flags)) {
5229 SysFreeString(This->raw_uri);
5230 This->create_flags = 0;
5231 return E_UNEXPECTED;
5234 hr = canonicalize_uri(&parse, This, This->create_flags);
5235 if(FAILED(hr)) {
5236 SysFreeString(This->raw_uri);
5237 This->create_flags = 0;
5238 return hr;
5241 return S_OK;
5244 static inline BYTE* persist_stream_add_strprop(Uri *This, BYTE *p, DWORD type, DWORD len, WCHAR *data)
5246 len *= sizeof(WCHAR);
5247 *(DWORD*)p = type;
5248 p += sizeof(DWORD);
5249 *(DWORD*)p = len+sizeof(WCHAR);
5250 p += sizeof(DWORD);
5251 memcpy(p, data, len);
5252 p += len;
5253 *(WCHAR*)p = 0;
5254 return p+sizeof(WCHAR);
5257 static inline void persist_stream_save(Uri *This, IStream *pStm, BOOL marshal, struct persist_uri *data)
5259 BYTE *p = NULL;
5261 data->create_flags = This->create_flags;
5263 if(This->create_flags) {
5264 data->fields_no = 1;
5265 p = persist_stream_add_strprop(This, data->data, Uri_PROPERTY_RAW_URI,
5266 SysStringLen(This->raw_uri), This->raw_uri);
5268 if(This->scheme_type!=URL_SCHEME_HTTP && This->scheme_type!=URL_SCHEME_HTTPS
5269 && This->scheme_type!=URL_SCHEME_FTP)
5270 return;
5272 if(This->fragment_len) {
5273 data->fields_no++;
5274 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_FRAGMENT,
5275 This->fragment_len, This->canon_uri+This->fragment_start);
5278 if(This->host_len) {
5279 data->fields_no++;
5280 if(This->host_type == Uri_HOST_IPV6)
5281 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_HOST,
5282 This->host_len-2, This->canon_uri+This->host_start+1);
5283 else
5284 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_HOST,
5285 This->host_len, This->canon_uri+This->host_start);
5288 if(This->userinfo_split > -1) {
5289 data->fields_no++;
5290 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PASSWORD,
5291 This->userinfo_len-This->userinfo_split-1,
5292 This->canon_uri+This->userinfo_start+This->userinfo_split+1);
5295 if(This->path_len) {
5296 data->fields_no++;
5297 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PATH,
5298 This->path_len, This->canon_uri+This->path_start);
5299 } else if(marshal) {
5300 WCHAR no_path = '/';
5301 data->fields_no++;
5302 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PATH, 1, &no_path);
5305 if(This->has_port) {
5306 data->fields_no++;
5307 *(DWORD*)p = Uri_PROPERTY_PORT;
5308 p += sizeof(DWORD);
5309 *(DWORD*)p = sizeof(DWORD);
5310 p += sizeof(DWORD);
5311 *(DWORD*)p = This->port;
5312 p += sizeof(DWORD);
5315 if(This->query_len) {
5316 data->fields_no++;
5317 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_QUERY,
5318 This->query_len, This->canon_uri+This->query_start);
5321 if(This->scheme_len) {
5322 data->fields_no++;
5323 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_SCHEME_NAME,
5324 This->scheme_len, This->canon_uri+This->scheme_start);
5327 if(This->userinfo_start>-1 && This->userinfo_split!=0) {
5328 data->fields_no++;
5329 if(This->userinfo_split > -1)
5330 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_USER_NAME,
5331 This->userinfo_split, This->canon_uri+This->userinfo_start);
5332 else
5333 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_USER_NAME,
5334 This->userinfo_len, This->canon_uri+This->userinfo_start);
5338 static HRESULT WINAPI PersistStream_Save(IPersistStream *iface, IStream *pStm, BOOL fClearDirty)
5340 Uri *This = impl_from_IPersistStream(iface);
5341 struct persist_uri *data;
5342 ULARGE_INTEGER size;
5343 HRESULT hres;
5345 TRACE("(%p)->(%p %x)\n", This, pStm, fClearDirty);
5347 if(!pStm)
5348 return E_INVALIDARG;
5350 hres = IPersistStream_GetSizeMax(&This->IPersistStream_iface, &size);
5351 if(FAILED(hres))
5352 return hres;
5354 data = heap_alloc_zero(size.u.LowPart);
5355 if(!data)
5356 return E_OUTOFMEMORY;
5357 data->size = size.u.LowPart;
5358 persist_stream_save(This, pStm, FALSE, data);
5360 hres = IStream_Write(pStm, data, data->size-2, NULL);
5361 heap_free(data);
5362 return hres;
5365 static HRESULT WINAPI PersistStream_GetSizeMax(IPersistStream *iface, ULARGE_INTEGER *pcbSize)
5367 Uri *This = impl_from_IPersistStream(iface);
5368 TRACE("(%p)->(%p)\n", This, pcbSize);
5370 if(!pcbSize)
5371 return E_INVALIDARG;
5373 pcbSize->u.LowPart = 2+sizeof(struct persist_uri);
5374 pcbSize->u.HighPart = 0;
5375 if(This->create_flags)
5376 pcbSize->u.LowPart += (SysStringLen(This->raw_uri)+1)*sizeof(WCHAR) + 2*sizeof(DWORD);
5377 else /* there's no place for fields no */
5378 pcbSize->u.LowPart -= sizeof(DWORD);
5379 if(This->scheme_type!=URL_SCHEME_HTTP && This->scheme_type!=URL_SCHEME_HTTPS
5380 && This->scheme_type!=URL_SCHEME_FTP)
5381 return S_OK;
5383 if(This->fragment_len)
5384 pcbSize->u.LowPart += (This->fragment_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD);
5385 if(This->host_len) {
5386 if(This->host_type == Uri_HOST_IPV6)
5387 pcbSize->u.LowPart += (This->host_len-1)*sizeof(WCHAR) + 2*sizeof(DWORD);
5388 else
5389 pcbSize->u.LowPart += (This->host_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD);
5391 if(This->userinfo_split > -1)
5392 pcbSize->u.LowPart += (This->userinfo_len-This->userinfo_split)*sizeof(WCHAR) + 2*sizeof(DWORD);
5393 if(This->path_len)
5394 pcbSize->u.LowPart += (This->path_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD);
5395 if(This->has_port)
5396 pcbSize->u.LowPart += 3*sizeof(DWORD);
5397 if(This->query_len)
5398 pcbSize->u.LowPart += (This->query_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD);
5399 if(This->scheme_len)
5400 pcbSize->u.LowPart += (This->scheme_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD);
5401 if(This->userinfo_start>-1 && This->userinfo_split!=0) {
5402 if(This->userinfo_split > -1)
5403 pcbSize->u.LowPart += (This->userinfo_split+1)*sizeof(WCHAR) + 2*sizeof(DWORD);
5404 else
5405 pcbSize->u.LowPart += (This->userinfo_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD);
5407 return S_OK;
5410 static const IPersistStreamVtbl PersistStreamVtbl = {
5411 PersistStream_QueryInterface,
5412 PersistStream_AddRef,
5413 PersistStream_Release,
5414 PersistStream_GetClassID,
5415 PersistStream_IsDirty,
5416 PersistStream_Load,
5417 PersistStream_Save,
5418 PersistStream_GetSizeMax
5421 static inline Uri* impl_from_IMarshal(IMarshal *iface)
5423 return CONTAINING_RECORD(iface, Uri, IMarshal_iface);
5426 static HRESULT WINAPI Marshal_QueryInterface(IMarshal *iface, REFIID riid, void **ppvObject)
5428 Uri *This = impl_from_IMarshal(iface);
5429 return IUri_QueryInterface(&This->IUri_iface, riid, ppvObject);
5432 static ULONG WINAPI Marshal_AddRef(IMarshal *iface)
5434 Uri *This = impl_from_IMarshal(iface);
5435 return IUri_AddRef(&This->IUri_iface);
5438 static ULONG WINAPI Marshal_Release(IMarshal *iface)
5440 Uri *This = impl_from_IMarshal(iface);
5441 return IUri_Release(&This->IUri_iface);
5444 static HRESULT WINAPI Marshal_GetUnmarshalClass(IMarshal *iface, REFIID riid, void *pv,
5445 DWORD dwDestContext, void *pvDestContext, DWORD mshlflags, CLSID *pCid)
5447 Uri *This = impl_from_IMarshal(iface);
5448 TRACE("(%p)->(%s %p %x %p %x %p)\n", This, debugstr_guid(riid), pv,
5449 dwDestContext, pvDestContext, mshlflags, pCid);
5451 if(!pCid || (dwDestContext!=MSHCTX_LOCAL && dwDestContext!=MSHCTX_NOSHAREDMEM
5452 && dwDestContext!=MSHCTX_INPROC))
5453 return E_INVALIDARG;
5455 *pCid = CLSID_CUri;
5456 return S_OK;
5459 struct inproc_marshal_uri {
5460 DWORD size;
5461 DWORD mshlflags;
5462 DWORD unk[4]; /* process identifier? */
5463 Uri *uri;
5466 static HRESULT WINAPI Marshal_GetMarshalSizeMax(IMarshal *iface, REFIID riid, void *pv,
5467 DWORD dwDestContext, void *pvDestContext, DWORD mshlflags, DWORD *pSize)
5469 Uri *This = impl_from_IMarshal(iface);
5470 ULARGE_INTEGER size;
5471 HRESULT hres;
5472 TRACE("(%p)->(%s %p %x %p %x %p)\n", This, debugstr_guid(riid), pv,
5473 dwDestContext, pvDestContext, mshlflags, pSize);
5475 if(!pSize || (dwDestContext!=MSHCTX_LOCAL && dwDestContext!=MSHCTX_NOSHAREDMEM
5476 && dwDestContext!=MSHCTX_INPROC))
5477 return E_INVALIDARG;
5479 if(dwDestContext == MSHCTX_INPROC) {
5480 *pSize = sizeof(struct inproc_marshal_uri);
5481 return S_OK;
5484 hres = IPersistStream_GetSizeMax(&This->IPersistStream_iface, &size);
5485 if(FAILED(hres))
5486 return hres;
5487 if(!This->path_len && (This->scheme_type==URL_SCHEME_HTTP
5488 || This->scheme_type==URL_SCHEME_HTTPS
5489 || This->scheme_type==URL_SCHEME_FTP))
5490 size.u.LowPart += 3*sizeof(DWORD);
5491 *pSize = size.u.LowPart+2*sizeof(DWORD);
5492 return S_OK;
5495 static HRESULT WINAPI Marshal_MarshalInterface(IMarshal *iface, IStream *pStm, REFIID riid,
5496 void *pv, DWORD dwDestContext, void *pvDestContext, DWORD mshlflags)
5498 Uri *This = impl_from_IMarshal(iface);
5499 DWORD *data;
5500 DWORD size;
5501 HRESULT hres;
5503 TRACE("(%p)->(%p %s %p %x %p %x)\n", This, pStm, debugstr_guid(riid), pv,
5504 dwDestContext, pvDestContext, mshlflags);
5506 if(!pStm || mshlflags!=MSHLFLAGS_NORMAL || (dwDestContext!=MSHCTX_LOCAL
5507 && dwDestContext!=MSHCTX_NOSHAREDMEM && dwDestContext!=MSHCTX_INPROC))
5508 return E_INVALIDARG;
5510 if(dwDestContext == MSHCTX_INPROC) {
5511 struct inproc_marshal_uri data;
5513 data.size = sizeof(data);
5514 data.mshlflags = MSHCTX_INPROC;
5515 data.unk[0] = 0;
5516 data.unk[1] = 0;
5517 data.unk[2] = 0;
5518 data.unk[3] = 0;
5519 data.uri = This;
5521 hres = IStream_Write(pStm, &data, data.size, NULL);
5522 if(FAILED(hres))
5523 return hres;
5525 IUri_AddRef(&This->IUri_iface);
5526 return S_OK;
5529 hres = IMarshal_GetMarshalSizeMax(iface, riid, pv, dwDestContext,
5530 pvDestContext, mshlflags, &size);
5531 if(FAILED(hres))
5532 return hres;
5534 data = heap_alloc_zero(size);
5535 if(!data)
5536 return E_OUTOFMEMORY;
5538 data[0] = size;
5539 data[1] = dwDestContext;
5540 data[2] = size-2*sizeof(DWORD);
5541 persist_stream_save(This, pStm, TRUE, (struct persist_uri*)(data+2));
5543 hres = IStream_Write(pStm, data, data[0]-2, NULL);
5544 heap_free(data);
5545 return hres;
5548 static HRESULT WINAPI Marshal_UnmarshalInterface(IMarshal *iface,
5549 IStream *pStm, REFIID riid, void **ppv)
5551 Uri *This = impl_from_IMarshal(iface);
5552 DWORD header[2];
5553 HRESULT hres;
5555 TRACE("(%p)->(%p %s %p)\n", This, pStm, debugstr_guid(riid), ppv);
5557 if(This->create_flags)
5558 return E_UNEXPECTED;
5559 if(!pStm || !riid || !ppv)
5560 return E_INVALIDARG;
5562 hres = IStream_Read(pStm, header, sizeof(header), NULL);
5563 if(FAILED(hres))
5564 return hres;
5566 if(header[1]!=MSHCTX_LOCAL && header[1]!=MSHCTX_NOSHAREDMEM
5567 && header[1]!=MSHCTX_INPROC)
5568 return E_UNEXPECTED;
5570 if(header[1] == MSHCTX_INPROC) {
5571 struct inproc_marshal_uri data;
5572 parse_data parse;
5574 hres = IStream_Read(pStm, data.unk, sizeof(data)-2*sizeof(DWORD), NULL);
5575 if(FAILED(hres))
5576 return hres;
5578 This->raw_uri = SysAllocString(data.uri->raw_uri);
5579 if(!This->raw_uri) {
5580 return E_OUTOFMEMORY;
5583 memset(&parse, 0, sizeof(parse_data));
5584 parse.uri = This->raw_uri;
5586 if(!parse_uri(&parse, data.uri->create_flags))
5587 return E_INVALIDARG;
5589 hres = canonicalize_uri(&parse, This, data.uri->create_flags);
5590 if(FAILED(hres))
5591 return hres;
5593 This->create_flags = data.uri->create_flags;
5594 IUri_Release(&data.uri->IUri_iface);
5596 return IUri_QueryInterface(&This->IUri_iface, riid, ppv);
5599 hres = IPersistStream_Load(&This->IPersistStream_iface, pStm);
5600 if(FAILED(hres))
5601 return hres;
5603 return IUri_QueryInterface(&This->IUri_iface, riid, ppv);
5606 static HRESULT WINAPI Marshal_ReleaseMarshalData(IMarshal *iface, IStream *pStm)
5608 Uri *This = impl_from_IMarshal(iface);
5609 LARGE_INTEGER off;
5610 DWORD header[2];
5611 HRESULT hres;
5613 TRACE("(%p)->(%p)\n", This, pStm);
5615 if(!pStm)
5616 return E_INVALIDARG;
5618 hres = IStream_Read(pStm, header, 2*sizeof(DWORD), NULL);
5619 if(FAILED(hres))
5620 return hres;
5622 if(header[1] == MSHCTX_INPROC) {
5623 struct inproc_marshal_uri data;
5625 hres = IStream_Read(pStm, data.unk, sizeof(data)-2*sizeof(DWORD), NULL);
5626 if(FAILED(hres))
5627 return hres;
5629 IUri_Release(&data.uri->IUri_iface);
5630 return S_OK;
5633 off.u.LowPart = header[0]-sizeof(header)-2;
5634 off.u.HighPart = 0;
5635 return IStream_Seek(pStm, off, STREAM_SEEK_CUR, NULL);
5638 static HRESULT WINAPI Marshal_DisconnectObject(IMarshal *iface, DWORD dwReserved)
5640 Uri *This = impl_from_IMarshal(iface);
5641 TRACE("(%p)->(%x)\n", This, dwReserved);
5642 return S_OK;
5645 static const IMarshalVtbl MarshalVtbl = {
5646 Marshal_QueryInterface,
5647 Marshal_AddRef,
5648 Marshal_Release,
5649 Marshal_GetUnmarshalClass,
5650 Marshal_GetMarshalSizeMax,
5651 Marshal_MarshalInterface,
5652 Marshal_UnmarshalInterface,
5653 Marshal_ReleaseMarshalData,
5654 Marshal_DisconnectObject
5657 HRESULT Uri_Construct(IUnknown *pUnkOuter, LPVOID *ppobj)
5659 Uri *ret = heap_alloc_zero(sizeof(Uri));
5661 TRACE("(%p %p)\n", pUnkOuter, ppobj);
5663 *ppobj = ret;
5664 if(!ret)
5665 return E_OUTOFMEMORY;
5667 ret->IUri_iface.lpVtbl = &UriVtbl;
5668 ret->IUriBuilderFactory_iface.lpVtbl = &UriBuilderFactoryVtbl;
5669 ret->IPersistStream_iface.lpVtbl = &PersistStreamVtbl;
5670 ret->IMarshal_iface.lpVtbl = &MarshalVtbl;
5671 ret->ref = 1;
5673 *ppobj = &ret->IUri_iface;
5674 return S_OK;
5677 /***********************************************************************
5678 * CreateUri (urlmon.@)
5680 * Creates a new IUri object using the URI represented by pwzURI. This function
5681 * parses and validates the components of pwzURI and then canonicalizes the
5682 * parsed components.
5684 * PARAMS
5685 * pwzURI [I] The URI to parse, validate, and canonicalize.
5686 * dwFlags [I] Flags which can affect how the parsing/canonicalization is performed.
5687 * dwReserved [I] Reserved (not used).
5688 * ppURI [O] The resulting IUri after parsing/canonicalization occurs.
5690 * RETURNS
5691 * Success: Returns S_OK. ppURI contains the pointer to the newly allocated IUri.
5692 * Failure: E_INVALIDARG if there are invalid flag combinations in dwFlags, or an
5693 * invalid parameter, or pwzURI doesn't represent a valid URI.
5694 * E_OUTOFMEMORY if any memory allocation fails.
5696 * NOTES
5697 * Default flags:
5698 * Uri_CREATE_CANONICALIZE, Uri_CREATE_DECODE_EXTRA_INFO, Uri_CREATE_CRACK_UNKNOWN_SCHEMES,
5699 * Uri_CREATE_PRE_PROCESS_HTML_URI, Uri_CREATE_NO_IE_SETTINGS.
5701 HRESULT WINAPI CreateUri(LPCWSTR pwzURI, DWORD dwFlags, DWORD_PTR dwReserved, IUri **ppURI)
5703 const DWORD supported_flags = Uri_CREATE_ALLOW_RELATIVE|Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME|
5704 Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME|Uri_CREATE_NO_CANONICALIZE|Uri_CREATE_CANONICALIZE|
5705 Uri_CREATE_DECODE_EXTRA_INFO|Uri_CREATE_NO_DECODE_EXTRA_INFO|Uri_CREATE_CRACK_UNKNOWN_SCHEMES|
5706 Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES|Uri_CREATE_PRE_PROCESS_HTML_URI|Uri_CREATE_NO_PRE_PROCESS_HTML_URI|
5707 Uri_CREATE_NO_IE_SETTINGS|Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS|Uri_CREATE_FILE_USE_DOS_PATH;
5708 Uri *ret;
5709 HRESULT hr;
5710 parse_data data;
5712 TRACE("(%s %x %x %p)\n", debugstr_w(pwzURI), dwFlags, (DWORD)dwReserved, ppURI);
5714 if(!ppURI)
5715 return E_INVALIDARG;
5717 if(!pwzURI) {
5718 *ppURI = NULL;
5719 return E_INVALIDARG;
5722 /* Check for invalid flags. */
5723 if(has_invalid_flag_combination(dwFlags)) {
5724 *ppURI = NULL;
5725 return E_INVALIDARG;
5728 /* Currently unsupported. */
5729 if(dwFlags & ~supported_flags)
5730 FIXME("Ignoring unsupported flag(s) %x\n", dwFlags & ~supported_flags);
5732 hr = Uri_Construct(NULL, (void**)&ret);
5733 if(FAILED(hr)) {
5734 *ppURI = NULL;
5735 return hr;
5738 /* Explicitly set the default flags if it doesn't cause a flag conflict. */
5739 apply_default_flags(&dwFlags);
5741 /* Pre process the URI, unless told otherwise. */
5742 if(!(dwFlags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI))
5743 ret->raw_uri = pre_process_uri(pwzURI);
5744 else
5745 ret->raw_uri = SysAllocString(pwzURI);
5747 if(!ret->raw_uri) {
5748 heap_free(ret);
5749 return E_OUTOFMEMORY;
5752 memset(&data, 0, sizeof(parse_data));
5753 data.uri = ret->raw_uri;
5755 /* Validate and parse the URI into its components. */
5756 if(!parse_uri(&data, dwFlags)) {
5757 /* Encountered an unsupported or invalid URI */
5758 IUri_Release(&ret->IUri_iface);
5759 *ppURI = NULL;
5760 return E_INVALIDARG;
5763 /* Canonicalize the URI. */
5764 hr = canonicalize_uri(&data, ret, dwFlags);
5765 if(FAILED(hr)) {
5766 IUri_Release(&ret->IUri_iface);
5767 *ppURI = NULL;
5768 return hr;
5771 ret->create_flags = dwFlags;
5773 *ppURI = &ret->IUri_iface;
5774 return S_OK;
5777 /***********************************************************************
5778 * CreateUriWithFragment (urlmon.@)
5780 * Creates a new IUri object. This is almost the same as CreateUri, expect that
5781 * it allows you to explicitly specify a fragment (pwzFragment) for pwzURI.
5783 * PARAMS
5784 * pwzURI [I] The URI to parse and perform canonicalization on.
5785 * pwzFragment [I] The explicit fragment string which should be added to pwzURI.
5786 * dwFlags [I] The flags which will be passed to CreateUri.
5787 * dwReserved [I] Reserved (not used).
5788 * ppURI [O] The resulting IUri after parsing/canonicalization.
5790 * RETURNS
5791 * Success: S_OK. ppURI contains the pointer to the newly allocated IUri.
5792 * Failure: E_INVALIDARG if pwzURI already contains a fragment and pwzFragment
5793 * isn't NULL. Will also return E_INVALIDARG for the same reasons as
5794 * CreateUri will. E_OUTOFMEMORY if any allocation fails.
5796 HRESULT WINAPI CreateUriWithFragment(LPCWSTR pwzURI, LPCWSTR pwzFragment, DWORD dwFlags,
5797 DWORD_PTR dwReserved, IUri **ppURI)
5799 HRESULT hres;
5800 TRACE("(%s %s %x %x %p)\n", debugstr_w(pwzURI), debugstr_w(pwzFragment), dwFlags, (DWORD)dwReserved, ppURI);
5802 if(!ppURI)
5803 return E_INVALIDARG;
5805 if(!pwzURI) {
5806 *ppURI = NULL;
5807 return E_INVALIDARG;
5810 /* Check if a fragment should be appended to the URI string. */
5811 if(pwzFragment) {
5812 WCHAR *uriW;
5813 DWORD uri_len, frag_len;
5814 BOOL add_pound;
5816 /* Check if the original URI already has a fragment component. */
5817 if(StrChrW(pwzURI, '#')) {
5818 *ppURI = NULL;
5819 return E_INVALIDARG;
5822 uri_len = lstrlenW(pwzURI);
5823 frag_len = lstrlenW(pwzFragment);
5825 /* If the fragment doesn't start with a '#', one will be added. */
5826 add_pound = *pwzFragment != '#';
5828 if(add_pound)
5829 uriW = heap_alloc((uri_len+frag_len+2)*sizeof(WCHAR));
5830 else
5831 uriW = heap_alloc((uri_len+frag_len+1)*sizeof(WCHAR));
5833 if(!uriW)
5834 return E_OUTOFMEMORY;
5836 memcpy(uriW, pwzURI, uri_len*sizeof(WCHAR));
5837 if(add_pound)
5838 uriW[uri_len++] = '#';
5839 memcpy(uriW+uri_len, pwzFragment, (frag_len+1)*sizeof(WCHAR));
5841 hres = CreateUri(uriW, dwFlags, 0, ppURI);
5843 heap_free(uriW);
5844 } else
5845 /* A fragment string wasn't specified, so just forward the call. */
5846 hres = CreateUri(pwzURI, dwFlags, 0, ppURI);
5848 return hres;
5851 static HRESULT build_uri(const UriBuilder *builder, IUri **uri, DWORD create_flags,
5852 DWORD use_orig_flags, DWORD encoding_mask)
5854 HRESULT hr;
5855 parse_data data;
5856 Uri *ret;
5858 if(!uri)
5859 return E_POINTER;
5861 if(encoding_mask && (!builder->uri || builder->modified_props)) {
5862 *uri = NULL;
5863 return E_NOTIMPL;
5866 /* Decide what flags should be used when creating the Uri. */
5867 if((use_orig_flags & UriBuilder_USE_ORIGINAL_FLAGS) && builder->uri)
5868 create_flags = builder->uri->create_flags;
5869 else {
5870 if(has_invalid_flag_combination(create_flags)) {
5871 *uri = NULL;
5872 return E_INVALIDARG;
5875 /* Set the default flags if they don't cause a conflict. */
5876 apply_default_flags(&create_flags);
5879 /* Return the base IUri if no changes have been made and the create_flags match. */
5880 if(builder->uri && !builder->modified_props && builder->uri->create_flags == create_flags) {
5881 *uri = &builder->uri->IUri_iface;
5882 IUri_AddRef(*uri);
5883 return S_OK;
5886 hr = validate_components(builder, &data, create_flags);
5887 if(FAILED(hr)) {
5888 *uri = NULL;
5889 return hr;
5892 hr = Uri_Construct(NULL, (void**)&ret);
5893 if(FAILED(hr)) {
5894 *uri = NULL;
5895 return hr;
5898 hr = generate_uri(builder, &data, ret, create_flags);
5899 if(FAILED(hr)) {
5900 IUri_Release(&ret->IUri_iface);
5901 *uri = NULL;
5902 return hr;
5905 *uri = &ret->IUri_iface;
5906 return S_OK;
5909 static inline UriBuilder* impl_from_IUriBuilder(IUriBuilder *iface)
5911 return CONTAINING_RECORD(iface, UriBuilder, IUriBuilder_iface);
5914 static HRESULT WINAPI UriBuilder_QueryInterface(IUriBuilder *iface, REFIID riid, void **ppv)
5916 UriBuilder *This = impl_from_IUriBuilder(iface);
5918 if(IsEqualGUID(&IID_IUnknown, riid)) {
5919 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
5920 *ppv = &This->IUriBuilder_iface;
5921 }else if(IsEqualGUID(&IID_IUriBuilder, riid)) {
5922 TRACE("(%p)->(IID_IUriBuilder %p)\n", This, ppv);
5923 *ppv = &This->IUriBuilder_iface;
5924 }else {
5925 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
5926 *ppv = NULL;
5927 return E_NOINTERFACE;
5930 IUnknown_AddRef((IUnknown*)*ppv);
5931 return S_OK;
5934 static ULONG WINAPI UriBuilder_AddRef(IUriBuilder *iface)
5936 UriBuilder *This = impl_from_IUriBuilder(iface);
5937 LONG ref = InterlockedIncrement(&This->ref);
5939 TRACE("(%p) ref=%d\n", This, ref);
5941 return ref;
5944 static ULONG WINAPI UriBuilder_Release(IUriBuilder *iface)
5946 UriBuilder *This = impl_from_IUriBuilder(iface);
5947 LONG ref = InterlockedDecrement(&This->ref);
5949 TRACE("(%p) ref=%d\n", This, ref);
5951 if(!ref) {
5952 if(This->uri) IUri_Release(&This->uri->IUri_iface);
5953 heap_free(This->fragment);
5954 heap_free(This->host);
5955 heap_free(This->password);
5956 heap_free(This->path);
5957 heap_free(This->query);
5958 heap_free(This->scheme);
5959 heap_free(This->username);
5960 heap_free(This);
5963 return ref;
5966 static HRESULT WINAPI UriBuilder_CreateUriSimple(IUriBuilder *iface,
5967 DWORD dwAllowEncodingPropertyMask,
5968 DWORD_PTR dwReserved,
5969 IUri **ppIUri)
5971 UriBuilder *This = impl_from_IUriBuilder(iface);
5972 HRESULT hr;
5973 TRACE("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
5975 hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask);
5976 if(hr == E_NOTIMPL)
5977 FIXME("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
5978 return hr;
5981 static HRESULT WINAPI UriBuilder_CreateUri(IUriBuilder *iface,
5982 DWORD dwCreateFlags,
5983 DWORD dwAllowEncodingPropertyMask,
5984 DWORD_PTR dwReserved,
5985 IUri **ppIUri)
5987 UriBuilder *This = impl_from_IUriBuilder(iface);
5988 HRESULT hr;
5989 TRACE("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
5991 if(dwCreateFlags == -1)
5992 hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask);
5993 else
5994 hr = build_uri(This, ppIUri, dwCreateFlags, 0, dwAllowEncodingPropertyMask);
5996 if(hr == E_NOTIMPL)
5997 FIXME("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
5998 return hr;
6001 static HRESULT WINAPI UriBuilder_CreateUriWithFlags(IUriBuilder *iface,
6002 DWORD dwCreateFlags,
6003 DWORD dwUriBuilderFlags,
6004 DWORD dwAllowEncodingPropertyMask,
6005 DWORD_PTR dwReserved,
6006 IUri **ppIUri)
6008 UriBuilder *This = impl_from_IUriBuilder(iface);
6009 HRESULT hr;
6010 TRACE("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags,
6011 dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
6013 hr = build_uri(This, ppIUri, dwCreateFlags, dwUriBuilderFlags, dwAllowEncodingPropertyMask);
6014 if(hr == E_NOTIMPL)
6015 FIXME("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags,
6016 dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
6017 return hr;
6020 static HRESULT WINAPI UriBuilder_GetIUri(IUriBuilder *iface, IUri **ppIUri)
6022 UriBuilder *This = impl_from_IUriBuilder(iface);
6023 TRACE("(%p)->(%p)\n", This, ppIUri);
6025 if(!ppIUri)
6026 return E_POINTER;
6028 if(This->uri) {
6029 IUri *uri = &This->uri->IUri_iface;
6030 IUri_AddRef(uri);
6031 *ppIUri = uri;
6032 } else
6033 *ppIUri = NULL;
6035 return S_OK;
6038 static HRESULT WINAPI UriBuilder_SetIUri(IUriBuilder *iface, IUri *pIUri)
6040 UriBuilder *This = impl_from_IUriBuilder(iface);
6041 TRACE("(%p)->(%p)\n", This, pIUri);
6043 if(pIUri) {
6044 Uri *uri;
6046 if((uri = get_uri_obj(pIUri))) {
6047 /* Only reset the builder if its Uri isn't the same as
6048 * the Uri passed to the function.
6050 if(This->uri != uri) {
6051 reset_builder(This);
6053 This->uri = uri;
6054 if(uri->has_port)
6055 This->port = uri->port;
6057 IUri_AddRef(pIUri);
6059 } else {
6060 FIXME("(%p)->(%p) Unknown IUri types not supported yet.\n", This, pIUri);
6061 return E_NOTIMPL;
6063 } else if(This->uri)
6064 /* Only reset the builder if its Uri isn't NULL. */
6065 reset_builder(This);
6067 return S_OK;
6070 static HRESULT WINAPI UriBuilder_GetFragment(IUriBuilder *iface, DWORD *pcchFragment, LPCWSTR *ppwzFragment)
6072 UriBuilder *This = impl_from_IUriBuilder(iface);
6073 TRACE("(%p)->(%p %p)\n", This, pcchFragment, ppwzFragment);
6075 if(!This->uri || This->uri->fragment_start == -1 || This->modified_props & Uri_HAS_FRAGMENT)
6076 return get_builder_component(&This->fragment, &This->fragment_len, NULL, 0, ppwzFragment, pcchFragment);
6077 else
6078 return get_builder_component(&This->fragment, &This->fragment_len, This->uri->canon_uri+This->uri->fragment_start,
6079 This->uri->fragment_len, ppwzFragment, pcchFragment);
6082 static HRESULT WINAPI UriBuilder_GetHost(IUriBuilder *iface, DWORD *pcchHost, LPCWSTR *ppwzHost)
6084 UriBuilder *This = impl_from_IUriBuilder(iface);
6085 TRACE("(%p)->(%p %p)\n", This, pcchHost, ppwzHost);
6087 if(!This->uri || This->uri->host_start == -1 || This->modified_props & Uri_HAS_HOST)
6088 return get_builder_component(&This->host, &This->host_len, NULL, 0, ppwzHost, pcchHost);
6089 else {
6090 if(This->uri->host_type == Uri_HOST_IPV6)
6091 /* Don't include the '[' and ']' around the address. */
6092 return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start+1,
6093 This->uri->host_len-2, ppwzHost, pcchHost);
6094 else
6095 return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start,
6096 This->uri->host_len, ppwzHost, pcchHost);
6100 static HRESULT WINAPI UriBuilder_GetPassword(IUriBuilder *iface, DWORD *pcchPassword, LPCWSTR *ppwzPassword)
6102 UriBuilder *This = impl_from_IUriBuilder(iface);
6103 TRACE("(%p)->(%p %p)\n", This, pcchPassword, ppwzPassword);
6105 if(!This->uri || This->uri->userinfo_split == -1 || This->modified_props & Uri_HAS_PASSWORD)
6106 return get_builder_component(&This->password, &This->password_len, NULL, 0, ppwzPassword, pcchPassword);
6107 else {
6108 const WCHAR *start = This->uri->canon_uri+This->uri->userinfo_start+This->uri->userinfo_split+1;
6109 DWORD len = This->uri->userinfo_len-This->uri->userinfo_split-1;
6110 return get_builder_component(&This->password, &This->password_len, start, len, ppwzPassword, pcchPassword);
6114 static HRESULT WINAPI UriBuilder_GetPath(IUriBuilder *iface, DWORD *pcchPath, LPCWSTR *ppwzPath)
6116 UriBuilder *This = impl_from_IUriBuilder(iface);
6117 TRACE("(%p)->(%p %p)\n", This, pcchPath, ppwzPath);
6119 if(!This->uri || This->uri->path_start == -1 || This->modified_props & Uri_HAS_PATH)
6120 return get_builder_component(&This->path, &This->path_len, NULL, 0, ppwzPath, pcchPath);
6121 else
6122 return get_builder_component(&This->path, &This->path_len, This->uri->canon_uri+This->uri->path_start,
6123 This->uri->path_len, ppwzPath, pcchPath);
6126 static HRESULT WINAPI UriBuilder_GetPort(IUriBuilder *iface, BOOL *pfHasPort, DWORD *pdwPort)
6128 UriBuilder *This = impl_from_IUriBuilder(iface);
6129 TRACE("(%p)->(%p %p)\n", This, pfHasPort, pdwPort);
6131 if(!pfHasPort) {
6132 if(pdwPort)
6133 *pdwPort = 0;
6134 return E_POINTER;
6137 if(!pdwPort) {
6138 *pfHasPort = FALSE;
6139 return E_POINTER;
6142 *pfHasPort = This->has_port;
6143 *pdwPort = This->port;
6144 return S_OK;
6147 static HRESULT WINAPI UriBuilder_GetQuery(IUriBuilder *iface, DWORD *pcchQuery, LPCWSTR *ppwzQuery)
6149 UriBuilder *This = impl_from_IUriBuilder(iface);
6150 TRACE("(%p)->(%p %p)\n", This, pcchQuery, ppwzQuery);
6152 if(!This->uri || This->uri->query_start == -1 || This->modified_props & Uri_HAS_QUERY)
6153 return get_builder_component(&This->query, &This->query_len, NULL, 0, ppwzQuery, pcchQuery);
6154 else
6155 return get_builder_component(&This->query, &This->query_len, This->uri->canon_uri+This->uri->query_start,
6156 This->uri->query_len, ppwzQuery, pcchQuery);
6159 static HRESULT WINAPI UriBuilder_GetSchemeName(IUriBuilder *iface, DWORD *pcchSchemeName, LPCWSTR *ppwzSchemeName)
6161 UriBuilder *This = impl_from_IUriBuilder(iface);
6162 TRACE("(%p)->(%p %p)\n", This, pcchSchemeName, ppwzSchemeName);
6164 if(!This->uri || This->uri->scheme_start == -1 || This->modified_props & Uri_HAS_SCHEME_NAME)
6165 return get_builder_component(&This->scheme, &This->scheme_len, NULL, 0, ppwzSchemeName, pcchSchemeName);
6166 else
6167 return get_builder_component(&This->scheme, &This->scheme_len, This->uri->canon_uri+This->uri->scheme_start,
6168 This->uri->scheme_len, ppwzSchemeName, pcchSchemeName);
6171 static HRESULT WINAPI UriBuilder_GetUserName(IUriBuilder *iface, DWORD *pcchUserName, LPCWSTR *ppwzUserName)
6173 UriBuilder *This = impl_from_IUriBuilder(iface);
6174 TRACE("(%p)->(%p %p)\n", This, pcchUserName, ppwzUserName);
6176 if(!This->uri || This->uri->userinfo_start == -1 || This->uri->userinfo_split == 0 ||
6177 This->modified_props & Uri_HAS_USER_NAME)
6178 return get_builder_component(&This->username, &This->username_len, NULL, 0, ppwzUserName, pcchUserName);
6179 else {
6180 const WCHAR *start = This->uri->canon_uri+This->uri->userinfo_start;
6182 /* Check if there's a password in the userinfo section. */
6183 if(This->uri->userinfo_split > -1)
6184 /* Don't include the password. */
6185 return get_builder_component(&This->username, &This->username_len, start,
6186 This->uri->userinfo_split, ppwzUserName, pcchUserName);
6187 else
6188 return get_builder_component(&This->username, &This->username_len, start,
6189 This->uri->userinfo_len, ppwzUserName, pcchUserName);
6193 static HRESULT WINAPI UriBuilder_SetFragment(IUriBuilder *iface, LPCWSTR pwzNewValue)
6195 UriBuilder *This = impl_from_IUriBuilder(iface);
6196 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
6197 return set_builder_component(&This->fragment, &This->fragment_len, pwzNewValue, '#',
6198 &This->modified_props, Uri_HAS_FRAGMENT);
6201 static HRESULT WINAPI UriBuilder_SetHost(IUriBuilder *iface, LPCWSTR pwzNewValue)
6203 UriBuilder *This = impl_from_IUriBuilder(iface);
6204 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
6206 /* Host name can't be set to NULL. */
6207 if(!pwzNewValue)
6208 return E_INVALIDARG;
6210 return set_builder_component(&This->host, &This->host_len, pwzNewValue, 0,
6211 &This->modified_props, Uri_HAS_HOST);
6214 static HRESULT WINAPI UriBuilder_SetPassword(IUriBuilder *iface, LPCWSTR pwzNewValue)
6216 UriBuilder *This = impl_from_IUriBuilder(iface);
6217 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
6218 return set_builder_component(&This->password, &This->password_len, pwzNewValue, 0,
6219 &This->modified_props, Uri_HAS_PASSWORD);
6222 static HRESULT WINAPI UriBuilder_SetPath(IUriBuilder *iface, LPCWSTR pwzNewValue)
6224 UriBuilder *This = impl_from_IUriBuilder(iface);
6225 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
6226 return set_builder_component(&This->path, &This->path_len, pwzNewValue, 0,
6227 &This->modified_props, Uri_HAS_PATH);
6230 static HRESULT WINAPI UriBuilder_SetPort(IUriBuilder *iface, BOOL fHasPort, DWORD dwNewValue)
6232 UriBuilder *This = impl_from_IUriBuilder(iface);
6233 TRACE("(%p)->(%d %d)\n", This, fHasPort, dwNewValue);
6235 This->has_port = fHasPort;
6236 This->port = dwNewValue;
6237 This->modified_props |= Uri_HAS_PORT;
6238 return S_OK;
6241 static HRESULT WINAPI UriBuilder_SetQuery(IUriBuilder *iface, LPCWSTR pwzNewValue)
6243 UriBuilder *This = impl_from_IUriBuilder(iface);
6244 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
6245 return set_builder_component(&This->query, &This->query_len, pwzNewValue, '?',
6246 &This->modified_props, Uri_HAS_QUERY);
6249 static HRESULT WINAPI UriBuilder_SetSchemeName(IUriBuilder *iface, LPCWSTR pwzNewValue)
6251 UriBuilder *This = impl_from_IUriBuilder(iface);
6252 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
6254 /* Only set the scheme name if it's not NULL or empty. */
6255 if(!pwzNewValue || !*pwzNewValue)
6256 return E_INVALIDARG;
6258 return set_builder_component(&This->scheme, &This->scheme_len, pwzNewValue, 0,
6259 &This->modified_props, Uri_HAS_SCHEME_NAME);
6262 static HRESULT WINAPI UriBuilder_SetUserName(IUriBuilder *iface, LPCWSTR pwzNewValue)
6264 UriBuilder *This = impl_from_IUriBuilder(iface);
6265 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
6266 return set_builder_component(&This->username, &This->username_len, pwzNewValue, 0,
6267 &This->modified_props, Uri_HAS_USER_NAME);
6270 static HRESULT WINAPI UriBuilder_RemoveProperties(IUriBuilder *iface, DWORD dwPropertyMask)
6272 const DWORD accepted_flags = Uri_HAS_AUTHORITY|Uri_HAS_DOMAIN|Uri_HAS_EXTENSION|Uri_HAS_FRAGMENT|Uri_HAS_HOST|
6273 Uri_HAS_PASSWORD|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_QUERY|
6274 Uri_HAS_USER_INFO|Uri_HAS_USER_NAME;
6276 UriBuilder *This = impl_from_IUriBuilder(iface);
6277 TRACE("(%p)->(0x%08x)\n", This, dwPropertyMask);
6279 if(dwPropertyMask & ~accepted_flags)
6280 return E_INVALIDARG;
6282 if(dwPropertyMask & Uri_HAS_FRAGMENT)
6283 UriBuilder_SetFragment(iface, NULL);
6285 /* Even though you can't set the host name to NULL or an
6286 * empty string, you can still remove it... for some reason.
6288 if(dwPropertyMask & Uri_HAS_HOST)
6289 set_builder_component(&This->host, &This->host_len, NULL, 0,
6290 &This->modified_props, Uri_HAS_HOST);
6292 if(dwPropertyMask & Uri_HAS_PASSWORD)
6293 UriBuilder_SetPassword(iface, NULL);
6295 if(dwPropertyMask & Uri_HAS_PATH)
6296 UriBuilder_SetPath(iface, NULL);
6298 if(dwPropertyMask & Uri_HAS_PORT)
6299 UriBuilder_SetPort(iface, FALSE, 0);
6301 if(dwPropertyMask & Uri_HAS_QUERY)
6302 UriBuilder_SetQuery(iface, NULL);
6304 if(dwPropertyMask & Uri_HAS_USER_NAME)
6305 UriBuilder_SetUserName(iface, NULL);
6307 return S_OK;
6310 static HRESULT WINAPI UriBuilder_HasBeenModified(IUriBuilder *iface, BOOL *pfModified)
6312 UriBuilder *This = impl_from_IUriBuilder(iface);
6313 TRACE("(%p)->(%p)\n", This, pfModified);
6315 if(!pfModified)
6316 return E_POINTER;
6318 *pfModified = This->modified_props > 0;
6319 return S_OK;
6322 static const IUriBuilderVtbl UriBuilderVtbl = {
6323 UriBuilder_QueryInterface,
6324 UriBuilder_AddRef,
6325 UriBuilder_Release,
6326 UriBuilder_CreateUriSimple,
6327 UriBuilder_CreateUri,
6328 UriBuilder_CreateUriWithFlags,
6329 UriBuilder_GetIUri,
6330 UriBuilder_SetIUri,
6331 UriBuilder_GetFragment,
6332 UriBuilder_GetHost,
6333 UriBuilder_GetPassword,
6334 UriBuilder_GetPath,
6335 UriBuilder_GetPort,
6336 UriBuilder_GetQuery,
6337 UriBuilder_GetSchemeName,
6338 UriBuilder_GetUserName,
6339 UriBuilder_SetFragment,
6340 UriBuilder_SetHost,
6341 UriBuilder_SetPassword,
6342 UriBuilder_SetPath,
6343 UriBuilder_SetPort,
6344 UriBuilder_SetQuery,
6345 UriBuilder_SetSchemeName,
6346 UriBuilder_SetUserName,
6347 UriBuilder_RemoveProperties,
6348 UriBuilder_HasBeenModified,
6351 /***********************************************************************
6352 * CreateIUriBuilder (urlmon.@)
6354 HRESULT WINAPI CreateIUriBuilder(IUri *pIUri, DWORD dwFlags, DWORD_PTR dwReserved, IUriBuilder **ppIUriBuilder)
6356 UriBuilder *ret;
6358 TRACE("(%p %x %x %p)\n", pIUri, dwFlags, (DWORD)dwReserved, ppIUriBuilder);
6360 if(!ppIUriBuilder)
6361 return E_POINTER;
6363 ret = heap_alloc_zero(sizeof(UriBuilder));
6364 if(!ret)
6365 return E_OUTOFMEMORY;
6367 ret->IUriBuilder_iface.lpVtbl = &UriBuilderVtbl;
6368 ret->ref = 1;
6370 if(pIUri) {
6371 Uri *uri;
6373 if((uri = get_uri_obj(pIUri))) {
6374 if(!uri->create_flags) {
6375 heap_free(ret);
6376 return E_UNEXPECTED;
6378 IUri_AddRef(pIUri);
6379 ret->uri = uri;
6381 if(uri->has_port)
6382 /* Windows doesn't set 'has_port' to TRUE in this case. */
6383 ret->port = uri->port;
6385 } else {
6386 heap_free(ret);
6387 *ppIUriBuilder = NULL;
6388 FIXME("(%p %x %x %p): Unknown IUri types not supported yet.\n", pIUri, dwFlags,
6389 (DWORD)dwReserved, ppIUriBuilder);
6390 return E_NOTIMPL;
6394 *ppIUriBuilder = &ret->IUriBuilder_iface;
6395 return S_OK;
6398 /* Merges the base path with the relative path and stores the resulting path
6399 * and path len in 'result' and 'result_len'.
6401 static HRESULT merge_paths(parse_data *data, const WCHAR *base, DWORD base_len, const WCHAR *relative,
6402 DWORD relative_len, WCHAR **result, DWORD *result_len, DWORD flags)
6404 const WCHAR *end = NULL;
6405 DWORD base_copy_len = 0;
6406 WCHAR *ptr;
6408 if(base_len) {
6409 if(data->scheme_type == URL_SCHEME_MK && *relative == '/') {
6410 /* Find '::' segment */
6411 for(end = base; end < base+base_len-1; end++) {
6412 if(end[0] == ':' && end[1] == ':') {
6413 end++;
6414 break;
6418 /* If not found, try finding the end of @xxx: */
6419 if(end == base+base_len-1)
6420 end = *base == '@' ? memchr(base, ':', base_len) : NULL;
6421 }else {
6422 /* Find the characters that will be copied over from the base path. */
6423 end = memrchrW(base, '/', base_len);
6424 if(!end && data->scheme_type == URL_SCHEME_FILE)
6425 /* Try looking for a '\\'. */
6426 end = memrchrW(base, '\\', base_len);
6430 if(end) {
6431 base_copy_len = (end+1)-base;
6432 *result = heap_alloc((base_copy_len+relative_len+1)*sizeof(WCHAR));
6433 } else
6434 *result = heap_alloc((relative_len+1)*sizeof(WCHAR));
6436 if(!(*result)) {
6437 *result_len = 0;
6438 return E_OUTOFMEMORY;
6441 ptr = *result;
6442 if(end) {
6443 memcpy(ptr, base, base_copy_len*sizeof(WCHAR));
6444 ptr += base_copy_len;
6447 memcpy(ptr, relative, relative_len*sizeof(WCHAR));
6448 ptr += relative_len;
6449 *ptr = '\0';
6451 *result_len = (ptr-*result);
6452 TRACE("ret %s\n", debugstr_wn(*result, *result_len));
6453 return S_OK;
6456 static HRESULT combine_uri(Uri *base, Uri *relative, DWORD flags, IUri **result, DWORD extras) {
6457 Uri *ret;
6458 HRESULT hr;
6459 parse_data data;
6460 Uri *proc_uri = base;
6461 DWORD create_flags = 0, len = 0;
6463 memset(&data, 0, sizeof(parse_data));
6465 /* Base case is when the relative Uri has a scheme name,
6466 * if it does, then 'result' will contain the same data
6467 * as the relative Uri.
6469 if(relative->scheme_start > -1) {
6470 data.uri = SysAllocString(relative->raw_uri);
6471 if(!data.uri) {
6472 *result = NULL;
6473 return E_OUTOFMEMORY;
6476 parse_uri(&data, Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME);
6478 hr = Uri_Construct(NULL, (void**)&ret);
6479 if(FAILED(hr)) {
6480 *result = NULL;
6481 return hr;
6484 if(extras & COMBINE_URI_FORCE_FLAG_USE) {
6485 if(flags & URL_DONT_SIMPLIFY)
6486 create_flags |= Uri_CREATE_NO_CANONICALIZE;
6487 if(flags & URL_DONT_UNESCAPE_EXTRA_INFO)
6488 create_flags |= Uri_CREATE_NO_DECODE_EXTRA_INFO;
6491 ret->raw_uri = data.uri;
6492 hr = canonicalize_uri(&data, ret, create_flags);
6493 if(FAILED(hr)) {
6494 IUri_Release(&ret->IUri_iface);
6495 *result = NULL;
6496 return hr;
6499 apply_default_flags(&create_flags);
6500 ret->create_flags = create_flags;
6502 *result = &ret->IUri_iface;
6503 } else {
6504 WCHAR *path = NULL;
6505 DWORD raw_flags = 0;
6507 if(base->scheme_start > -1) {
6508 data.scheme = base->canon_uri+base->scheme_start;
6509 data.scheme_len = base->scheme_len;
6510 data.scheme_type = base->scheme_type;
6511 } else {
6512 data.is_relative = TRUE;
6513 data.scheme_type = URL_SCHEME_UNKNOWN;
6514 create_flags |= Uri_CREATE_ALLOW_RELATIVE;
6517 if(relative->authority_start > -1)
6518 proc_uri = relative;
6520 if(proc_uri->authority_start > -1) {
6521 if(proc_uri->userinfo_start > -1 && proc_uri->userinfo_split != 0) {
6522 data.username = proc_uri->canon_uri+proc_uri->userinfo_start;
6523 data.username_len = (proc_uri->userinfo_split > -1) ? proc_uri->userinfo_split : proc_uri->userinfo_len;
6526 if(proc_uri->userinfo_split > -1) {
6527 data.password = proc_uri->canon_uri+proc_uri->userinfo_start+proc_uri->userinfo_split+1;
6528 data.password_len = proc_uri->userinfo_len-proc_uri->userinfo_split-1;
6531 if(proc_uri->host_start > -1) {
6532 data.host = proc_uri->canon_uri+proc_uri->host_start;
6533 data.host_len = proc_uri->host_len;
6534 data.host_type = proc_uri->host_type;
6537 if(proc_uri->has_port) {
6538 data.has_port = TRUE;
6539 data.port_value = proc_uri->port;
6541 } else if(base->scheme_type != URL_SCHEME_FILE)
6542 data.is_opaque = TRUE;
6544 if(proc_uri == relative || relative->path_start == -1 || !relative->path_len) {
6545 if(proc_uri->path_start > -1) {
6546 data.path = proc_uri->canon_uri+proc_uri->path_start;
6547 data.path_len = proc_uri->path_len;
6548 } else if(!data.is_opaque) {
6549 /* Just set the path as a '/' if the base didn't have
6550 * one and if it's a hierarchical URI.
6552 static const WCHAR slashW[] = {'/',0};
6553 data.path = slashW;
6554 data.path_len = 1;
6557 if(relative->query_start > -1)
6558 proc_uri = relative;
6560 if(proc_uri->query_start > -1) {
6561 data.query = proc_uri->canon_uri+proc_uri->query_start;
6562 data.query_len = proc_uri->query_len;
6564 } else {
6565 const WCHAR *ptr, **pptr;
6566 DWORD path_offset = 0, path_len = 0;
6568 /* There's two possibilities on what will happen to the path component
6569 * of the result IUri. First, if the relative path begins with a '/'
6570 * then the resulting path will just be the relative path. Second, if
6571 * relative path doesn't begin with a '/' then the base path and relative
6572 * path are merged together.
6574 if(relative->path_len && *(relative->canon_uri+relative->path_start) == '/' && data.scheme_type != URL_SCHEME_MK) {
6575 WCHAR *tmp = NULL;
6576 BOOL copy_drive_path = FALSE;
6578 /* If the relative IUri's path starts with a '/', then we
6579 * don't use the base IUri's path. Unless the base IUri
6580 * is a file URI, in which case it uses the drive path of
6581 * the base IUri (if it has any) in the new path.
6583 if(base->scheme_type == URL_SCHEME_FILE) {
6584 if(base->path_len > 3 && *(base->canon_uri+base->path_start) == '/' &&
6585 is_drive_path(base->canon_uri+base->path_start+1)) {
6586 path_len += 3;
6587 copy_drive_path = TRUE;
6591 path_len += relative->path_len;
6593 path = heap_alloc((path_len+1)*sizeof(WCHAR));
6594 if(!path) {
6595 *result = NULL;
6596 return E_OUTOFMEMORY;
6599 tmp = path;
6601 /* Copy the base paths, drive path over. */
6602 if(copy_drive_path) {
6603 memcpy(tmp, base->canon_uri+base->path_start, 3*sizeof(WCHAR));
6604 tmp += 3;
6607 memcpy(tmp, relative->canon_uri+relative->path_start, relative->path_len*sizeof(WCHAR));
6608 path[path_len] = '\0';
6609 } else {
6610 /* Merge the base path with the relative path. */
6611 hr = merge_paths(&data, base->canon_uri+base->path_start, base->path_len,
6612 relative->canon_uri+relative->path_start, relative->path_len,
6613 &path, &path_len, flags);
6614 if(FAILED(hr)) {
6615 *result = NULL;
6616 return hr;
6619 /* If the resulting IUri is a file URI, the drive path isn't
6620 * reduced out when the dot segments are removed.
6622 if(path_len >= 3 && data.scheme_type == URL_SCHEME_FILE && !data.host) {
6623 if(*path == '/' && is_drive_path(path+1))
6624 path_offset = 2;
6625 else if(is_drive_path(path))
6626 path_offset = 1;
6630 /* Check if the dot segments need to be removed from the path. */
6631 if(!(flags & URL_DONT_SIMPLIFY) && !data.is_opaque) {
6632 DWORD offset = (path_offset > 0) ? path_offset+1 : 0;
6633 DWORD new_len = remove_dot_segments(path+offset,path_len-offset);
6635 if(new_len != path_len) {
6636 WCHAR *tmp = heap_realloc(path, (offset+new_len+1)*sizeof(WCHAR));
6637 if(!tmp) {
6638 heap_free(path);
6639 *result = NULL;
6640 return E_OUTOFMEMORY;
6643 tmp[new_len+offset] = '\0';
6644 path = tmp;
6645 path_len = new_len+offset;
6649 if(relative->query_start > -1) {
6650 data.query = relative->canon_uri+relative->query_start;
6651 data.query_len = relative->query_len;
6654 /* Make sure the path component is valid. */
6655 ptr = path;
6656 pptr = &ptr;
6657 if((data.is_opaque && !parse_path_opaque(pptr, &data, 0)) ||
6658 (!data.is_opaque && !parse_path_hierarchical(pptr, &data, 0))) {
6659 heap_free(path);
6660 *result = NULL;
6661 return E_INVALIDARG;
6665 if(relative->fragment_start > -1) {
6666 data.fragment = relative->canon_uri+relative->fragment_start;
6667 data.fragment_len = relative->fragment_len;
6670 if(flags & URL_DONT_SIMPLIFY)
6671 raw_flags |= RAW_URI_FORCE_PORT_DISP;
6672 if(flags & URL_FILE_USE_PATHURL)
6673 raw_flags |= RAW_URI_CONVERT_TO_DOS_PATH;
6675 len = generate_raw_uri(&data, data.uri, raw_flags);
6676 data.uri = SysAllocStringLen(NULL, len);
6677 if(!data.uri) {
6678 heap_free(path);
6679 *result = NULL;
6680 return E_OUTOFMEMORY;
6683 generate_raw_uri(&data, data.uri, raw_flags);
6685 hr = Uri_Construct(NULL, (void**)&ret);
6686 if(FAILED(hr)) {
6687 SysFreeString(data.uri);
6688 heap_free(path);
6689 *result = NULL;
6690 return hr;
6693 if(flags & URL_DONT_SIMPLIFY)
6694 create_flags |= Uri_CREATE_NO_CANONICALIZE;
6695 if(flags & URL_FILE_USE_PATHURL)
6696 create_flags |= Uri_CREATE_FILE_USE_DOS_PATH;
6698 ret->raw_uri = data.uri;
6699 hr = canonicalize_uri(&data, ret, create_flags);
6700 if(FAILED(hr)) {
6701 IUri_Release(&ret->IUri_iface);
6702 *result = NULL;
6703 return hr;
6706 if(flags & URL_DONT_SIMPLIFY)
6707 ret->display_modifiers |= URI_DISPLAY_NO_DEFAULT_PORT_AUTH;
6709 apply_default_flags(&create_flags);
6710 ret->create_flags = create_flags;
6711 *result = &ret->IUri_iface;
6713 heap_free(path);
6716 return S_OK;
6719 /***********************************************************************
6720 * CoInternetCombineIUri (urlmon.@)
6722 HRESULT WINAPI CoInternetCombineIUri(IUri *pBaseUri, IUri *pRelativeUri, DWORD dwCombineFlags,
6723 IUri **ppCombinedUri, DWORD_PTR dwReserved)
6725 HRESULT hr;
6726 IInternetProtocolInfo *info;
6727 Uri *relative, *base;
6728 TRACE("(%p %p %x %p %x)\n", pBaseUri, pRelativeUri, dwCombineFlags, ppCombinedUri, (DWORD)dwReserved);
6730 if(!ppCombinedUri)
6731 return E_INVALIDARG;
6733 if(!pBaseUri || !pRelativeUri) {
6734 *ppCombinedUri = NULL;
6735 return E_INVALIDARG;
6738 relative = get_uri_obj(pRelativeUri);
6739 base = get_uri_obj(pBaseUri);
6740 if(!relative || !base) {
6741 *ppCombinedUri = NULL;
6742 FIXME("(%p %p %x %p %x) Unknown IUri types not supported yet.\n",
6743 pBaseUri, pRelativeUri, dwCombineFlags, ppCombinedUri, (DWORD)dwReserved);
6744 return E_NOTIMPL;
6747 info = get_protocol_info(base->canon_uri);
6748 if(info) {
6749 WCHAR result[INTERNET_MAX_URL_LENGTH+1];
6750 DWORD result_len = 0;
6752 hr = IInternetProtocolInfo_CombineUrl(info, base->canon_uri, relative->canon_uri, dwCombineFlags,
6753 result, INTERNET_MAX_URL_LENGTH+1, &result_len, 0);
6754 IInternetProtocolInfo_Release(info);
6755 if(SUCCEEDED(hr)) {
6756 hr = CreateUri(result, Uri_CREATE_ALLOW_RELATIVE, 0, ppCombinedUri);
6757 if(SUCCEEDED(hr))
6758 return hr;
6762 return combine_uri(base, relative, dwCombineFlags, ppCombinedUri, 0);
6765 /***********************************************************************
6766 * CoInternetCombineUrlEx (urlmon.@)
6768 HRESULT WINAPI CoInternetCombineUrlEx(IUri *pBaseUri, LPCWSTR pwzRelativeUrl, DWORD dwCombineFlags,
6769 IUri **ppCombinedUri, DWORD_PTR dwReserved)
6771 IUri *relative;
6772 Uri *base;
6773 HRESULT hr;
6774 IInternetProtocolInfo *info;
6776 TRACE("(%p %s %x %p %x)\n", pBaseUri, debugstr_w(pwzRelativeUrl), dwCombineFlags,
6777 ppCombinedUri, (DWORD)dwReserved);
6779 if(!ppCombinedUri)
6780 return E_POINTER;
6782 if(!pwzRelativeUrl) {
6783 *ppCombinedUri = NULL;
6784 return E_UNEXPECTED;
6787 if(!pBaseUri) {
6788 *ppCombinedUri = NULL;
6789 return E_INVALIDARG;
6792 base = get_uri_obj(pBaseUri);
6793 if(!base) {
6794 *ppCombinedUri = NULL;
6795 FIXME("(%p %s %x %p %x) Unknown IUri's not supported yet.\n", pBaseUri, debugstr_w(pwzRelativeUrl),
6796 dwCombineFlags, ppCombinedUri, (DWORD)dwReserved);
6797 return E_NOTIMPL;
6800 info = get_protocol_info(base->canon_uri);
6801 if(info) {
6802 WCHAR result[INTERNET_MAX_URL_LENGTH+1];
6803 DWORD result_len = 0;
6805 hr = IInternetProtocolInfo_CombineUrl(info, base->canon_uri, pwzRelativeUrl, dwCombineFlags,
6806 result, INTERNET_MAX_URL_LENGTH+1, &result_len, 0);
6807 IInternetProtocolInfo_Release(info);
6808 if(SUCCEEDED(hr)) {
6809 hr = CreateUri(result, Uri_CREATE_ALLOW_RELATIVE, 0, ppCombinedUri);
6810 if(SUCCEEDED(hr))
6811 return hr;
6815 hr = CreateUri(pwzRelativeUrl, Uri_CREATE_ALLOW_RELATIVE|Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME, 0, &relative);
6816 if(FAILED(hr)) {
6817 *ppCombinedUri = NULL;
6818 return hr;
6821 hr = combine_uri(base, get_uri_obj(relative), dwCombineFlags, ppCombinedUri, COMBINE_URI_FORCE_FLAG_USE);
6823 IUri_Release(relative);
6824 return hr;
6827 static HRESULT parse_canonicalize(const Uri *uri, DWORD flags, LPWSTR output,
6828 DWORD output_len, DWORD *result_len)
6830 const WCHAR *ptr = NULL;
6831 WCHAR *path = NULL;
6832 const WCHAR **pptr;
6833 DWORD len = 0;
6834 BOOL reduce_path;
6836 /* URL_UNESCAPE only has effect if none of the URL_ESCAPE flags are set. */
6837 const BOOL allow_unescape = !(flags & URL_ESCAPE_UNSAFE) &&
6838 !(flags & URL_ESCAPE_SPACES_ONLY) &&
6839 !(flags & URL_ESCAPE_PERCENT);
6842 /* Check if the dot segments need to be removed from the
6843 * path component.
6845 if(uri->scheme_start > -1 && uri->path_start > -1) {
6846 ptr = uri->canon_uri+uri->scheme_start+uri->scheme_len+1;
6847 pptr = &ptr;
6849 reduce_path = !(flags & URL_DONT_SIMPLIFY) &&
6850 ptr && check_hierarchical(pptr);
6852 for(ptr = uri->canon_uri; ptr < uri->canon_uri+uri->canon_len; ++ptr) {
6853 BOOL do_default_action = TRUE;
6855 /* Keep track of the path if we need to remove dot segments from
6856 * it later.
6858 if(reduce_path && !path && ptr == uri->canon_uri+uri->path_start)
6859 path = output+len;
6861 /* Check if it's time to reduce the path. */
6862 if(reduce_path && ptr == uri->canon_uri+uri->path_start+uri->path_len) {
6863 DWORD current_path_len = (output+len) - path;
6864 DWORD new_path_len = remove_dot_segments(path, current_path_len);
6866 /* Update the current length. */
6867 len -= (current_path_len-new_path_len);
6868 reduce_path = FALSE;
6871 if(*ptr == '%') {
6872 const WCHAR decoded = decode_pct_val(ptr);
6873 if(decoded) {
6874 if(allow_unescape && (flags & URL_UNESCAPE)) {
6875 if(len < output_len)
6876 output[len] = decoded;
6877 len++;
6878 ptr += 2;
6879 do_default_action = FALSE;
6883 /* See if %'s needed to encoded. */
6884 if(do_default_action && (flags & URL_ESCAPE_PERCENT)) {
6885 if(len + 3 < output_len)
6886 pct_encode_val(*ptr, output+len);
6887 len += 3;
6888 do_default_action = FALSE;
6890 } else if(*ptr == ' ') {
6891 if((flags & URL_ESCAPE_SPACES_ONLY) &&
6892 !(flags & URL_ESCAPE_UNSAFE)) {
6893 if(len + 3 < output_len)
6894 pct_encode_val(*ptr, output+len);
6895 len += 3;
6896 do_default_action = FALSE;
6898 } else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr)) {
6899 if(flags & URL_ESCAPE_UNSAFE) {
6900 if(len + 3 < output_len)
6901 pct_encode_val(*ptr, output+len);
6902 len += 3;
6903 do_default_action = FALSE;
6907 if(do_default_action) {
6908 if(len < output_len)
6909 output[len] = *ptr;
6910 len++;
6914 /* Sometimes the path is the very last component of the IUri, so
6915 * see if the dot segments need to be reduced now.
6917 if(reduce_path && path) {
6918 DWORD current_path_len = (output+len) - path;
6919 DWORD new_path_len = remove_dot_segments(path, current_path_len);
6921 /* Update the current length. */
6922 len -= (current_path_len-new_path_len);
6925 if(len < output_len)
6926 output[len] = 0;
6927 else
6928 output[output_len-1] = 0;
6930 /* The null terminator isn't included in the length. */
6931 *result_len = len;
6932 if(len >= output_len)
6933 return STRSAFE_E_INSUFFICIENT_BUFFER;
6935 return S_OK;
6938 static HRESULT parse_friendly(IUri *uri, LPWSTR output, DWORD output_len,
6939 DWORD *result_len)
6941 HRESULT hr;
6942 DWORD display_len;
6943 BSTR display;
6945 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_DISPLAY_URI, &display_len, 0);
6946 if(FAILED(hr)) {
6947 *result_len = 0;
6948 return hr;
6951 *result_len = display_len;
6952 if(display_len+1 > output_len)
6953 return STRSAFE_E_INSUFFICIENT_BUFFER;
6955 hr = IUri_GetDisplayUri(uri, &display);
6956 if(FAILED(hr)) {
6957 *result_len = 0;
6958 return hr;
6961 memcpy(output, display, (display_len+1)*sizeof(WCHAR));
6962 SysFreeString(display);
6963 return S_OK;
6966 static HRESULT parse_rootdocument(const Uri *uri, LPWSTR output, DWORD output_len,
6967 DWORD *result_len)
6969 static const WCHAR colon_slashesW[] = {':','/','/'};
6971 WCHAR *ptr;
6972 DWORD len = 0;
6974 /* Windows only returns the root document if the URI has an authority
6975 * and it's not an unknown scheme type or a file scheme type.
6977 if(uri->authority_start == -1 ||
6978 uri->scheme_type == URL_SCHEME_UNKNOWN ||
6979 uri->scheme_type == URL_SCHEME_FILE) {
6980 *result_len = 0;
6981 if(!output_len)
6982 return STRSAFE_E_INSUFFICIENT_BUFFER;
6984 output[0] = 0;
6985 return S_OK;
6988 len = uri->scheme_len+uri->authority_len;
6989 /* For the "://" and '/' which will be added. */
6990 len += 4;
6992 if(len+1 > output_len) {
6993 *result_len = len;
6994 return STRSAFE_E_INSUFFICIENT_BUFFER;
6997 ptr = output;
6998 memcpy(ptr, uri->canon_uri+uri->scheme_start, uri->scheme_len*sizeof(WCHAR));
7000 /* Add the "://". */
7001 ptr += uri->scheme_len;
7002 memcpy(ptr, colon_slashesW, sizeof(colon_slashesW));
7004 /* Add the authority. */
7005 ptr += sizeof(colon_slashesW)/sizeof(WCHAR);
7006 memcpy(ptr, uri->canon_uri+uri->authority_start, uri->authority_len*sizeof(WCHAR));
7008 /* Add the '/' after the authority. */
7009 ptr += uri->authority_len;
7010 *ptr = '/';
7011 ptr[1] = 0;
7013 *result_len = len;
7014 return S_OK;
7017 static HRESULT parse_document(const Uri *uri, LPWSTR output, DWORD output_len,
7018 DWORD *result_len)
7020 DWORD len = 0;
7022 /* It has to be a known scheme type, but, it can't be a file
7023 * scheme. It also has to hierarchical.
7025 if(uri->scheme_type == URL_SCHEME_UNKNOWN ||
7026 uri->scheme_type == URL_SCHEME_FILE ||
7027 uri->authority_start == -1) {
7028 *result_len = 0;
7029 if(output_len < 1)
7030 return STRSAFE_E_INSUFFICIENT_BUFFER;
7032 output[0] = 0;
7033 return S_OK;
7036 if(uri->fragment_start > -1)
7037 len = uri->fragment_start;
7038 else
7039 len = uri->canon_len;
7041 *result_len = len;
7042 if(len+1 > output_len)
7043 return STRSAFE_E_INSUFFICIENT_BUFFER;
7045 memcpy(output, uri->canon_uri, len*sizeof(WCHAR));
7046 output[len] = 0;
7047 return S_OK;
7050 static HRESULT parse_path_from_url(const Uri *uri, LPWSTR output, DWORD output_len,
7051 DWORD *result_len)
7053 const WCHAR *path_ptr;
7054 WCHAR buffer[INTERNET_MAX_URL_LENGTH+1];
7055 WCHAR *ptr;
7057 if(uri->scheme_type != URL_SCHEME_FILE) {
7058 *result_len = 0;
7059 if(output_len > 0)
7060 output[0] = 0;
7061 return E_INVALIDARG;
7064 ptr = buffer;
7065 if(uri->host_start > -1) {
7066 static const WCHAR slash_slashW[] = {'\\','\\'};
7068 memcpy(ptr, slash_slashW, sizeof(slash_slashW));
7069 ptr += sizeof(slash_slashW)/sizeof(WCHAR);
7070 memcpy(ptr, uri->canon_uri+uri->host_start, uri->host_len*sizeof(WCHAR));
7071 ptr += uri->host_len;
7074 path_ptr = uri->canon_uri+uri->path_start;
7075 if(uri->path_len > 3 && *path_ptr == '/' && is_drive_path(path_ptr+1))
7076 /* Skip past the '/' in front of the drive path. */
7077 ++path_ptr;
7079 for(; path_ptr < uri->canon_uri+uri->path_start+uri->path_len; ++path_ptr, ++ptr) {
7080 BOOL do_default_action = TRUE;
7082 if(*path_ptr == '%') {
7083 const WCHAR decoded = decode_pct_val(path_ptr);
7084 if(decoded) {
7085 *ptr = decoded;
7086 path_ptr += 2;
7087 do_default_action = FALSE;
7089 } else if(*path_ptr == '/') {
7090 *ptr = '\\';
7091 do_default_action = FALSE;
7094 if(do_default_action)
7095 *ptr = *path_ptr;
7098 *ptr = 0;
7100 *result_len = ptr-buffer;
7101 if(*result_len+1 > output_len)
7102 return STRSAFE_E_INSUFFICIENT_BUFFER;
7104 memcpy(output, buffer, (*result_len+1)*sizeof(WCHAR));
7105 return S_OK;
7108 static HRESULT parse_url_from_path(IUri *uri, LPWSTR output, DWORD output_len,
7109 DWORD *result_len)
7111 HRESULT hr;
7112 BSTR received;
7113 DWORD len = 0;
7115 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_ABSOLUTE_URI, &len, 0);
7116 if(FAILED(hr)) {
7117 *result_len = 0;
7118 return hr;
7121 *result_len = len;
7122 if(len+1 > output_len)
7123 return STRSAFE_E_INSUFFICIENT_BUFFER;
7125 hr = IUri_GetAbsoluteUri(uri, &received);
7126 if(FAILED(hr)) {
7127 *result_len = 0;
7128 return hr;
7131 memcpy(output, received, (len+1)*sizeof(WCHAR));
7132 SysFreeString(received);
7134 return S_OK;
7137 static HRESULT parse_schema(IUri *uri, LPWSTR output, DWORD output_len,
7138 DWORD *result_len)
7140 HRESULT hr;
7141 DWORD len;
7142 BSTR received;
7144 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_SCHEME_NAME, &len, 0);
7145 if(FAILED(hr)) {
7146 *result_len = 0;
7147 return hr;
7150 *result_len = len;
7151 if(len+1 > output_len)
7152 return STRSAFE_E_INSUFFICIENT_BUFFER;
7154 hr = IUri_GetSchemeName(uri, &received);
7155 if(FAILED(hr)) {
7156 *result_len = 0;
7157 return hr;
7160 memcpy(output, received, (len+1)*sizeof(WCHAR));
7161 SysFreeString(received);
7163 return S_OK;
7166 static HRESULT parse_site(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len)
7168 HRESULT hr;
7169 DWORD len;
7170 BSTR received;
7172 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_HOST, &len, 0);
7173 if(FAILED(hr)) {
7174 *result_len = 0;
7175 return hr;
7178 *result_len = len;
7179 if(len+1 > output_len)
7180 return STRSAFE_E_INSUFFICIENT_BUFFER;
7182 hr = IUri_GetHost(uri, &received);
7183 if(FAILED(hr)) {
7184 *result_len = 0;
7185 return hr;
7188 memcpy(output, received, (len+1)*sizeof(WCHAR));
7189 SysFreeString(received);
7191 return S_OK;
7194 static HRESULT parse_domain(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len)
7196 HRESULT hr;
7197 DWORD len;
7198 BSTR received;
7200 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_DOMAIN, &len, 0);
7201 if(FAILED(hr)) {
7202 *result_len = 0;
7203 return hr;
7206 *result_len = len;
7207 if(len+1 > output_len)
7208 return STRSAFE_E_INSUFFICIENT_BUFFER;
7210 hr = IUri_GetDomain(uri, &received);
7211 if(FAILED(hr)) {
7212 *result_len = 0;
7213 return hr;
7216 memcpy(output, received, (len+1)*sizeof(WCHAR));
7217 SysFreeString(received);
7219 return S_OK;
7222 static HRESULT parse_anchor(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len)
7224 HRESULT hr;
7225 DWORD len;
7226 BSTR received;
7228 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_FRAGMENT, &len, 0);
7229 if(FAILED(hr)) {
7230 *result_len = 0;
7231 return hr;
7234 *result_len = len;
7235 if(len+1 > output_len)
7236 return STRSAFE_E_INSUFFICIENT_BUFFER;
7238 hr = IUri_GetFragment(uri, &received);
7239 if(FAILED(hr)) {
7240 *result_len = 0;
7241 return hr;
7244 memcpy(output, received, (len+1)*sizeof(WCHAR));
7245 SysFreeString(received);
7247 return S_OK;
7250 /***********************************************************************
7251 * CoInternetParseIUri (urlmon.@)
7253 HRESULT WINAPI CoInternetParseIUri(IUri *pIUri, PARSEACTION ParseAction, DWORD dwFlags,
7254 LPWSTR pwzResult, DWORD cchResult, DWORD *pcchResult,
7255 DWORD_PTR dwReserved)
7257 HRESULT hr;
7258 Uri *uri;
7259 IInternetProtocolInfo *info;
7261 TRACE("(%p %d %x %p %d %p %x)\n", pIUri, ParseAction, dwFlags, pwzResult,
7262 cchResult, pcchResult, (DWORD)dwReserved);
7264 if(!pcchResult)
7265 return E_POINTER;
7267 if(!pwzResult || !pIUri) {
7268 *pcchResult = 0;
7269 return E_INVALIDARG;
7272 if(!(uri = get_uri_obj(pIUri))) {
7273 *pcchResult = 0;
7274 FIXME("(%p %d %x %p %d %p %x) Unknown IUri's not supported for this action.\n",
7275 pIUri, ParseAction, dwFlags, pwzResult, cchResult, pcchResult, (DWORD)dwReserved);
7276 return E_NOTIMPL;
7279 info = get_protocol_info(uri->canon_uri);
7280 if(info) {
7281 hr = IInternetProtocolInfo_ParseUrl(info, uri->canon_uri, ParseAction, dwFlags,
7282 pwzResult, cchResult, pcchResult, 0);
7283 IInternetProtocolInfo_Release(info);
7284 if(SUCCEEDED(hr)) return hr;
7287 switch(ParseAction) {
7288 case PARSE_CANONICALIZE:
7289 hr = parse_canonicalize(uri, dwFlags, pwzResult, cchResult, pcchResult);
7290 break;
7291 case PARSE_FRIENDLY:
7292 hr = parse_friendly(pIUri, pwzResult, cchResult, pcchResult);
7293 break;
7294 case PARSE_ROOTDOCUMENT:
7295 hr = parse_rootdocument(uri, pwzResult, cchResult, pcchResult);
7296 break;
7297 case PARSE_DOCUMENT:
7298 hr = parse_document(uri, pwzResult, cchResult, pcchResult);
7299 break;
7300 case PARSE_PATH_FROM_URL:
7301 hr = parse_path_from_url(uri, pwzResult, cchResult, pcchResult);
7302 break;
7303 case PARSE_URL_FROM_PATH:
7304 hr = parse_url_from_path(pIUri, pwzResult, cchResult, pcchResult);
7305 break;
7306 case PARSE_SCHEMA:
7307 hr = parse_schema(pIUri, pwzResult, cchResult, pcchResult);
7308 break;
7309 case PARSE_SITE:
7310 hr = parse_site(pIUri, pwzResult, cchResult, pcchResult);
7311 break;
7312 case PARSE_DOMAIN:
7313 hr = parse_domain(pIUri, pwzResult, cchResult, pcchResult);
7314 break;
7315 case PARSE_LOCATION:
7316 case PARSE_ANCHOR:
7317 hr = parse_anchor(pIUri, pwzResult, cchResult, pcchResult);
7318 break;
7319 case PARSE_SECURITY_URL:
7320 case PARSE_MIME:
7321 case PARSE_SERVER:
7322 case PARSE_SECURITY_DOMAIN:
7323 *pcchResult = 0;
7324 hr = E_FAIL;
7325 break;
7326 default:
7327 *pcchResult = 0;
7328 hr = E_NOTIMPL;
7329 FIXME("(%p %d %x %p %d %p %x) Partial stub.\n", pIUri, ParseAction, dwFlags,
7330 pwzResult, cchResult, pcchResult, (DWORD)dwReserved);
7333 return hr;