From fdfcfc6aa05ea03138ce298f85906448b2ee9fb9 Mon Sep 17 00:00:00 2001 From: Thomas Mullaly Date: Tue, 27 Jul 2010 20:03:47 -0400 Subject: [PATCH] urlmon: Implemented canonicalization of query strings. --- dlls/urlmon/tests/uri.c | 210 ++++++++++++++++++++++++++++++++++++++++++++++++ dlls/urlmon/uri.c | 74 +++++++++++++++++ 2 files changed, 284 insertions(+) diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c index 9a0e6ff9530..1cb8647b8d9 100644 --- a/dlls/urlmon/tests/uri.c +++ b/dlls/urlmon/tests/uri.c @@ -2921,6 +2921,216 @@ static const uri_properties uri_tests[] = { {URL_SCHEME_HTTP,S_OK,FALSE}, {URLZONE_INVALID,E_NOTIMPL,FALSE}, } + }, + /* Forbidden characters in query aren't percent encoded for known scheme types with this flag. */ + { "http://www.winehq.org/tests/..?query=<|>&return=y", Uri_CREATE_NO_DECODE_EXTRA_INFO, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|Uri_HAS_HOST| + Uri_HAS_DOMAIN|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_PORT|Uri_HAS_QUERY|Uri_HAS_RAW_URI| + Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"http://www.winehq.org/?query=<|>&return=y",S_OK,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"http://www.winehq.org/?query=<|>&return=y",S_OK,TRUE}, + {"winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,FALSE}, + {"/?query=<|>&return=y",S_OK,TRUE}, + {"?query=<|>&return=y",S_OK,TRUE}, + {"http://www.winehq.org/tests/..?query=<|>&return=y",S_OK,FALSE}, + {"http",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {80,S_OK,FALSE}, + {URL_SCHEME_HTTP,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE}, + } + }, + /* Forbidden characters in query aren't percent encoded for known scheme types with this flag. 
*/ + { "http://www.winehq.org/tests/..?query=<|>&return=y", Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|Uri_HAS_HOST| + Uri_HAS_DOMAIN|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_PORT|Uri_HAS_QUERY|Uri_HAS_RAW_URI| + Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"http://www.winehq.org/?query=<|>&return=y",S_OK,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"http://www.winehq.org/?query=<|>&return=y",S_OK,TRUE}, + {"winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,FALSE}, + {"/?query=<|>&return=y",S_OK,TRUE}, + {"?query=<|>&return=y",S_OK,TRUE}, + {"http://www.winehq.org/tests/..?query=<|>&return=y",S_OK,FALSE}, + {"http",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {80,S_OK,FALSE}, + {URL_SCHEME_HTTP,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE}, + } + }, + /* Forbidden characters are encoded for known scheme types. 
*/ + { "http://www.winehq.org/tests/..?query=<|>&return=y", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|Uri_HAS_HOST| + Uri_HAS_DOMAIN|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_PORT|Uri_HAS_QUERY|Uri_HAS_RAW_URI| + Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"http://www.winehq.org/?query=%3C%7C%3E&return=y",S_OK,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"http://www.winehq.org/?query=%3C%7C%3E&return=y",S_OK,TRUE}, + {"winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,FALSE}, + {"/?query=%3C%7C%3E&return=y",S_OK,TRUE}, + {"?query=%3C%7C%3E&return=y",S_OK,TRUE}, + {"http://www.winehq.org/tests/..?query=<|>&return=y",S_OK,FALSE}, + {"http",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {80,S_OK,FALSE}, + {URL_SCHEME_HTTP,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE}, + } + }, + /* Forbidden characters are not encoded for unknown scheme types. 
*/ + { "zip://www.winehq.org/tests/..?query=<|>&return=y", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|Uri_HAS_HOST| + Uri_HAS_DOMAIN|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_QUERY|Uri_HAS_RAW_URI| + Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"zip://www.winehq.org/?query=<|>&return=y",S_OK,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"zip://www.winehq.org/?query=<|>&return=y",S_OK,TRUE}, + {"winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,FALSE}, + {"/?query=<|>&return=y",S_OK,TRUE}, + {"?query=<|>&return=y",S_OK,TRUE}, + {"zip://www.winehq.org/tests/..?query=<|>&return=y",S_OK,FALSE}, + {"zip",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_UNKNOWN,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE}, + } + }, + /* Percent encoded, unreserved characters are decoded for known scheme types. 
*/ + { "http://www.winehq.org/tests/..?query=%30%31&return=y", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|Uri_HAS_HOST| + Uri_HAS_DOMAIN|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_PORT|Uri_HAS_QUERY|Uri_HAS_RAW_URI| + Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"http://www.winehq.org/?query=01&return=y",S_OK,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"http://www.winehq.org/?query=01&return=y",S_OK,TRUE}, + {"winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,FALSE}, + {"/?query=01&return=y",S_OK,TRUE}, + {"?query=01&return=y",S_OK,TRUE}, + {"http://www.winehq.org/tests/..?query=%30%31&return=y",S_OK,FALSE}, + {"http",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {80,S_OK,FALSE}, + {URL_SCHEME_HTTP,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE}, + } + }, + /* Percent encoded, unreserved characters aren't decoded for unknown scheme types. 
*/ + { "zip://www.winehq.org/tests/..?query=%30%31&return=y", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|Uri_HAS_HOST| + Uri_HAS_DOMAIN|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_QUERY|Uri_HAS_RAW_URI| + Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"zip://www.winehq.org/?query=%30%31&return=y",S_OK,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"zip://www.winehq.org/?query=%30%31&return=y",S_OK,TRUE}, + {"winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,FALSE}, + {"/?query=%30%31&return=y",S_OK,TRUE}, + {"?query=%30%31&return=y",S_OK,TRUE}, + {"zip://www.winehq.org/tests/..?query=%30%31&return=y",S_OK,FALSE}, + {"zip",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_UNKNOWN,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE}, + } + }, + /* Percent encoded characters aren't decoded when NO_DECODE_EXTRA_INFO is set. 
*/ + { "http://www.winehq.org/tests/..?query=%30%31&return=y", Uri_CREATE_NO_DECODE_EXTRA_INFO, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|Uri_HAS_HOST| + Uri_HAS_DOMAIN|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_PORT|Uri_HAS_QUERY|Uri_HAS_RAW_URI| + Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"http://www.winehq.org/?query=%30%31&return=y",S_OK,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"http://www.winehq.org/?query=%30%31&return=y",S_OK,TRUE}, + {"winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,FALSE}, + {"/?query=%30%31&return=y",S_OK,TRUE}, + {"?query=%30%31&return=y",S_OK,TRUE}, + {"http://www.winehq.org/tests/..?query=%30%31&return=y",S_OK,FALSE}, + {"http",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {80,S_OK,FALSE}, + {URL_SCHEME_HTTP,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE}, + } } }; diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c index c63cffa900b..0469afbdcb4 100644 --- a/dlls/urlmon/uri.c +++ b/dlls/urlmon/uri.c @@ -62,6 +62,9 @@ typedef struct { INT path_start; DWORD path_len; INT extension_offset; + + INT query_start; + DWORD query_len; } Uri; typedef struct { @@ -2643,6 +2646,66 @@ static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, return TRUE; } +/* Attempts to canonicalize the query string of the URI. + * + * Things that happen: + * 1) For known scheme types forbidden characters + * are percent encoded, unless the NO_DECODE_EXTRA_INFO flag is set + * or NO_ENCODE_FORBIDDEN_CHARACTERS is set. + * + * 2) For known scheme types, percent encoded, unreserved characters + * are decoded as long as the NO_DECODE_EXTRA_INFO flag isn't set. 
+ */
+static BOOL canonicalize_query(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { /* When computeOnly is set, only canon_len is advanced; canon_uri is never written. */
+    const WCHAR *ptr, *end;
+    const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; /* Both the decoding and the encoding below are applied only to known schemes. */
+
+    if(!data->query) { /* No query pointer in the parse data: record the query as absent and report success. */
+        uri->query_start = -1;
+        uri->query_len = 0;
+        return TRUE;
+    }
+
+    uri->query_start = uri->canon_len; /* The query begins at the current end of the canonical output. */
+
+    end = data->query+data->query_len;
+    for(ptr = data->query; ptr < end; ++ptr) {
+        if(*ptr == '%') {
+            if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
+                WCHAR val = decode_pct_val(ptr); /* NOTE(review): presumably decodes the two hex digits that follow ptr; the helper is not visible here, confirm. */
+                if(is_unreserved(val)) {
+                    if(!computeOnly)
+                        uri->canon_uri[uri->canon_len] = val;
+                    ++uri->canon_len;
+
+                    ptr += 2; /* Together with the ++ptr of the loop this advances three input characters. */
+                    continue;
+                }
+            }
+        } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) { /* Known scheme, and the character is neither unreserved nor reserved. */
+            if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
+               !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { /* Either flag on its own suppresses the encoding. */
+                if(!computeOnly)
+                    pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
+                uri->canon_len += 3; /* One input character occupies three output characters. */
+                continue;
+            }
+        }
+
+        if(!computeOnly) /* Reached for everything not consumed above, including a percent sign that was not decoded: copy the character unchanged. */
+            uri->canon_uri[uri->canon_len] = *ptr;
+        ++uri->canon_len;
+    }
+
+    uri->query_len = uri->canon_len - uri->query_start; /* Length of the canonical query, measured in output characters. */
+
+    if(!computeOnly)
+        TRACE("(%p %p %x %d): Canonicalized query string %s len=%d\n", data, uri, flags,
+            computeOnly, debugstr_wn(uri->canon_uri+uri->query_start, uri->query_len),
+            uri->query_len);
+    return TRUE;
+}
+
 /* Canonicalizes the scheme information specified in the parse_data using the specified flags. */
 static BOOL canonicalize_scheme(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
     uri->scheme_start = -1;
@@ -2705,6 +2768,11 @@ static int compute_canonicalized_length(const parse_data *data, DWORD flags) {
         return -1;
     }
 
+    if(!canonicalize_query(data, &uri, flags, TRUE)) {
+        ERR("(%p %x): Failed to compute query string length.\n", data, flags);
+        return -1;
+    }
+
     TRACE("(%p %x): Finished computing canonicalized URI length. 
length=%d\n", data, flags, uri.canon_len); return uri.canon_len; @@ -2752,6 +2820,12 @@ static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) { return E_INVALIDARG; } + if(!canonicalize_query(data, uri, flags, FALSE)) { + ERR("(%p %p %x): Unable to canonicalize query string of the URI.\n", + data, uri, flags); + return E_INVALIDARG; + } + /* There's a possibility we didn't use all the space we allocated * earlier. */ -- 2.11.4.GIT