From 36b1006c6b30a3fc5ca1ffe066b98c226cbb9ef4 Mon Sep 17 00:00:00 2001 From: Thomas Mullaly Date: Mon, 26 Jul 2010 20:30:21 -0400 Subject: [PATCH] urlmon: Implemented a parser for URI query strings. --- dlls/urlmon/tests/uri.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++- dlls/urlmon/uri.c | 47 +++++++++++++++++++++++++++++++++++- 2 files changed, 109 insertions(+), 2 deletions(-) diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c index be437515361..9a0e6ff9530 100644 --- a/dlls/urlmon/tests/uri.c +++ b/dlls/urlmon/tests/uri.c @@ -2861,6 +2861,66 @@ static const uri_properties uri_tests[] = { {URL_SCHEME_UNKNOWN,S_OK,FALSE}, {URLZONE_INVALID,E_NOTIMPL,FALSE} } + }, + /* Unknown scheme types can have invalid % encoded data in query string. */ + { "zip://www.winehq.org/tests/..?query=%xx&return=y", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|Uri_HAS_HOST| + Uri_HAS_DOMAIN|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_QUERY|Uri_HAS_RAW_URI| + Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"zip://www.winehq.org/?query=%xx&return=y",S_OK,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"zip://www.winehq.org/?query=%xx&return=y",S_OK,TRUE}, + {"winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,FALSE}, + {"/?query=%xx&return=y",S_OK,TRUE}, + {"?query=%xx&return=y",S_OK,TRUE}, + {"zip://www.winehq.org/tests/..?query=%xx&return=y",S_OK,FALSE}, + {"zip",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_UNKNOWN,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE}, + } + }, + /* Known scheme types can have invalid % encoded data with the right flags. 
*/ + { "http://www.winehq.org/tests/..?query=%xx&return=y", Uri_CREATE_NO_DECODE_EXTRA_INFO, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|Uri_HAS_HOST| + Uri_HAS_DOMAIN|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_PORT|Uri_HAS_QUERY|Uri_HAS_RAW_URI| + Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"http://www.winehq.org/?query=%xx&return=y",S_OK,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"http://www.winehq.org/?query=%xx&return=y",S_OK,TRUE}, + {"winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"www.winehq.org",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,FALSE}, + {"/?query=%xx&return=y",S_OK,TRUE}, + {"?query=%xx&return=y",S_OK,TRUE}, + {"http://www.winehq.org/tests/..?query=%xx&return=y",S_OK,FALSE}, + {"http",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {80,S_OK,FALSE}, + {URL_SCHEME_HTTP,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE}, + } } }; @@ -2913,7 +2973,9 @@ static const invalid_uri invalid_uri_tests[] = { {"gopher://www.google.com\\test",Uri_CREATE_NO_CANONICALIZE,FALSE}, /* Not allowed to have invalid % encoded data in opaque URI path. */ {"news:test%XX",0,FALSE}, - {"mailto:wine@winehq%G8.com",0,FALSE} + {"mailto:wine@winehq%G8.com",0,FALSE}, + /* Known scheme types can't have invalid % encoded data in query string. */ + {"http://google.com/?query=te%xx",0,FALSE} }; typedef struct _uri_equality { diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c index 49503317424..c63cffa900b 100644 --- a/dlls/urlmon/uri.c +++ b/dlls/urlmon/uri.c @@ -120,6 +120,9 @@ typedef struct { const WCHAR *path; DWORD path_len; + + const WCHAR *query; + DWORD query_len; } parse_data; static const CHAR hexDigits[] = "0123456789ABCDEF"; @@ -1738,6 +1741,45 @@ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) { return TRUE; } +/* Attempts to parse the query string from the URI. 
+ * + * NOTES: + * If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded + * data is allowed to appear in the query string. For unknown scheme types + * invalid percent encoded data is allowed to appear regardless. + */ +static BOOL parse_query(const WCHAR **ptr, parse_data *data, DWORD flags) { + const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; + + if(**ptr != '?') { + TRACE("(%p %p %x): URI didn't contain a query string.\n", ptr, data, flags); + return TRUE; + } + + data->query = *ptr; + + ++(*ptr); + while(**ptr && **ptr != '#') { + if(**ptr == '%' && known_scheme && + !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { + if(!check_pct_encoded(ptr)) { + *ptr = data->query; + data->query = NULL; + return FALSE; + } else + continue; + } + + ++(*ptr); + } + + data->query_len = *ptr - data->query; + + TRACE("(%p %p %x): Parsed query string %s len=%d\n", ptr, data, flags, + debugstr_wn(data->query, data->query_len), data->query_len); + return TRUE; +} + /* Parses and validates the components of the specified by data->uri * and stores the information it parses into 'data'. * @@ -1758,7 +1800,10 @@ static BOOL parse_uri(parse_data *data, DWORD flags) { if(!parse_hierpart(pptr, data, flags)) return FALSE; - /* TODO: Parse query and fragment (if the URI has one). */ + if(!parse_query(pptr, data, flags)) + return FALSE; + + /* TODO: Parse fragment (if the URI has one). */ TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags); return TRUE; -- 2.11.4.GIT