From: Stefan Becker Date: Wed, 19 Mar 2014 19:53:43 +0000 (+0200) Subject: Fix #237: HTML escaping not removed from URL X-Git-Tag: 1.18.1~10^2~3 X-Git-Url: https://repo.or.cz/w/siplcs.git/commitdiff_plain/934aadcedd9824697d26684b48a4057e6103d0be Fix #237: HTML escaping not removed from URL When you copy & paste (or drag & drop) a URL that includes an escapable character, then Pidgin will escape the URL when creating the HREF attribute. I.e. http://test/?a=1&b=1 will result in the following HTML received by SIPE: <A HREF="http://test/?a=1&amp;b=1">http://test/?a=1&amp;b=1</A> As we parse the HTML to reduce it to plain text, we need to un-escape the HREF attribute to get the correct URL text to send to the other side. NOTE: At the time of this writing Pidgin has a bug. If you enter by hand the following URL http://test/?a=1&amp;b=1 SIPE will receive the following HTML <A HREF="http://test/?a=1&amp;b=1">http://test/?a=1&amp;amp;b=1</A> i.e. the URL has not been correctly escaped. As SIPE will unescape the HREF attribute, we will send to the other side http://test/?a=1&b=1 --- diff --git a/src/core/sipmsg.c b/src/core/sipmsg.c index 778f61a0..316f8021 100644 --- a/src/core/sipmsg.c +++ b/src/core/sipmsg.c @@ -3,7 +3,7 @@ * * pidgin-sipe * - * Copyright (C) 2010-2013 SIPE Project + * Copyright (C) 2010-2014 SIPE Project * Copyright (C) 2008 Novell, Inc. 
 * Copyright (C) 2005 Thomas Butter * @@ -387,7 +387,7 @@ void sipmsg_parse_p_asserted_identity(const gchar *header, gchar **sip_uri, } parts = g_strsplit(header, ",", 0); - + for (p = parts; *p; p++) { gchar *uri = sipmsg_find_part_of_header(*p, "<", ">", NULL); if (!uri) @@ -872,6 +872,29 @@ sipe_parse_html(const char *html, char **attributes, char **message) g_return_if_fail(attributes != NULL); g_return_if_fail(message != NULL); +#define _HTML_UNESCAPE \ + if (!g_ascii_strncasecmp(c, "&lt;", 4)) { \ + msg[retcount++] = '<'; \ + c += 4; \ + } else if (!g_ascii_strncasecmp(c, "&gt;", 4)) { \ + msg[retcount++] = '>'; \ + c += 4; \ + } else if (!g_ascii_strncasecmp(c, "&nbsp;", 6)) { \ + msg[retcount++] = ' '; \ + c += 6; \ + } else if (!g_ascii_strncasecmp(c, "&quot;", 6)) { \ + msg[retcount++] = '"'; \ + c += 6; \ + } else if (!g_ascii_strncasecmp(c, "&amp;", 5)) { \ + msg[retcount++] = '&'; \ + c += 5; \ + } else if (!g_ascii_strncasecmp(c, "&apos;", 6)) { \ + msg[retcount++] = '\''; \ + c += 6; \ + } else { \ + msg[retcount++] = *c++; \ + } + len = strlen(html); msg = g_malloc0(len + 1); @@ -933,7 +956,10 @@ sipe_parse_html(const char *html, char **attributes, char **message) c += 7; while ((*c != '\0') && g_ascii_strncasecmp(c, "\">", 2)) - msg[retcount++] = *c++; + if (*c == '&') { + _HTML_UNESCAPE; + } else + msg[retcount++] = *c++; if (*c != '\0') c += 2; @@ -1046,38 +1072,7 @@ sipe_parse_html(const char *html, char **attributes, char **message) } else if (*c == '&') { - if (!g_ascii_strncasecmp(c, "&lt;", 4)) - { - msg[retcount++] = '<'; - c += 4; - } - else if (!g_ascii_strncasecmp(c, "&gt;", 4)) - { - msg[retcount++] = '>'; - c += 4; - } - else if (!g_ascii_strncasecmp(c, "&nbsp;", 6)) - { - msg[retcount++] = ' '; - c += 6; - } - else if (!g_ascii_strncasecmp(c, "&quot;", 6)) - { - msg[retcount++] = '"'; - c += 6; - } - else if (!g_ascii_strncasecmp(c, "&amp;", 5)) - { - msg[retcount++] = '&'; - c += 5; - } - else if (!g_ascii_strncasecmp(c, "&apos;", 6)) - { - msg[retcount++] = '\''; - c += 6; - } - 
else - msg[retcount++] = *c++; + _HTML_UNESCAPE; } else msg[retcount++] = *c++; @@ -1092,6 +1087,8 @@ sipe_parse_html(const char *html, char **attributes, char **message) *message = msg; g_free(fontface); + +#undef _HTML_UNESCAPE } // End of TEMP