Fix #237: HTML escaping not removed from URL
author Stefan Becker <chemobejk@gmail.com>
Wed, 19 Mar 2014 19:53:43 +0000 (19 21:53 +0200)
committer Stefan Becker <chemobejk@gmail.com>
Wed, 19 Mar 2014 19:53:43 +0000 (19 21:53 +0200)
When you copy & paste (or drag & drop) a URL that includes an
escapable character, Pidgin will escape the URL when creating the
HREF attribute. I.e.

   http://test/?a=1&b=1

will result in the following HTML received by SIPE:

   <a href="http://test/?a=1&amp;b=1">http://test/?a=1&amp;b=1</a>

As we parse the HTML to reduce it to plain text, we need to un-escape
the HREF attribute to get the correct URL text to send to the other
side.

NOTE: At the time of this writing Pidgin has a bug. If you enter by hand
the following URL

   http://test/?a=1&amp;b=1

SIPE will receive the following HTML

   <A HREF="http://test/?a=1&amp;b=1">http://test/?a=1&amp;amp;b=1</A>

i.e. the URL in the HREF attribute has not been correctly escaped (only
the link text has). As SIPE will unescape the HREF attribute, we will
send to the other side

   http://test/?a=1&b=1

src/core/sipmsg.c

index 778f61a..316f802 100644 (file)
@@ -3,7 +3,7 @@
  *
  * pidgin-sipe
  *
- * Copyright (C) 2010-2013 SIPE Project <http://sipe.sourceforge.net/>
+ * Copyright (C) 2010-2014 SIPE Project <http://sipe.sourceforge.net/>
  * Copyright (C) 2008 Novell, Inc.
  * Copyright (C) 2005 Thomas Butter <butter@uni-mannheim.de>
  *
@@ -387,7 +387,7 @@ void sipmsg_parse_p_asserted_identity(const gchar *header, gchar **sip_uri,
        }
 
        parts = g_strsplit(header, ",", 0);
-       
+
        for (p = parts; *p; p++) {
                gchar *uri = sipmsg_find_part_of_header(*p, "<", ">", NULL);
                if (!uri)
@@ -872,6 +872,29 @@ sipe_parse_html(const char *html, char **attributes, char **message)
        g_return_if_fail(attributes != NULL);
        g_return_if_fail(message    != NULL);
 
+#define _HTML_UNESCAPE \
+       if (!g_ascii_strncasecmp(c, "&lt;", 4)) { \
+               msg[retcount++] = '<'; \
+               c += 4; \
+       } else if (!g_ascii_strncasecmp(c, "&gt;", 4)) { \
+               msg[retcount++] = '>'; \
+               c += 4; \
+       } else if (!g_ascii_strncasecmp(c, "&nbsp;", 6)) { \
+               msg[retcount++] = ' '; \
+               c += 6; \
+       } else if (!g_ascii_strncasecmp(c, "&quot;", 6)) { \
+               msg[retcount++] = '"'; \
+               c += 6; \
+       } else if (!g_ascii_strncasecmp(c, "&amp;", 5)) { \
+               msg[retcount++] = '&'; \
+               c += 5; \
+       } else if (!g_ascii_strncasecmp(c, "&apos;", 6)) { \
+               msg[retcount++] = '\''; \
+               c += 6; \
+       } else { \
+               msg[retcount++] = *c++; \
+       }
+
        len = strlen(html);
        msg = g_malloc0(len + 1);
 
@@ -933,7 +956,10 @@ sipe_parse_html(const char *html, char **attributes, char **message)
                                        c += 7;
 
                                while ((*c != '\0') && g_ascii_strncasecmp(c, "\">", 2))
-                                       msg[retcount++] = *c++;
+                                       if (*c == '&') {
+                                               _HTML_UNESCAPE;
+                                       } else
+                                               msg[retcount++] = *c++;
 
                                if (*c != '\0')
                                        c += 2;
@@ -1046,38 +1072,7 @@ sipe_parse_html(const char *html, char **attributes, char **message)
                }
                else if (*c == '&')
                {
-                       if (!g_ascii_strncasecmp(c, "&lt;", 4))
-                       {
-                               msg[retcount++] = '<';
-                               c += 4;
-                       }
-                       else if (!g_ascii_strncasecmp(c, "&gt;", 4))
-                       {
-                               msg[retcount++] = '>';
-                               c += 4;
-                       }
-                       else if (!g_ascii_strncasecmp(c, "&nbsp;", 6))
-                       {
-                               msg[retcount++] = ' ';
-                               c += 6;
-                       }
-                       else if (!g_ascii_strncasecmp(c, "&quot;", 6))
-                       {
-                               msg[retcount++] = '"';
-                               c += 6;
-                       }
-                       else if (!g_ascii_strncasecmp(c, "&amp;", 5))
-                       {
-                               msg[retcount++] = '&';
-                               c += 5;
-                       }
-                       else if (!g_ascii_strncasecmp(c, "&apos;", 6))
-                       {
-                               msg[retcount++] = '\'';
-                               c += 6;
-                       }
-                       else
-                               msg[retcount++] = *c++;
+                       _HTML_UNESCAPE;
                }
                else
                        msg[retcount++] = *c++;
@@ -1092,6 +1087,8 @@ sipe_parse_html(const char *html, char **attributes, char **message)
        *message = msg;
 
        g_free(fontface);
+
+#undef _HTML_UNESCAPE
 }
 // End of TEMP