From 53c2907706dfcf5830abfb7513d63f2105c8d135 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Fri, 26 Jul 2013 21:33:39 -0700 Subject: [PATCH] New directive %Core.AllowHostnameUnderscore Signed-off-by: Edward Z. Yang --- NEWS | 2 ++ configdoc/usage.xml | 11 ++++++++--- library/HTMLPurifier/AttrDef/URI/Host.php | 15 ++++++++++++++- library/HTMLPurifier/ConfigSchema/schema.ser | Bin 14880 -> 15000 bytes .../schema/Core.AllowHostnameUnderscore.txt | 16 ++++++++++++++++ tests/HTMLPurifier/AttrDef/URI/HostTest.php | 7 +++++++ 6 files changed, 47 insertions(+), 4 deletions(-) rewrite library/HTMLPurifier/ConfigSchema/schema.ser (90%) create mode 100644 library/HTMLPurifier/ConfigSchema/schema/Core.AllowHostnameUnderscore.txt diff --git a/NEWS b/NEWS index f715f4f6..4b134259 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier # URI parsing algorithm was made more strict, so only prefixes which looks like schemes will actually be schemes. Thanks Michael Gusev for fixing. +! New directive %Core.AllowHostnameUnderscore which allows underscores + in hostnames. - Made Linkify URL parser a bit less permissive, so that non-breaking spaces and commas are not included as part of URL. Thanks nAS for fixing. - Fix some bad interactions with %HTML.Allowed and injectors. Thanks diff --git a/configdoc/usage.xml b/configdoc/usage.xml index 79f38b85..6349b26d 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -2,7 +2,7 @@ - 131 + 150 81 @@ -54,7 +54,7 @@ - 49 + 59 @@ -355,9 +355,14 @@ 30 + + + 61 + + - 67 + 80 diff --git a/library/HTMLPurifier/AttrDef/URI/Host.php b/library/HTMLPurifier/AttrDef/URI/Host.php index 125decb2..b3d45dc6 100644 --- a/library/HTMLPurifier/AttrDef/URI/Host.php +++ b/library/HTMLPurifier/AttrDef/URI/Host.php @@ -47,10 +47,23 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef // This doesn't match I18N domain names, but we don't have proper IRI support, // so force users to insert Punycode. 
+ // There is not a good sense in which underscores should be + // allowed, since it's technically not! (And if you go as + // far as to allow everything as specified by the DNS spec... + // well, that's literally everything, modulo some space limits + // for the components and the overall name (which, by the way, + // we are NOT checking!).) So we (arbitrarily) decide this: + // let's allow underscores wherever we would have allowed + // hyphens, if they are enabled. This is a pretty good match + // for browser behavior, for example, a large number of browsers + // cannot handle foo_.example.com, but foo_bar.example.com is + // fairly well supported. + $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : ''; + // The productions describing this are: $a = '[a-z]'; // alpha $an = '[a-z0-9]'; // alphanum - $and = '[a-z0-9-]'; // alphanum | "-" + $and = "[a-z0-9-$underscore]"; // alphanum | "-" // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum $domainlabel = "$an($and*$an)?"; // toplabel = alpha | alpha *( alphanum | "-" ) alphanum diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser dissimilarity index 90% index fa0bacb9476cab9e69889141969c6fefd2b4419c..22ea32185db63b19d525f509ebe431f593e92271 100644 GIT binary patch delta 163 zcwSpjGNW{YIitm9dm&#|RU->4CFlI2R6WO>ocwZ+{Nj?l#N5=-yp+_U;$)z>l68`m n!Q_2f(wpb9F61FcS0vBmzsiP;hMV + By RFC 1123, underscores are not permitted in host names. + (This is in contrast to the specification for DNS, RFC + 2181, which allows underscores.) + However, most browsers do the right thing when faced with + an underscore in the host name, and so some poorly written + websites are written with the expectation this should work. + Setting this parameter to true relaxes our allowed character + check so that underscores are permitted. +

+--# vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/AttrDef/URI/HostTest.php b/tests/HTMLPurifier/AttrDef/URI/HostTest.php index b5827718..77d84319 100644 --- a/tests/HTMLPurifier/AttrDef/URI/HostTest.php +++ b/tests/HTMLPurifier/AttrDef/URI/HostTest.php @@ -33,6 +33,7 @@ class HTMLPurifier_AttrDef_URI_HostTest extends HTMLPurifier_AttrDefHarness $this->assertDef('-f.top', false); $this->assertDef('ff.top'); $this->assertDef('f1.top'); + $this->assertDef('f1_f2.ex.top', false); $this->assertDef('f-.top', false); $this->assertDef("\xE4\xB8\xAD\xE6\x96\x87.com.cn", false); @@ -48,6 +49,12 @@ class HTMLPurifier_AttrDef_URI_HostTest extends HTMLPurifier_AttrDefHarness $this->assertDef("\xe2\x80\x85.com", false); // rejected } + function testAllowUnderscore() { + $this->config->set('Core.AllowHostnameUnderscore', true); + $this->assertDef("foo_bar.example.com"); + $this->assertDef("foo_.example.com", false); + } + } // vim: et sw=4 sts=4 -- 2.11.4.GIT