From 6e37ecd1c8389db63445fcfe7490db1b7b6a8383 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 16 Apr 2013 13:46:00 -0700 Subject: [PATCH] Make URI parsing algorithm more strict. Thanks Michael Gusev for contributing this patch. Signed-off-by: Edward Z. Yang --- NEWS | 5 +++++ library/HTMLPurifier/URIParser.php | 2 +- tests/HTMLPurifier/URIParserTest.php | 7 +++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 7fff16ce..1c5147a1 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,11 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . Internal change ========================== +4.6.0, unknown release date +# URI parsing algorithm was made more strict, so only prefixes which + look like schemes will actually be schemes. Thanks + Michael Gusev for fixing. + 4.5.0, released 2013-02-17 # Fix bug where stacked attribute transforms clobber each other; this also means it's no longer possible to override attribute diff --git a/library/HTMLPurifier/URIParser.php b/library/HTMLPurifier/URIParser.php index 7179e4ab..a7e5dd66 100644 --- a/library/HTMLPurifier/URIParser.php +++ b/library/HTMLPurifier/URIParser.php @@ -30,7 +30,7 @@ class HTMLPurifier_URIParser // Note that ["<>] are an addition to the RFC's recommended // characters, because they represent external delimeters. $r_URI = '!'. - '(([^:/?#"<>]+):)?'. // 2. Scheme + '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme '(//([^/?#"<>]*))?'. // 4. Authority '([^?#"<>]*)'. // 5. Path '(\?([^#"<>]*))?'. // 7. Query diff --git a/tests/HTMLPurifier/URIParserTest.php b/tests/HTMLPurifier/URIParserTest.php index 5cb5850f..cbca196c 100644 --- a/tests/HTMLPurifier/URIParserTest.php +++ b/tests/HTMLPurifier/URIParserTest.php @@ -140,6 +140,13 @@ class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness ); } + function testEmbeddedColon() { + $this->assertParsing( + '{:test:}', + null, null, null, null, '{:test:}', null, null + ); + } + } // vim: et sw=4 sts=4 -- 2.11.4.GIT