From 19360ddb36da86e0f6dca6eec2b81686bebde598 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
Date: Tue, 21 May 2013 13:35:34 -0700
Subject: [PATCH] Ignore commas and nbsps for linkification.

Thanks nAS for contributing.

Signed-off-by: Edward Z. Yang
---
 NEWS                                      | 2 ++
 library/HTMLPurifier/Injector/Linkify.php | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index 1c5147a1..130b14e7 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
 # URI parsing algorithm was made more strict, so only prefixes which
   looks like schemes will actually be schemes. Thanks
   Michael Gusev for fixing.
+- Made Linkify URL parser a bit less permissive, so that non-breaking
+  spaces and commas are not included as part of URL. Thanks nAS for fixing.
 
 4.5.0, released 2013-02-17
 # Fix bug where stacked attribute transforms clobber each other;
diff --git a/library/HTMLPurifier/Injector/Linkify.php b/library/HTMLPurifier/Injector/Linkify.php
index 296dac28..6c3a1e6a 100644
--- a/library/HTMLPurifier/Injector/Linkify.php
+++ b/library/HTMLPurifier/Injector/Linkify.php
@@ -21,7 +21,8 @@ class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
 
         // there is/are URL(s). Let's split the string:
         // Note: this regex is extremely permissive
-        $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
+        $bits = preg_split('#((?:https?|ftp)://[^\s\'",<>()]+)#Su', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
+
 
         $token = array();
 
-- 
2.11.4.GIT