From 8c80349f9dfd33b54947b95abdc026bdd6623374 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 28 Sep 2010 12:01:57 -0400 Subject: [PATCH] Implement HTML.Nofollow for external links. Signed-off-by: Edward Z. Yang --- NEWS | 1 + configdoc/usage.xml | 9 ++++- library/HTMLPurifier.includes.php | 2 + library/HTMLPurifier.safe-includes.php | 2 + library/HTMLPurifier/AttrTransform/Nofollow.php | 41 +++++++++++++++++++++ library/HTMLPurifier/ConfigSchema/schema.ser | Bin 13701 -> 13776 bytes .../ConfigSchema/schema/HTML.Nofollow.txt | 7 ++++ library/HTMLPurifier/HTMLModule/Nofollow.php | 19 ++++++++++ library/HTMLPurifier/HTMLModuleManager.php | 8 ++-- tests/HTMLPurifier/HTMLModule/NofollowTest.php | 20 ++++++++++ 10 files changed, 103 insertions(+), 6 deletions(-) create mode 100644 library/HTMLPurifier/AttrTransform/Nofollow.php rewrite library/HTMLPurifier/ConfigSchema/schema.ser (77%) create mode 100644 library/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt create mode 100644 library/HTMLPurifier/HTMLModule/Nofollow.php create mode 100644 tests/HTMLPurifier/HTMLModule/NofollowTest.php diff --git a/NEWS b/NEWS index e02fec38..53cfe242 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ========================== 4.2.1, unknown release date +! Added %HTML.Nofollow to add rel="nofollow" to external links. - Make removal of conditional IE comments ungreedy; thanks Bernd for reporting. diff --git a/configdoc/usage.xml b/configdoc/usage.xml index bd2f1a8a..d97dc6c9 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -186,16 +186,21 @@ - 221 + 220 - 226 + 223 + 226 + + + + 229 diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php index 0c9a78ee..abee9a2e 100644 --- a/library/HTMLPurifier.includes.php +++ b/library/HTMLPurifier.includes.php @@ -125,6 +125,7 @@ require 'HTMLPurifier/AttrTransform/Lang.php'; require 'HTMLPurifier/AttrTransform/Length.php'; require 'HTMLPurifier/AttrTransform/Name.php'; require 'HTMLPurifier/AttrTransform/NameSync.php'; +require 'HTMLPurifier/AttrTransform/Nofollow.php'; require 'HTMLPurifier/AttrTransform/SafeEmbed.php'; require 'HTMLPurifier/AttrTransform/SafeObject.php'; require 'HTMLPurifier/AttrTransform/SafeParam.php'; @@ -151,6 +152,7 @@ require 'HTMLPurifier/HTMLModule/Image.php'; require 'HTMLPurifier/HTMLModule/Legacy.php'; require 'HTMLPurifier/HTMLModule/List.php'; require 'HTMLPurifier/HTMLModule/Name.php'; +require 'HTMLPurifier/HTMLModule/Nofollow.php'; require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php'; require 'HTMLPurifier/HTMLModule/Object.php'; require 'HTMLPurifier/HTMLModule/Presentation.php'; diff --git a/library/HTMLPurifier.safe-includes.php b/library/HTMLPurifier.safe-includes.php index ec68b498..a5c0d5bb 100644 --- a/library/HTMLPurifier.safe-includes.php +++ b/library/HTMLPurifier.safe-includes.php @@ -119,6 +119,7 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php'; +require_once $__dir . '/HTMLPurifier/AttrTransform/Nofollow.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php'; @@ -145,6 +146,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/List.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php'; +require_once $__dir . '/HTMLPurifier/HTMLModule/Nofollow.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php'; diff --git a/library/HTMLPurifier/AttrTransform/Nofollow.php b/library/HTMLPurifier/AttrTransform/Nofollow.php new file mode 100644 index 00000000..573b42c9 --- /dev/null +++ b/library/HTMLPurifier/AttrTransform/Nofollow.php @@ -0,0 +1,41 @@ +parser = new HTMLPurifier_URIParser(); + } + + public function transform($attr, $config, $context) { + + if (!isset($attr['href'])) { + return $attr; + } + + // XXX Kind of inefficient + $url = $this->parser->parse($attr['href']); + $scheme = $url->getSchemeObj($config, $context); + + if (!is_null($url->host) && $scheme !== false && $scheme->browsable) { + if (isset($attr['rel'])) { + $attr['rel'] .= ' nofollow'; + } else { + $attr['rel'] = 'nofollow'; + } + } + + return $attr; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser dissimilarity index 77% index 978089c6291e7d828f6233ac632cedd8a98fdfd7..1f99bb9b4035daafbfd4aa486149cdb94c709110 100644 GIT binary patch delta 116 zcwTGGzK}h^oY8c$yrS&JP*WZeV=E<(5MLiXzx=fPoSgh}CF>-s&8<9!EQHm(WS;z1 Y!I05nv%KO@K2c0XnO5eTMGOjA0XKvs6951J delta 54 ycwW1Z-I_hYoY7=*prY*Nw*sa-n-}xwu%PmvFi)0OG-Nd29H{t{Z*#IiE-L^Hl@fjc diff --git a/library/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt b/library/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt new file mode 100644 index 00000000..718835af --- /dev/null +++ b/library/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt @@ -0,0 +1,7 @@ +HTML.Nofollow +TYPE: bool +VERSION: 4.2.1 +DEFAULT: FALSE +--DESCRIPTION-- +If enabled, nofollow rel attributes are added to all outgoing links. +--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Nofollow.php b/library/HTMLPurifier/HTMLModule/Nofollow.php new file mode 100644 index 00000000..3aa6654a --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Nofollow.php @@ -0,0 +1,19 @@ +addBlankElement('a'); + $a->attr_transform_post[] = new HTMLPurifier_AttrTransform_Nofollow(); + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php index f5c4a1d2..362e3b78 100644 --- a/library/HTMLPurifier/HTMLModuleManager.php +++ b/library/HTMLPurifier/HTMLModuleManager.php @@ -216,19 +216,19 @@ class HTMLPurifier_HTMLModuleManager } } - // add proprietary module (this gets special treatment because - // it is completely removed from doctypes, etc.) + // custom modules if ($config->get('HTML.Proprietary')) { $modules[] = 'Proprietary'; } - - // add SafeObject/Safeembed modules if ($config->get('HTML.SafeObject')) { $modules[] = 'SafeObject'; } if ($config->get('HTML.SafeEmbed')) { $modules[] = 'SafeEmbed'; } + if ($config->get('HTML.Nofollow')) { + $modules[] = 'Nofollow'; + } // merge in custom modules $modules = array_merge($modules, $this->userModules); diff --git a/tests/HTMLPurifier/HTMLModule/NofollowTest.php b/tests/HTMLPurifier/HTMLModule/NofollowTest.php new file mode 100644 index 00000000..5e29b764 --- /dev/null +++ b/tests/HTMLPurifier/HTMLModule/NofollowTest.php @@ -0,0 +1,20 @@ +config->set('HTML.Nofollow', true); + } + + function testNofollow() { + $this->assertResult( + 'abc', + 'abc' + ); + } + +} + +// vim: et sw=4 sts=4 -- 2.11.4.GIT