From 77b60a4206db5e0d854b47ad4985773381d8ebb1 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Fri, 29 May 2009 23:46:40 -0400 Subject: [PATCH] Update documentation to new configuration format. Signed-off-by: Edward Z. Yang --- INSTALL | 24 ++--- TODO | 9 +- docs/dev-advanced-api.html | 245 +++++--------------------------------------- docs/enduser-customize.html | 46 ++++----- docs/enduser-id.html | 14 +-- docs/enduser-tidy.html | 14 +-- docs/enduser-youtube.html | 2 +- 7 files changed, 80 insertions(+), 274 deletions(-) rewrite docs/dev-advanced-api.html (90%) diff --git a/INSTALL b/INSTALL index 8ee41e86..2b4069e2 100644 --- a/INSTALL +++ b/INSTALL @@ -231,12 +231,12 @@ HTML Purifier uses iconv to support other character encodings, as such, any encoding that iconv supports HTML Purifier supports with this code: - $config->set('Core', 'Encoding', /* put your encoding here */); + $config->set('Core.Encoding', /* put your encoding here */); An example usage for Latin-1 websites (the most common encoding for English websites): - $config->set('Core', 'Encoding', 'ISO-8859-1'); + $config->set('Core.Encoding', 'ISO-8859-1'); Note that HTML Purifier's support for non-Unicode encodings is crippled by the fact that any character not supported by that encoding will be silently @@ -251,7 +251,7 @@ reason, I do not include the solution in this document). For those of you using HTML 4.01 Transitional, you can disable XHTML output like this: - $config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); + $config->set('HTML.Doctype', 'HTML 4.01 Transitional'); Other supported doctypes include: @@ -277,14 +277,14 @@ are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and %AutoFormat.AutoParagraph. The %Namespace.Directive naming convention translates to: - $config->set('Namespace', 'Directive', $value); + $config->set('Namespace.Directive', $value); E.g. 
- $config->set('HTML', 'Allowed', 'p,b,a[href],i'); - $config->set('URI', 'Base', 'http://www.example.com'); - $config->set('URI', 'MakeAbsolute', true); - $config->set('AutoFormat', 'AutoParagraph', true); + $config->set('HTML.Allowed', 'p,b,a[href],i'); + $config->set('URI.Base', 'http://www.example.com'); + $config->set('URI.MakeAbsolute', true); + $config->set('AutoFormat.AutoParagraph', true); --------------------------------------------------------------------------- @@ -318,11 +318,11 @@ If you are unable or unwilling to give write permissions to the cache directory, you can either disable the cache (and suffer a performance hit): - $config->set('Core', 'DefinitionCache', null); + $config->set('Core.DefinitionCache', null); Or move the cache directory somewhere else (no trailing slash): - $config->set('Cache', 'SerializerPath', '/home/user/absolute/path'); + $config->set('Cache.SerializerPath', '/home/user/absolute/path'); --------------------------------------------------------------------------- @@ -363,8 +363,8 @@ If your website is in a different encoding or doctype, use this code: require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php'; $config = HTMLPurifier_Config::createDefault(); - $config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding - $config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype + $config->set('Core.Encoding', 'ISO-8859-1'); // replace with your encoding + $config->set('HTML.Doctype', 'HTML 4.01 Transitional'); // replace with your doctype $purifier = new HTMLPurifier($config); $clean_html = $purifier->purify($dirty_html); diff --git a/TODO b/TODO index 6bc6aa71..565ad487 100644 --- a/TODO +++ b/TODO @@ -18,14 +18,13 @@ afraid to cast your vote for the next feature to be implemented! 
- Incorporate download and resize support as implemented here: http://htmlpurifier.org/phorum/read.php?3,2795,3628 - Think about allowing explicit order of operations hooks for transforms -- Make it dead easy for other authors to maintain their own configuration - pools. Encourage them to namespace them (this flies counter to our - "hey, let's use convention idea", so that's why the "register" extra - field will end up being a good idea: because it means we can forgo - convention for external things +- Add "register" field to config schemas to eliminate dependence on + naming conventions - Make it easy for people to cache their entire configuration (so that they have one script they run to change configuration, and then a stub loader to get that configuration) +- Add examples to everything (make built-in which also automatically + gives output) FUTURE VERSIONS --------------- diff --git a/docs/dev-advanced-api.html b/docs/dev-advanced-api.html dissimilarity index 90% index 0233a56d..5b7aaa3c 100644 --- a/docs/dev-advanced-api.html +++ b/docs/dev-advanced-api.html @@ -1,219 +1,26 @@ - - - - - - - -Advanced API - HTML Purifier - - - -

Advanced API

- -
Filed under Development
-
Return to the index.
-
HTML Purifier End-User Documentation
- -

- Warning: This document may be out-of-date. When in doubt, - consult the source code documentation. -

- -

HTML Purifier currently natively supports only a subset of HTML's -allowed elements, attributes, and behavior; specifically, this subset -is the set of elements that are safe for untrusted users to use. -However, HTML Purifier is often utilized to ensure standards-compliance -from input that is trusted (making it a sort of Tidy substitute), -and often users need to define new elements or attributes. The -advanced API is oriented specifically for these use-cases.

- -

Our goals are to let the user:

- -
-
Select
-
    -
  • Doctype
  • - -
  • Elements / Attributes / Modules
  • -
  • Tidy
  • -
-
Customize
-
    -
  • Attributes
  • -
  • Elements
  • - -
-
- -

Select

- -

For basic use, the user will have to specify some basic parameters. This -is not strictly necessary, as HTML Purifier's default setting will always -output safe code, but is required for standards-compliant output.

- -

Selecting a Doctype

- -

The first thing to select is the doctype. This -is essential for standards-compliant output.

- -

This identifier is based -on the name the W3C has given to the document type and not -the DTD identifier.

- -

This parameter is set via the configuration object:

- -
$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
- -

Due to historical reasons, the default doctype is XHTML 1.0 -Transitional, however, we really shouldn't be guessing what the user's -doctype is. Fortunantely, people who can't be bothered to set this won't -be bothered when their pages stop validating.

- -

Selecting Elements / Attributes / Modules

- -

HTML Purifier will, by default, allow as many elements and attributes -as possible. However, a user may decide to roll their own filterset by -selecting modules, elements and attributes to allow for their own -specific use-case. This can be done using %HTML.Allowed:

- -
$config->set('HTML', 'Allowed', 'a[href|title],em,p,blockquote');
- -

The directive %HTML.Allowed is a convenience feature -that may be fully expressed with the legacy interface.

- -

We currently support another interface from older versions:

- -
$config->set('HTML', 'AllowedElements', 'a,em,p,blockquote');
-$config->set('HTML', 'AllowedAttributes', 'a.href,a.title');
- -

A user may also choose to allow modules using a specialized -directive:

- -
$config->set('HTML', 'AllowedModules', 'Hypertext,Text,Lists');
- -

But it is not expected that this feature will be widely used.

- -

Module selection will work slightly differently -from the other AllowedElements and AllowedAttributes directives by -directly modifying the doctype you are operating in, in the spirit of -XHTML 1.1's modularization. We stop users from shooting themselves in the -foot by mandating the modules in %HTML.CoreModules be used.

- -

Modules are distinguished from regular elements by the -case of their first letter. While XML distinguishes between and allows -lower and uppercase letters in element names, XHTML uses only lower-case -element names for sake of consistency.

- -

Selecting Tidy

- -

The name of this segment of functionality is inspired off of Dave -Ragget's program HTML Tidy, which purported to help clean up HTML. In -HTML Purifier, Tidy functionality involves turning unsupported and -deprecated elements into standards-compliant ones, maintaining -backwards compatibility, and enforcing best practices.

- -

This is a complicated feature, and is explained more in depth at -the Tidy documentation page.

- - - -

Customize

- -

By reviewing topic posts in the support forum, we determined that -there were two primarily demanded customization features people wanted: -to add an attribute to an existing element, and to add an element. -Thus, we'll want to create convenience functions for these common -use-cases.

- -

Note that the functions described here are only available if -a raw copy of HTMLPurifier_HTMLDefinition was retrieved. -Furthermore, caching may prevent your changes from immediately -being seen: consult enduser-customize.html on how -to work around this.

- -

Attributes

- -

An attribute is bound to an element by a name and has a specific -AttrDef that validates it. The interface is therefore:

- -
function addAttribute($element, $attribute, $attribute_def);
- -

Example of the functionality in action:

- -
$def->addAttribute('a', 'rel', 'Enum#nofollow');
- -

The $attribute_def value is flexible, -to make things simpler. It can be a literal object or:

- -
    - -
  • String attribute type: We'll use HTMLPurifier_AttrTypes - to resolve it for you. Any data that follows a hash mark (#) will - be used to customize the attribute type: in the example above, - we specify which values for Enum to allow.
  • -
- -

Elements

- -

An element requires certain information as specified by -HTMLPurifier_ElementDef. However, not all of it is necessary, -the usual things required are:

- -
    -
  • Attributes
  • -
  • Content model/type
  • -
  • Registration in a content set
  • -
- -

This suggests an API like this:

- -
function addElement($element, $type, $contents,
-    $attr_collections = array(); $attributes = array());
- -

Each parameter explained in depth:

- -
-
$element
-
Element name, ex. 'label'
-
$type
-
Content set to register in, ex. 'Inline' or 'Flow'
-
$contents
-
Description of allowed children. This is a merged form of - HTMLPurifier_ElementDef's member variables - $content_model and $content_model_type, - where the form is Type: Model, ex. 'Optional: Inline'. - There are also a number of predefined templates one may use.
-
$attr_collections
-
Array (or string if only one) of attribute collection(s) to - merge into the attributes array.
-
$attributes
-
Array of attribute names to attribute definitions, much like - the above-described attribute customization.
-
- -

A possible usage:

- -
$def->addElement('font', 'Inline', 'Optional: Inline', 'Common',
-    array('color' => 'Color'));
- -

See HTMLPurifier/HTMLModule.php for details.

- - - - + + + + + + + +Advanced API - HTML Purifier + + + +

Advanced API

+ +
Filed under Development
+
Return to the index.
+
HTML Purifier End-User Documentation
+ +

+ Please see Customize! +

+ + + + diff --git a/docs/enduser-customize.html b/docs/enduser-customize.html index 6af0e9e7..67496fc5 100644 --- a/docs/enduser-customize.html +++ b/docs/enduser-customize.html @@ -155,9 +155,9 @@

$config = HTMLPurifier_Config::createDefault();
-$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
-$config->set('HTML', 'DefinitionRev', 1);
-$def = $config->getHTMLDefinition(true);
+$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial'); +$config->set('HTML.DefinitionRev', 1); +$def = $config->getHTMLDefinition(true);

Assuming that HTML Purifier has already been properly loaded (hint: @@ -210,10 +210,10 @@ $def = $config->getHTMLDefinition(true);

$config = HTMLPurifier_Config::createDefault();
-$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
-$config->set('HTML', 'DefinitionRev', 1);
-$config->set('Cache', 'DefinitionImpl', null); // remove this later!
-$def = $config->getHTMLDefinition(true);
+$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial'); +$config->set('HTML.DefinitionRev', 1); +$config->set('Cache.DefinitionImpl', null); // TODO: remove this later! +$def = $config->getHTMLDefinition(true);

A few things should be mentioned about the caching mechanism before @@ -266,10 +266,10 @@ $def = $config->getHTMLDefinition(true);

$config = HTMLPurifier_Config::createDefault();
-$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
-$config->set('HTML', 'DefinitionRev', 1);
-$config->set('Cache', 'DefinitionImpl', null); // remove this later!
-$def = $config->getHTMLDefinition(true);
+$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
+$config->set('HTML.DefinitionRev', 1);
+$config->set('Cache.DefinitionImpl', null); // remove this later!
+$def = $config->getHTMLDefinition(true);
 $def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');

@@ -384,11 +384,11 @@ $def = $config->getHTMLDefinition(true);

$config = HTMLPurifier_Config::createDefault();
-$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
-$config->set('HTML', 'DefinitionRev', 1);
-$config->set('Cache', 'DefinitionImpl', null); // remove this later!
-$def = $config->getHTMLDefinition(true);
-$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
+$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
+$config->set('HTML.DefinitionRev', 1);
+$config->set('Cache.DefinitionImpl', null); // remove this later!
+$def = $config->getHTMLDefinition(true);
+$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
   array('_blank','_self','_target','_top')
 ));
@@ -731,14 +731,14 @@ $def = $config->getHTMLDefinition(true);

$config = HTMLPurifier_Config::createDefault();
-$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
-$config->set('HTML', 'DefinitionRev', 1);
-$config->set('Cache', 'DefinitionImpl', null); // remove this later!
-$def = $config->getHTMLDefinition(true);
-$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
+$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
+$config->set('HTML.DefinitionRev', 1);
+$config->set('Cache.DefinitionImpl', null); // remove this later!
+$def = $config->getHTMLDefinition(true);
+$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
   array('_blank','_self','_target','_top')
 ));
-$form = $def->addElement(
+$form = $def->addElement(
   'form',   // name
   'Block',  // content set
   'Flow', // allowed children
@@ -749,7 +749,7 @@ $def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
     'name' => 'ID'
   )
 );
-$form->excludes = array('form' => true);
+$form->excludes = array('form' => true);

Each of the parameters corresponds to one of the questions we asked. diff --git a/docs/enduser-id.html b/docs/enduser-id.html index 808e2129..53d2da24 100644 --- a/docs/enduser-id.html +++ b/docs/enduser-id.html @@ -31,7 +31,7 @@ by default.

IDs, however, are quite useful functionality to have, so if users start complaining about broken anchors you'll probably want to turn them back on -with %HTML.EnableAttrID. But before you go mucking around with the config +with %Attr.EnableID. But before you go mucking around with the config object, it's probably worth to take some precautions to keep your page validating. Why?

@@ -56,8 +56,8 @@ validating. Why?

deal with the most obvious solution: preventing users from using any IDs that appear elsewhere on the document. The method is simple:

-
$config->set('HTML', 'EnableAttrID', true);
-$config->set('Attr', 'IDBlacklist' array(
+
$config->set('Attr.EnableID', true);
+$config->set('Attr.IDBlacklist', array(
     'list', 'of', 'attribute', 'values', 'that', 'are', 'forbidden'
 ));
@@ -88,8 +88,8 @@ all, they might have simply specified a duplicate ID by accident.

This method, too, is quite simple: add a prefix to all user IDs. With this code:

-
$config->set('HTML', 'EnableAttrID', true);
-$config->set('Attr', 'IDPrefix', 'user_');
+
$config->set('Attr.EnableID', true);
+$config->set('Attr.IDPrefix', 'user_');

...this:

@@ -109,7 +109,7 @@ user_ to the beginning."

nothing about multiple HTML Purifier outputs on one page. Thus, we have a second configuration value to piggy-back off of: %Attr.IDPrefixLocal:

-
$config->set('Attr', 'IDPrefixLocal', 'comment' . $id . '_');
+
$config->set('Attr.IDPrefixLocal', 'comment' . $id . '_');

This new attributes does nothing but append on to regular IDPrefix, but is special in that it is volatile: it's value is determined at run-time and @@ -137,7 +137,7 @@ anchors is beyond me.

To revert back to pre-1.2.0 behavior, simply:

-
$config->set('HTML', 'EnableAttrID', true);
+
$config->set('Attr.EnableID', true);

Don't come crying to me when your page mysteriously stops validating, though.

diff --git a/docs/enduser-tidy.html b/docs/enduser-tidy.html index 1721c717..a243f7fc 100644 --- a/docs/enduser-tidy.html +++ b/docs/enduser-tidy.html @@ -76,7 +76,7 @@ associated with it, although it may change depending on your doctype.

change the level of cleaning by setting the %HTML.TidyLevel configuration directive:

-
$config->set('HTML', 'TidyLevel', 'heavy'); // burn baby burn!
+
$config->set('HTML.TidyLevel', 'heavy'); // burn baby burn!

Is the light level really light?

@@ -165,17 +165,17 @@ smoketest.

so happy about the br@clear implementation. That's perfectly fine! HTML Purifier will make accomodations:

-
$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
-$config->set('HTML', 'TidyLevel', 'heavy'); // all changes, minus...
-$config->set('HTML', 'TidyRemove', 'br@clear');
+
$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
+$config->set('HTML.TidyLevel', 'heavy'); // all changes, minus...
+$config->set('HTML.TidyRemove', 'br@clear');

That third line does the magic, removing the br@clear fix from the module, ensuring that <br clear="both" /> will pass through unharmed. The reverse is possible too:

-
$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
-$config->set('HTML', 'TidyLevel', 'none'); // no changes, plus...
-$config->set('HTML', 'TidyAdd', 'p@align');
+
$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
+$config->set('HTML.TidyLevel', 'none'); // no changes, plus...
+$config->set('HTML.TidyAdd', 'p@align');

In this case, all transformations are shut off, except for the p@align one, which you found handy.

diff --git a/docs/enduser-youtube.html b/docs/enduser-youtube.html index aaf8bdbc..87a36b9a 100644 --- a/docs/enduser-youtube.html +++ b/docs/enduser-youtube.html @@ -75,7 +75,7 @@ passes through HTML Purifier unharmed.

And the corresponding usage:

<?php
-    $config->set('Filter', 'YouTube', true);
+    $config->set('Filter.YouTube', true);
 ?>

There is a bit going in the two code snippets, so let's explain.

-- 2.11.4.GIT