From c35eb3e95f2a8c76bc2f037d91189cd9f7340eb3 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
Date: Sat, 5 May 2007 20:49:49 +0000
Subject: [PATCH] Release 1.6.1, merged in 931 to HEAD.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1026 48356398-32a2-884e-a903-53898d9a118a
---
Doxyfile | 2 +-
INSTALL | 2 +-
NEWS | 37 +++
README | 2 +-
TODO | 2 +
VERSION | 1 +
WHATSNEW | 7 +
configdoc/generate.php | 3 +-
docs/dev-advanced-api.html | 2 +-
...{dev-code-quality.html => dev-code-quality.txt} | 30 +-
docs/dev-naming.html | 2 +-
docs/dev-optimization.html | 2 +-
docs/dev-progress.html | 38 +--
docs/enduser-id.html | 2 +-
docs/enduser-slow.html | 2 +-
docs/enduser-utf8.html | 2 +-
docs/enduser-youtube.html | 4 +-
docs/examples/basic.php | 17 +-
docs/examples/demo.php | 136 ---------
docs/index.html | 11 +-
docs/proposal-colors.html | 2 +-
docs/ref-devnetwork.html | 2 +-
docs/specimens/LICENSE | 8 +
docs/specimens/html-align-to-css.html | 165 +++++++++++
docs/specimens/img.png | Bin 0 -> 2138 bytes
docs/specimens/windows-live-mail-desktop-beta.html | 74 +++++
library/HTMLPurifier.php | 4 +-
library/HTMLPurifier/AttrDef/Enum.php | 4 +
library/HTMLPurifier/AttrDef/HTML/FrameTarget.php | 34 +++
library/HTMLPurifier/AttrTransform.php | 24 ++
library/HTMLPurifier/AttrTransform/BgColor.php | 6 +-
library/HTMLPurifier/AttrTransform/BoolToCSS.php | 39 +++
library/HTMLPurifier/AttrTransform/Border.php | 14 +-
library/HTMLPurifier/AttrTransform/EnumToCSS.php | 60 ++++
library/HTMLPurifier/AttrTransform/ImgSpace.php | 47 +++
library/HTMLPurifier/AttrTransform/Length.php | 8 +-
library/HTMLPurifier/AttrTransform/Name.php | 16 +-
library/HTMLPurifier/AttrTransform/TextAlign.php | 36 ---
library/HTMLPurifier/CSSDefinition.php | 3 +
library/HTMLPurifier/ConfigSchema.php | 4 +
library/HTMLPurifier/ElementDef.php | 2 +-
library/HTMLPurifier/HTMLDefinition.php | 17 +-
library/HTMLPurifier/HTMLModule/Bdo.php | 1 -
library/HTMLPurifier/HTMLModule/Edit.php | 1 -
library/HTMLPurifier/HTMLModule/Hypertext.php | 1 -
library/HTMLPurifier/HTMLModule/Image.php | 1 -
library/HTMLPurifier/HTMLModule/List.php | 2 +-
library/HTMLPurifier/HTMLModule/Presentation.php | 1 -
library/HTMLPurifier/HTMLModule/Scripting.php | 67 +++++
library/HTMLPurifier/HTMLModule/Tables.php | 1 -
library/HTMLPurifier/HTMLModule/Target.php | 26 ++
library/HTMLPurifier/HTMLModule/Text.php | 2 -
.../HTMLPurifier/HTMLModule/TransformToStrict.php | 98 ++++++-
.../HTMLPurifier/HTMLModule/TransformToXHTML11.php | 6 +
library/HTMLPurifier/HTMLModuleManager.php | 57 +++-
library/HTMLPurifier/Lexer/DirectLex.php | 17 ++
.../Strategy/RemoveForeignElements.php | 3 +-
library/HTMLPurifier/TagTransform/Font.php | 13 +-
package.php | 14 +-
release.php | 82 ++++++
smoketests/attrTransform.php | 68 +++++
smoketests/attrTransform.xml | 189 ++++++++++++
smoketests/img.png | Bin 0 -> 2138 bytes
smoketests/printDefinition.php | 2 +-
.../HTMLPurifier/AttrDef/HTML/FrameTargetTest.php | 31 ++
tests/HTMLPurifier/AttrDefTest.php | 44 +--
tests/HTMLPurifier/AttrTransform/BoolToCSSTest.php | 39 +++
.../{TextAlignTest.php => EnumToCSSTest.php} | 52 ++--
.../HTMLPurifier/AttrTransform/ImgRequiredTest.php | 2 +-
tests/HTMLPurifier/AttrTransform/ImgSpaceTest.php | 57 ++++
tests/HTMLPurifier/AttrTransform/LangTest.php | 2 +-
tests/HTMLPurifier/AttrTransformTest.php | 42 +++
tests/HTMLPurifier/ChildDef/RequiredTest.php | 4 +-
tests/HTMLPurifier/ConfigSchemaTest.php | 3 +
tests/HTMLPurifier/ConfigTest.php | 16 +-
tests/HTMLPurifier/GeneratorTest.php | 6 +-
tests/HTMLPurifier/HTMLModuleManagerTest.php | 8 +-
tests/HTMLPurifier/Harness.php | 2 +-
tests/HTMLPurifier/LanguageFactoryTest.php | 10 +-
tests/HTMLPurifier/Lexer/DirectLexTest.php | 2 +-
tests/HTMLPurifier/LexerTest.php | 22 +-
tests/HTMLPurifier/PercentEncoderTest.php | 2 +-
.../Strategy/RemoveForeignElementsTest.php | 9 +
.../Strategy/ValidateAttributesTest.php | 320 ++++++++++++++++++---
tests/HTMLPurifier/TagTransformTest.php | 16 +-
tests/HTMLPurifier/Test.php | 2 +-
tests/HTMLPurifier/TokenFactoryTest.php | 2 +-
tests/HTMLPurifier/TokenTest.php | 4 +-
tests/test_files.php | 6 +-
89 files changed, 1792 insertions(+), 436 deletions(-)
create mode 100644 VERSION
create mode 100644 WHATSNEW
rename docs/{dev-code-quality.html => dev-code-quality.txt} (58%)
delete mode 100644 docs/examples/demo.php
create mode 100644 docs/specimens/LICENSE
create mode 100644 docs/specimens/html-align-to-css.html
create mode 100644 docs/specimens/img.png
create mode 100644 docs/specimens/windows-live-mail-desktop-beta.html
create mode 100644 library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
create mode 100644 library/HTMLPurifier/AttrTransform/BoolToCSS.php
create mode 100644 library/HTMLPurifier/AttrTransform/EnumToCSS.php
create mode 100644 library/HTMLPurifier/AttrTransform/ImgSpace.php
delete mode 100644 library/HTMLPurifier/AttrTransform/TextAlign.php
create mode 100644 library/HTMLPurifier/HTMLModule/Scripting.php
create mode 100644 library/HTMLPurifier/HTMLModule/Target.php
create mode 100644 release.php
create mode 100644 smoketests/attrTransform.php
create mode 100644 smoketests/attrTransform.xml
create mode 100644 smoketests/img.png
create mode 100644 tests/HTMLPurifier/AttrDef/HTML/FrameTargetTest.php
rewrite tests/HTMLPurifier/AttrDefTest.php (62%)
create mode 100644 tests/HTMLPurifier/AttrTransform/BoolToCSSTest.php
rename tests/HTMLPurifier/AttrTransform/{TextAlignTest.php => EnumToCSSTest.php} (56%)
create mode 100644 tests/HTMLPurifier/AttrTransform/ImgSpaceTest.php
create mode 100644 tests/HTMLPurifier/AttrTransformTest.php
diff --git a/Doxyfile b/Doxyfile
index da12ad93..4e548067 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -4,7 +4,7 @@
# Project related configuration options
#---------------------------------------------------------------------------
PROJECT_NAME = HTML Purifier
-PROJECT_NUMBER = 1.6.0
+PROJECT_NUMBER = 1.6.1
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
diff --git a/INSTALL b/INSTALL
index 5f41cfba..e80c57de 100644
--- a/INSTALL
+++ b/INSTALL
@@ -143,7 +143,7 @@ versions will also allow strict-compliant output.
4.3. Other settings
There are more configuration directives which can be read about
-here: They're a bit boring,
+here: They're a bit boring,
but they can help out for those of you who like to exert maximum control over
your code.
diff --git a/NEWS b/NEWS
index 089922f0..7dfe3531 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,43 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
+1.7.0, unknown release date
+
+1.6.1, released 2007-05-05
+! Support for more deprecated attributes via transformations:
+ + hspace and vspace in img
+ + size and noshade in hr
+ + nowrap in td
+ + clear in br
+ + align in caption, table, img and hr
+ + type in ul, ol and li
+! DirectLex now preserves text in which a < bracket is followed by
+ a non-alphanumeric character. This means that certain emoticons
+ are now preserved.
+! %Core.RemoveInvalidImg is now operational, when set to false invalid
+ images will hang around with an empty src
+! target attribute in a tag supported, use %Attr.AllowedFrameTargets
+ to enable
+! CSS property white-space now allows nowrap (supported in all modern
+ browsers) but not others (which have spotty browser implementations)
+! XHTML 1.1 mode now sort-of works without any fatal errors, and
+ lang is now moved over to xml:lang.
+! Attribute transformation smoketest available at smoketests/attrTransform.php
+! Transformation of font's size attribute now handles super-large numbers
+- Possibly fatal bug with __autoload() fixed in module manager
+- Invert HTMLModuleManager->addModule() processing order to check
+ prefixes first and then the literal module
+- Empty strings get converted to empty arrays instead of arrays with
+ an empty string in them.
+- Merging in attribute lists now works.
+. Demo script removed: it has been added to the website's repository
+. Basic.php script modified to work out of the box
+. Refactor AttrTransform classes to reduce duplication
+. AttrTransform_TextAlign axed in favor of a more general
+ AttrTransform_EnumToCSS, refer to HTMLModule/TransformToStrict.php to
+ see how the new equivalent is implemented
+. Unit tests now use exclusively assertIdentical
+
1.6.0, released 2007-04-01
! Support for most common deprecated attributes via transformations:
+ bgcolor in td, th, tr and table
diff --git a/README b/README
index bfd270d8..5bfd5e40 100644
--- a/README
+++ b/README
@@ -19,4 +19,4 @@ Places to go:
an in-depth installation guide.
* See WYSIWYG for information on editors like TinyMCE and FCKeditor
-HTML Purifier can be found on the web at: http://hp.jpsband.org/
+HTML Purifier can be found on the web at: http://htmlpurifier.org/
diff --git a/TODO b/TODO
index 9901a429..ebc6e8ed 100644
--- a/TODO
+++ b/TODO
@@ -13,6 +13,7 @@ TODO List
# Implement all deprecated tags and attributes
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists (possibly
do this earlier)
+ ? HTML interface for tweaking configuration to see changes
1.8 release [Refactor, refactor!]
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
@@ -82,6 +83,7 @@ Unknown release (on a scratch-an-itch basis)
? Semi-lossy dumb alternate character encoding transfor
? Have 'lang' attribute be checked against official lists, achieved by
encoding all characters that have string entity equivalents
+ - Explain how to use HTML Purifier in non-PHP languages
Requested
? Native content compression, whitespace stripping (don't rely on Tidy, make
diff --git a/VERSION b/VERSION
new file mode 100644
index 00000000..2eda823f
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+1.6.1
\ No newline at end of file
diff --git a/WHATSNEW b/WHATSNEW
new file mode 100644
index 00000000..7ce6b516
--- /dev/null
+++ b/WHATSNEW
@@ -0,0 +1,7 @@
+The 1.6.1 release, code-named 'Ach! We missed something! Run!', completes
+HTML Purifier's roster of attribute transformations. It also implements
+a number of minor features (such as better font transformations, smarter
+HTML parsing, the CSS property 'white-space' and XHTML 1.1), a few bug
+fixes (most notably fixed __autoload compatibility issues) and a ton
+of refactoring. 1.6 was for things that absolutely could not wait: this
+release, developed at a more leisurely pace, fills in the gaps.
\ No newline at end of file
diff --git a/configdoc/generate.php b/configdoc/generate.php
index d5966e2e..a5b06e96 100644
--- a/configdoc/generate.php
+++ b/configdoc/generate.php
@@ -24,8 +24,7 @@ error_reporting(E_ALL);
// ---------------------------------------------------------------------------
// Include HTML Purifier library
-set_include_path('../library' . PATH_SEPARATOR . get_include_path());
-require_once 'HTMLPurifier.php';
+require_once '../library/HTMLPurifier.auto.php';
// ---------------------------------------------------------------------------
diff --git a/docs/dev-advanced-api.html b/docs/dev-advanced-api.html
index abc83025..a9d9f745 100644
--- a/docs/dev-advanced-api.html
+++ b/docs/dev-advanced-api.html
@@ -14,7 +14,7 @@
HTML Purifier currently natively supports only a subset of HTML's
allowed elements, attributes, and behavior. This is by design,
diff --git a/docs/dev-code-quality.html b/docs/dev-code-quality.txt
similarity index 58%
rename from docs/dev-code-quality.html
rename to docs/dev-code-quality.txt
index 4134360a..7c09a22c 100644
--- a/docs/dev-code-quality.html
+++ b/docs/dev-code-quality.txt
@@ -1,31 +1,16 @@
-
-
-
Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
+Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
also can do quick prototypes, and then forget to rewrite them later. Well,
while I can't list mistakes in here, I can list prototype-like segments
of code that should be aggressively refactored. This does not list
-optimization issues, that needs to be done after intense profiling.
+optimization issues; those need to be handled after intense profiling.
-
docs/examples/demo.php - ad hoc HTML/PHP soup to the extreme
-AttrDef
+AttrDef - a lot of duplication, more generic classes need to be created;
+a lot of strtolower() calls, no legit casing
Class - doesn't support Unicode characters (fringe); uses regular
expressions
Lang - code duplication; premature optimization
@@ -45,8 +30,3 @@ URIScheme - needs to have callable generic checks
mailto - doesn't validate emails, doesn't validate querystring
news - doesn't validate opaque path
nntp - doesn't constrain path
-
-
-
$Id$
-
-
\ No newline at end of file
diff --git a/docs/dev-naming.html b/docs/dev-naming.html
index 732d32c3..9fffbb65 100644
--- a/docs/dev-naming.html
+++ b/docs/dev-naming.html
@@ -14,7 +14,7 @@
Here are some possible optimization techniques we can apply to code sections if
they turn out to be slow. Be sure not to prematurely optimize: if you get
diff --git a/docs/dev-progress.html b/docs/dev-progress.html
index c0da280a..8d245308 100644
--- a/docs/dev-progress.html
+++ b/docs/dev-progress.html
@@ -32,7 +32,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
May be dropped from CSS2, fairly useless for inline context
visibility
ENUM(visible, hidden, collapse),
Dangerous
-
white-space
ENUM(normal, pre, nowrap, pre-wrap,
+
white-space
ENUM(normal, pre, nowrap, pre-wrap,
pre-line), Spotty implementation:
- pre (no IE 5/6), nowrap (no IE 5),
+ pre (no IE 5/6), nowrap (no IE 5, supported),
pre-wrap (only Opera), pre-line (no support). Fixable? Unknown target milestone.
@@ -238,7 +238,7 @@ Mozilla on inside and needs -moz-outline, no IE support.
Questionable
accesskey
A
May interfere with main interface
tabindex
A
May interfere with main interface
-
target
A
Config enabled, only useful for frame layouts, disallowed in strict
+
target
A
Config enabled, only useful for frame layouts, disallowed in strict
@@ -262,35 +262,35 @@ Mozilla on inside and needs -moz-outline, no IE support.
-
Transform, target milestone 1.6
-
align
CAPTION
Near-equiv style 'caption-side', drop left and right
-
IMG
Margin-left and margin-right = auto or parent div
-
TABLE
-
HR
Near-equivalent style 'text-align' (Works for IE and Opera, but not Firefox). Also try margin-right:auto; margin-left:0; for left or margin-right:0; margin-left:auto; for right (optionally replacing 0 with the original margin for that side)
+
Transform
+
align
CAPTION
'caption-side' for top/bottom, 'text-align' for left/right
+
IMG
See specimens/html-align-to-css.html
+
TABLE
+
HR
H1, H2, H3, H4, H5, H6, P
Equivalent style 'text-align'
alt
IMG
Required, insert image filename if src is present or default invalid image text
bgcolor
TABLE
Superset style 'background-color'
TR
Superset style 'background-color'
TD, TH
Superset style 'background-color'
border
IMG
Equivalent style border:[number]px solid
-
clear
BR
Near-equiv style 'clear', transform 'all' into 'both'
+
clear
BR
Near-equiv style 'clear', transform 'all' into 'both'
compact
DL, OL, UL
Boolean, needs custom CSS class; rarely used anyway
dir
BDO
Required, insert ltr (or configuration value) if none
height
TD, TH
Near-equiv style 'height', needs px suffix if original was in pixels
-
hspace
IMG
Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix
+
hspace
IMG
Near-equiv styles 'margin-left' and 'margin-right', needs px suffix
lang
*
Copy value to xml:lang
name
IMG
Turn into ID
A
Turn into ID
-
noshade
HR
Boolean, style 'border-style:solid;'
-
nowrap
TD, TH
Boolean, style 'white-space:nowrap;' (not compat with IE5)
-
size
HR
Near-equiv 'height', needs px suffix if original was pixels
+
noshade
HR
Boolean, style 'border-style:solid;'
+
nowrap
TD, TH
Boolean, style 'white-space:nowrap;' (not compat with IE5)
+
size
HR
Near-equiv 'height', needs px suffix if original was pixels
src
IMG
Required, insert blank or default img if not set
start
OL
Poorly supported 'counter-reset', allowed in loose, dropped in strict
-
type
LI
Equivalent style 'list-style-type', different allowed values though. (needs testing)
-
OL
-
UL
+
type
LI
Equivalent style 'list-style-type', different allowed values though. (needs testing)
+
OL
+
UL
value
LI
Poorly supported 'counter-reset', allowed in loose, dropped in strict
-
vspace
IMG
Near-equiv styles 'margin-left' and 'margin-right', needs px suffix, see hspace
+
vspace
IMG
Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix, see hspace
width
HR
Near-equiv style 'width', needs px suffix if original was pixels
HTML Purifier is a very powerful library. But with power comes great
responsibility, in the form of longer execution times. Remember, this
diff --git a/docs/enduser-utf8.html b/docs/enduser-utf8.html
index 351c44d1..6d03ad91 100644
--- a/docs/enduser-utf8.html
+++ b/docs/enduser-utf8.html
@@ -23,7 +23,7 @@ own advice for sake of portability. -->
Character encoding and character sets are not that
difficult to understand, but so many people blithely stumble
diff --git a/docs/enduser-youtube.html b/docs/enduser-youtube.html
index 20ade969..a827033a 100644
--- a/docs/enduser-youtube.html
+++ b/docs/enduser-youtube.html
@@ -15,7 +15,7 @@
Clients like their YouTube videos. It gives them a warm fuzzy feeling when
they see a neat little embedded video player on their websites that can play
@@ -70,7 +70,7 @@ into your documents. YouTube's code goes like this:
class="embed-youtube">AyPzM5WK8ys</span> your
application can reconstruct the full object from this small snippet that
passes through HTML Purifier unharmed.
-Show me the code!
If you would like to validate the code with
-W3C's
-validator, copy and paste the entire demo page's source.
-
-
Welcome to the live demo. Enter some HTML and see how HTML Purifier
-will filter it.
-
-
-
Return to HTML Purifier's home page.
-Try the form in GET and POST request
-flavors (GET is easy to validate with W3C, but POST allows larger inputs).
-
-
\ No newline at end of file
diff --git a/docs/index.html b/docs/index.html
index 0065c3d6..7a7ec0a3 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -13,7 +13,7 @@
Documentation
-
HTML Purifier has documentation for all types of people.
+
HTML Purifier has documentation for all types of people.
Here is an index of all of them.
Your website probably has a color-scheme.
Green on white,
diff --git a/docs/ref-devnetwork.html b/docs/ref-devnetwork.html
index 5742ce5d..9a009284 100644
--- a/docs/ref-devnetwork.html
+++ b/docs/ref-devnetwork.html
@@ -15,7 +15,7 @@
Many thanks to the DevNetwork community for answering questions,
theorizing about design, and offering encouragement during
diff --git a/docs/specimens/LICENSE b/docs/specimens/LICENSE
new file mode 100644
index 00000000..8b94b0d4
--- /dev/null
+++ b/docs/specimens/LICENSE
@@ -0,0 +1,8 @@
+Licensing of Specimens
+
+Some files in this directory have different licenses:
+
+windows-live-mail-desktop-beta.html - donated by laacz, public domain
+img.png - LGPL, from
+
+All other files are by me, and are licensed under LGPL.
\ No newline at end of file
diff --git a/docs/specimens/html-align-to-css.html b/docs/specimens/html-align-to-css.html
new file mode 100644
index 00000000..a301bc58
--- /dev/null
+++ b/docs/specimens/html-align-to-css.html
@@ -0,0 +1,165 @@
+
+
+
+HTML align attribute to CSS - HTML Purifier Specimen
+
+
+
+
+