From f0bbf4abf056387eeb211788c17eecc682aabcda Mon Sep 17 00:00:00 2001 From: dlichteblau Date: Sat, 22 Dec 2007 15:24:52 +0000 Subject: [PATCH] Use 21 bit characters on Lisp offering them. --- XMLCONF | 54 +++++++++++++++++++++++----------------------- doc/index.xml | 61 ++++++++++++++++++++++++++++++++++++---------------- doc/installation.xml | 13 +++++------ xml/xml-parse.lisp | 22 +++++++++---------- 4 files changed, 87 insertions(+), 63 deletions(-) diff --git a/XMLCONF b/XMLCONF index e89ec17..50778c9 100644 --- a/XMLCONF +++ b/XMLCONF @@ -8,19 +8,19 @@ xmltest/not-wf/sa/007.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/008.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/009.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/010.xml [not validating:] not-wf [validating:] invalid -xmltest/not-wf/sa/011.xml [not validating:] not-wf [validating:] not-wf -xmltest/not-wf/sa/012.xml [not validating:] not-wf [validating:] not-wf -xmltest/not-wf/sa/013.xml [not validating:] not-wf [validating:] not-wf -xmltest/not-wf/sa/014.xml [not validating:] not-wf [validating:] not-wf -xmltest/not-wf/sa/015.xml [not validating:] not-wf [validating:] not-wf -xmltest/not-wf/sa/016.xml [not validating:] not-wf [validating:] not-wf +xmltest/not-wf/sa/011.xml [not validating:] not-wf [validating:] invalid +xmltest/not-wf/sa/012.xml [not validating:] not-wf [validating:] invalid +xmltest/not-wf/sa/013.xml [not validating:] not-wf [validating:] invalid +xmltest/not-wf/sa/014.xml [not validating:] not-wf [validating:] invalid +xmltest/not-wf/sa/015.xml [not validating:] not-wf [validating:] invalid +xmltest/not-wf/sa/016.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/017.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/018.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/019.xml [not validating:] not-wf [validating:] invalid -xmltest/not-wf/sa/020.xml [not 
validating:] not-wf [validating:] not-wf -xmltest/not-wf/sa/021.xml [not validating:] not-wf [validating:] not-wf -xmltest/not-wf/sa/022.xml [not validating:] not-wf [validating:] not-wf -xmltest/not-wf/sa/023.xml [not validating:] not-wf [validating:] not-wf +xmltest/not-wf/sa/020.xml [not validating:] not-wf [validating:] invalid +xmltest/not-wf/sa/021.xml [not validating:] not-wf [validating:] invalid +xmltest/not-wf/sa/022.xml [not validating:] not-wf [validating:] invalid +xmltest/not-wf/sa/023.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/024.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/025.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/026.xml [not validating:] not-wf [validating:] invalid @@ -31,11 +31,11 @@ xmltest/not-wf/sa/030.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/031.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/032.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/033.xml [not validating:] not-wf [validating:] invalid -xmltest/not-wf/sa/034.xml [not validating:] not-wf [validating:] not-wf +xmltest/not-wf/sa/034.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/035.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/036.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/037.xml [not validating:] not-wf [validating:] invalid -xmltest/not-wf/sa/038.xml [not validating:] not-wf [validating:] not-wf +xmltest/not-wf/sa/038.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/039.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/040.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/041.xml [not validating:] not-wf [validating:] invalid @@ -73,7 +73,7 @@ xmltest/not-wf/sa/072.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/073.xml [not validating:] not-wf [validating:] invalid 
xmltest/not-wf/sa/074.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/075.xml [not validating:] not-wf [validating:] not-wf -xmltest/not-wf/sa/076.xml [not validating:] not-wf [validating:] not-wf +xmltest/not-wf/sa/076.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/077.xml [not validating:] not-wf [validating:] not-wf xmltest/not-wf/sa/078.xml [not validating:] not-wf [validating:] not-wf xmltest/not-wf/sa/079.xml [not validating:] not-wf [validating:] not-wf @@ -170,7 +170,7 @@ xmltest/not-wf/sa/169.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/170.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/171.xml [not validating:] not-wf [validating:] not-wf xmltest/not-wf/sa/172.xml [not validating:] not-wf [validating:] not-wf -xmltest/not-wf/sa/173.xml [not validating:] not-wf [validating:] not-wf +xmltest/not-wf/sa/173.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/174.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/175.xml [not validating:] not-wf [validating:] not-wf xmltest/not-wf/sa/176.xml [not validating:] not-wf [validating:] not-wf @@ -688,9 +688,9 @@ oasis/p03fail5.xml [not validating:] not-wf [validating:] not-wf oasis/p03fail7.xml [not validating:] not-wf [validating:] not-wf oasis/p03fail8.xml [not validating:] not-wf [validating:] not-wf oasis/p03fail9.xml [not validating:] not-wf [validating:] not-wf -oasis/p04fail1.xml [not validating:] not-wf [validating:] not-wf -oasis/p04fail2.xml [not validating:] not-wf [validating:] not-wf -oasis/p04fail3.xml [not validating:] not-wf [validating:] not-wf +oasis/p04fail1.xml [not validating:] not-wf [validating:] invalid +oasis/p04fail2.xml [not validating:] not-wf [validating:] invalid +oasis/p04fail3.xml [not validating:] not-wf [validating:] invalid oasis/p05fail1.xml [not validating:] not-wf [validating:] not-wf oasis/p05fail2.xml [not validating:] not-wf [validating:] not-wf 
oasis/p05fail3.xml [not validating:] not-wf [validating:] not-wf @@ -701,9 +701,9 @@ oasis/p09fail2.xml [not validating:] not-wf [validating:] not-wf oasis/p09fail3.xml [not validating:] not-wf [validating:] not-wf oasis/p09fail4.xml [not validating:] not-wf [validating:] not-wf oasis/p09fail5.xml [not validating:] not-wf [validating:] not-wf -oasis/p10fail1.xml [not validating:] not-wf [validating:] not-wf -oasis/p10fail2.xml [not validating:] not-wf [validating:] not-wf -oasis/p10fail3.xml [not validating:] not-wf [validating:] not-wf +oasis/p10fail1.xml [not validating:] not-wf [validating:] invalid +oasis/p10fail2.xml [not validating:] not-wf [validating:] invalid +oasis/p10fail3.xml [not validating:] not-wf [validating:] invalid oasis/p11fail1.xml [not validating:] not-wf [validating:] not-wf oasis/p11fail2.xml [not validating:] not-wf [validating:] not-wf oasis/p12fail1.xml [not validating:] not-wf [validating:] not-wf @@ -752,24 +752,24 @@ oasis/p39fail2.xml [not validating:] not-wf [validating:] invalid oasis/p39fail3.xml [not validating:] not-wf [validating:] invalid oasis/p39fail4.xml [not validating:] not-wf [validating:] not-wf oasis/p39fail5.xml [not validating:] not-wf [validating:] not-wf -oasis/p40fail1.xml [not validating:] not-wf [validating:] not-wf +oasis/p40fail1.xml [not validating:] not-wf [validating:] invalid oasis/p40fail2.xml [not validating:] not-wf [validating:] not-wf oasis/p40fail3.xml [not validating:] not-wf [validating:] not-wf oasis/p40fail4.xml [not validating:] not-wf [validating:] not-wf oasis/p41fail1.xml [not validating:] not-wf [validating:] not-wf oasis/p41fail2.xml [not validating:] not-wf [validating:] not-wf -oasis/p41fail3.xml [not validating:] not-wf [validating:] not-wf +oasis/p41fail3.xml [not validating:] not-wf [validating:] invalid oasis/p42fail1.xml [not validating:] not-wf [validating:] invalid oasis/p42fail2.xml [not validating:] not-wf [validating:] invalid -oasis/p42fail3.xml [not validating:] not-wf 
[validating:] not-wf +oasis/p42fail3.xml [not validating:] not-wf [validating:] invalid oasis/p43fail1.xml [not validating:] not-wf [validating:] not-wf oasis/p43fail2.xml [not validating:] not-wf [validating:] not-wf oasis/p43fail3.xml [not validating:] not-wf [validating:] not-wf oasis/p44fail1.xml [not validating:] not-wf [validating:] not-wf -oasis/p44fail2.xml [not validating:] not-wf [validating:] not-wf -oasis/p44fail3.xml [not validating:] not-wf [validating:] not-wf -oasis/p44fail4.xml [not validating:] not-wf [validating:] not-wf -oasis/p44fail5.xml [not validating:] not-wf [validating:] not-wf +oasis/p44fail2.xml [not validating:] not-wf [validating:] invalid +oasis/p44fail3.xml [not validating:] not-wf [validating:] invalid +oasis/p44fail4.xml [not validating:] not-wf [validating:] invalid +oasis/p44fail5.xml [not validating:] not-wf [validating:] invalid oasis/p45fail1.xml [not validating:] not-wf [validating:] not-wf oasis/p45fail2.xml [not validating:] not-wf [validating:] not-wf oasis/p45fail3.xml [not validating:] not-wf [validating:] not-wf diff --git a/doc/index.xml b/doc/index.xml index 5d3db48..bf03985 100644 --- a/doc/index.xml +++ b/doc/index.xml @@ -35,28 +35,53 @@

The following libraries are available as separate downloads:

-

- ⬗  - cxml-rng -    - Relax NG validation -

-

- ⬗  - cxml-stp -    - STP, an alternative to DOM -

-

- ⬗  - Closure - HTML: cxml can be used together with its sister project - Closure HTML to convert between HTML and XHTML. -

+

Recent Changes

+

rel-2007-10-21

+
trivial-gray-streams.

- +

+ On Lisps without Unicode support, cxml uses integers instead of + characters to represent code points internally. By default, + however, the parser will recode all data to strings containing + characters representing UTF-8 bytes before passing it to user + code. +

Compilation

diff --git a/xml/xml-parse.lisp b/xml/xml-parse.lisp index 7113273..ddf6aab 100644 --- a/xml/xml-parse.lisp +++ b/xml/xml-parse.lisp @@ -458,7 +458,8 @@ (defmacro %put-unicode-char (code-var put) `(progn - (cond ((%> ,code-var #xFFFF) + (cond #+rune-is-utf-16 + ((%> ,code-var #xFFFF) (,put (the rune (code-rune (%+ #xD7C0 (%ash ,code-var -10))))) (,put (the rune (code-rune (%ior #xDC00 (%and ,code-var #x03FF)))))) (t @@ -1489,19 +1490,14 @@ value)))) (definline data-rune-p (rune) - ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. - ;; - ;; FIXME: das halte ich fuer verkehrt. Surrogates als Unicode-Zeichen - ;; sind verboten. Das liegt hier aber nicht vor, denn wir arbeiten - ;; ja tatsaechlich mit UTF-16. Verboten ist es nur, wenn wir ein - ;; solches Zeichen beim Dekodieren finden, das wird aber eben - ;; in encodings.lisp bereits geprueft. --david + ;; Accepts #x9, #xA, #xD, #x20-#xD7FF and #xE000-#xFFFD (so FFFE and FFFF fail). + ;; With rune-is-utf-16, surrogates #xD800-#xDFFF also pass; otherwise #x10000-#x10FFFF do. (let ((c (rune-code rune))) (or (= c #x9) (= c #xA) (= c #xD) (<= #x20 c #xD7FF) + #+rune-is-utf-16 (<= #xD800 c #xDFFF) (<= #xE000 c #xFFFD) - (<= #xD800 c #xDBFF) - (<= #xDC00 c #xDFFF)))) + #-rune-is-utf-16 (<= #x10000 c #x10FFFF)))) (defun read-att-value (zinput input mode &optional canon-space-p (delim nil)) (with-rune-collector-2 (collect) @@ -1761,11 +1757,13 @@ (rune= rune #/U+000D))) (defun code-data-char-p (c) - ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + ;; Accepts #x9, #xA, #xD, #x20-#xD7FF and #xE000-#xFFFD (so FFFE and FFFF fail). + ;; With rune-is-utf-16, surrogates #xD800-#xDFFF also pass; otherwise #x10000-#x10FFFF do. (or (= c #x9) (= c #xA) (= c #xD) (<= #x20 c #xD7FF) + #+rune-is-utf-16 (<= #xD800 c #xDFFF) (<= #xE000 c #xFFFD) - (<= #x10000 c #x10FFFF))) + #-rune-is-utf-16 (<= #x10000 c #x10FFFF))) (defun pubid-char-p (c) (or (rune= c #/u+0020) (rune= c #/u+000D) (rune= c #/u+000A) -- 2.11.4.GIT