From 1b631ed1f89dba42cd3d4d9692f82969b89ec706 Mon Sep 17 00:00:00 2001 From: David Lichteblau Date: Sat, 22 Dec 2007 16:26:37 +0100 Subject: [PATCH] Use 21 bit characters on Lisps offering them. --- unicode.lisp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/unicode.lisp b/unicode.lisp index b70fa87..a6b0454 100644 --- a/unicode.lisp +++ b/unicode.lisp @@ -28,6 +28,11 @@ (in-package :cxml-types) +;;; FIXME: On Lisps using UTF-16 characters, we are unable to recognize +;;; ranges including code points above #x10000. To do so, we would have +;;; to check for substrings of up to two characters rather than individual +;;; characters. + (eval-when (:compile-toplevel :load-toplevel :execute) (defconstant +limit-1+ (1- cl-ppcre::*regex-char-code-limit*))) @@ -54,8 +59,12 @@ (let ((result nil)) (labels ((range* (min max) (when (and (< min max) - (not (<= #xD800 min #xDFFF)) - (not (<= #xD800 (1- max) #xDFFF))) + #-rune-is-utf-16 + (not (or (<= #xD800 min #xDFFF) + (<= #xD800 (1- max) #xDFFF))) + #+rune-is-utf-16 + ;; FIXME: See surrogate comment above. + (not (>= max #x10000))) (push (list :range (code-char min) (code-char (1- max))) result)))) (range* amin (min bmin amax)) -- 2.11.4.GIT