1 ;;;; character functions
3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!IMPL")
14 ;;; We compile some trivial character operations via inline expansion.
16 (declaim (inline standard-char-p graphic-char-p alpha-char-p
17 upper-case-p lower-case-p both-case-p alphanumericp
19 (declaim (maybe-inline digit-char-p
))
22 `(integer 0 (,sb
!xc
:char-code-limit
)))
25 (defvar **unicode-character-name-database
**)
26 (defvar **unicode-1-character-name-database
**)
27 (defvar **unicode-character-name-huffman-tree
**))
29 (defun sorted-position (item list
)
33 ((= item i
) (return-from sorted-position index
))
34 ((< item i
) (return-from sorted-position nil
))
35 (t (incf index
))))) nil
)
37 (defun pack-3-codepoints (first &optional
(second 0) (third 0))
38 (declare (type (unsigned-byte 21) first second third
))
39 (sb!c
::mask-signed-field
63 (logior first
(ash second
21) (ash third
42))))
43 (flet ((coerce-it (array)
44 (!coerce-to-specialized array
'(unsigned-byte 8)))
46 (merge-pathnames (make-pathname
48 '(:relative
:up
:up
"output")
49 :name name
:type type
)
50 sb
!xc
:*compile-file-truename
*))
51 (read-ub8-vector (pathname)
52 (with-open-file (stream pathname
54 :element-type
'(unsigned-byte 8))
55 (let* ((length (file-length stream
))
57 length
:element-type
'(unsigned-byte 8))))
58 (read-sequence array stream
)
60 (let ((misc-database (read-ub8-vector (file "ucdmisc" "dat")))
61 (ucd-high-pages (read-ub8-vector (file "ucdhigh" "dat")))
62 (ucd-low-pages (read-ub8-vector (file "ucdlow" "dat")))
63 (decompositions (read-ub8-vector (file "decomp" "dat")))
64 (primary-compositions (read-ub8-vector (file "comp" "dat")))
65 (case-data (read-ub8-vector (file "case" "dat")))
66 (case-pages (with-open-file (s (file "casepages" "lisp-expr"))
68 (collations (read-ub8-vector (file "collation" "dat"))))
71 (declaim (type (simple-array (unsigned-byte 8) (*))
72 **character-misc-database
**))
73 ;; KLUDGE: All temporary values, fixed up in cold-load
74 (defglobal **character-misc-database
** ,(coerce-it misc-database
))
75 (defglobal **character-high-pages
** ,(coerce-it ucd-high-pages
))
76 (defglobal **character-low-pages
** ,(coerce-it ucd-low-pages
))
77 (defglobal **character-decompositions
** ,(coerce-it decompositions
))
78 (defglobal **character-case-pages
** ',case-pages
)
79 (defglobal **character-primary-compositions
** ,(coerce-it primary-compositions
))
80 (defglobal **character-cases
** ,(coerce-it case-data
))
81 (defglobal **character-collations
** ,(coerce-it collations
))
83 (defun !character-database-cold-init
()
84 (flet ((make-ubn-vector (raw-bytes n
)
87 (/ (length raw-bytes
) n
)
88 :element-type
(list 'unsigned-byte
(* 8 n
)))))
89 (loop for i from
0 below
(length raw-bytes
) by n
91 (loop for offset from
0 below n do
93 (ash (aref raw-bytes
(+ i offset
))
94 (* 8 (- n offset
1)))))
95 (setf (aref new-array
(/ i n
)) element
))
97 (setf **character-misc-database
** ,misc-database
98 **character-high-pages
**
99 (make-ubn-vector ,ucd-high-pages
2)
100 **character-low-pages
**
101 (make-ubn-vector ,ucd-low-pages
2)
102 **character-case-pages
** ',case-pages
103 **character-decompositions
**
104 (make-ubn-vector ,decompositions
3))
106 (setf **character-primary-compositions
**
107 (let ((table (make-hash-table))
108 (info (make-ubn-vector ,primary-compositions
3)))
109 (dotimes (i (/ (length info
) 3))
110 (setf (gethash (dpb (aref info
(* 3 i
)) (byte 21 21)
111 (aref info
(1+ (* 3 i
))))
113 (aref info
(+ (* 3 i
) 2))))
116 (setf **character-cases
**
118 (make-hash-table ;; 64 characters in each page
119 :size
(* 64 (length **character-case-pages
**))
122 (let ((page (sorted-position
124 **character-case-pages
**)))
126 (+ (ash page
6) (ldb (byte 6 0) key
))
128 (info ,case-data
) (index 0)
129 (length (length info
)))
130 (labels ((read-codepoint ()
131 (let* ((b1 (aref info index
))
132 (b2 (aref info
(incf index
)))
133 (b3 (aref info
(incf index
))))
136 (dpb b2
(byte 8 8) b3
))))
137 (read-length-tagged ()
138 (let ((len (aref info index
)) ret
)
140 (if (zerop len
) (read-codepoint)
143 (push (read-codepoint) ret
))
145 (loop until
(>= index length
)
146 for key
= (read-codepoint)
147 for upper
= (read-length-tagged)
148 for lower
= (read-length-tagged)
149 do
(setf (gethash key table
) (cons upper lower
))))
152 (setf **character-collations
**
153 (let* ((table (make-hash-table))
154 (index 0) (info (make-ubn-vector ,collations
4))
156 (loop while
(< index len
) do
157 (let* ((entry-head (aref info index
))
158 (cp-length (ldb (byte 4 28) entry-head
))
159 (key-length (ldb (byte 5 23) entry-head
))
162 :element-type
'(unsigned-byte 32)))
164 (assert (and (/= cp-length
0) (/= key-length
0)))
165 (loop repeat cp-length do
166 (push (dpb 0 (byte 10 22) (aref info index
))
169 (setf codepoints
(nreverse codepoints
))
170 (dotimes (i key-length
)
171 (setf (aref key i
) (aref info index
))
174 (apply #'pack-3-codepoints codepoints
)
179 (stream (file "ucd-names" "lisp-expr")
181 :element-type
'character
)
182 (with-open-file (u1-stream (file "ucd1-names" "lisp-expr")
184 :element-type
'character
)
185 (let ((names (make-hash-table))
186 (u1-names (make-hash-table)))
188 for code-point
= (read stream nil nil
)
189 for char-name
= (string-upcase (read stream nil nil
))
191 do
(setf (gethash code-point names
) char-name
))
193 for code-point
= (read u1-stream nil nil
)
194 for char-name
= (string-upcase (read u1-stream nil nil
))
196 do
(setf (gethash code-point u1-names
) char-name
))
202 (maphash (lambda (code name
)
203 (declare (ignore code
))
206 (maphash (lambda (code u1-name
)
207 (declare (ignore code
))
212 (make-array (hash-table-count names
)
216 (make-array (hash-table-count u1-names
)
219 (maphash (lambda (code name
)
221 (cons code
(huffman-encode name tree
))
224 (maphash (lambda (code name
)
226 (cons code
(huffman-encode name tree
))
227 code-
>u1-name
)) u1-names
)
229 (sort (copy-seq code-
>name
) #'< :key
#'cdr
))
231 (sort (copy-seq name-
>code
) #'< :key
#'car
))
233 (sort (copy-seq code-
>u1-name
) #'< :key
#'cdr
))
235 (sort (copy-seq u1-name-
>code
) #'< :key
#'car
))
236 (setf names nil u1-names nil
)
237 `(defun !character-name-database-cold-init
()
238 (setq **unicode-character-name-database
**
239 (cons ',code-
>name
',name-
>code
)
240 **unicode-character-name-huffman-tree
** ',tree
241 **unicode-1-character-name-database
**
242 (cons ',code-
>u1-name
',u1-name-
>code
))))))))))))
245 #+sb-xc-host
(!character-name-database-cold-init
)
247 (defparameter *base-char-name-alist
*
248 ;; Note: The *** markers here indicate character names which are
249 ;; required by the ANSI specification of #'CHAR-NAME. For the others,
250 ;; we prefer the ASCII standard name.
251 '((#x00
"Nul" "Null" "^@")
258 ;; Don't alias to Bell, another Unicode character has that name.
260 (#x08
"Backspace" "^h" "Bs") ; *** See Note above
261 (#x09
"Tab" "^i" "Ht") ; *** See Note above
262 (#x0A
"Newline" "Linefeed" "^j" "Lf" "Nl") ; *** See Note above
264 (#x0C
"Page" "^l" "Form" "Formfeed" "Ff" "Np") ; *** See Note above
265 (#x0D
"Return" "^m" "Cr") ; *** See Note above
279 (#x1B
"Esc" "Escape" "^[" "Altmode" "Alt")
284 (#x20
"Space" "Sp") ; *** See Note above
285 (#x7f
"Rubout" "Delete" "Del")
288 (#x82
"Break-Permitted")
289 (#x83
"No-Break-Permitted")
292 (#x86
"Start-Selected-Area")
293 (#x87
"End-Selected-Area")
294 (#x88
"Character-Tabulation-Set")
295 (#x89
"Character-Tabulation-With-Justification")
296 (#x8A
"Line-Tabulation-Set")
297 (#x8B
"Partial-Line-Forward")
298 (#x8C
"Partial-Line-Backward")
299 (#x8D
"Reverse-Linefeed")
300 (#x8E
"Single-Shift-Two")
301 (#x8F
"Single-Shift-Three")
302 (#x90
"Device-Control-String")
303 (#x91
"Private-Use-One")
304 (#x92
"Private-Use-Two")
305 (#x93
"Set-Transmit-State")
306 (#x94
"Cancel-Character")
307 (#x95
"Message-Waiting")
308 (#x96
"Start-Guarded-Area")
309 (#x97
"End-Guarded-Area")
310 (#x98
"Start-String")
312 (#x9A
"Single-Character-Introducer")
313 (#x9B
"Control-Sequence-Introducer")
314 (#x9C
"String-Terminator")
315 (#x9D
"Operating-System-Command")
316 (#x9E
"Privacy-Message")
317 (#x9F
"Application-Program-Command"))) ; *** See Note above
319 ;;;; UCD accessor functions
321 ;;; The character database is made of several arrays.
322 ;;; **CHARACTER-MISC-DATABASE** is an array of bytes that encode character
323 ;;; attributes. Each entry in the misc database is +misc-width+ (currently 8)
324 ;;; bytes wide. Within each entry, the bytes represent: general category, BIDI
325 ;;; class, canonical combining class, digit value, decomposition info, other
326 ;;; flags, script, line break class, and age, respectively. Several of the
327 ;;; entries have additional information encoded in them at the bit level. The
328 ;;; digit value field is equal to 128 (has only its high bit set) if characters
329 ;;; with that set of attribute are not digits. Bit 6 is set if that entry
330 ;;; encodes decimal digits, that is, characters that are DIGIT-CHAR-P. The rest
331 ;;; of the value is the digit value of characters with that entry. Decomposition
332 ;;; info contains the length of the decomposition of characters with that entry,
333 ;;; and also sets its high bit if the decompositions are compatibility
334 ;;; decompositions. The other flags byte encodes boolean properties. Bit 7 is
335 ;;; set if the entry's characters are BOTH-CASE-P in the Common Lisp sense. Bit
336 ;;; 6 is set if the entry's characters hav a defined case transformation in
337 ;;; Unicode. Bit 5 is set if the characters have the property BIDI_Mirrored=Y.
338 ;;; Bits 3-0 encode the entry's East Asian Width. Bit 4 is unused. Age stores
339 ;;; the minor version in bits 0-2, and the major version in the remaining 5
342 ;;; To find which entry in **CHARACTER-MISC-DATABASE** encodes a character's
343 ;;; attributes, first index **CHARACTER-HIGH-PAGES** (an array of 16-bit
344 ;;; values) with the high 13 bits of the character's codepoint. If the result
345 ;;; value has its high bit set, the character is in a "compressed page". To
346 ;;; find the misc entry number, simply clear the high bit. If the high bit is
347 ;;; not set, the misc entry number must be looked up in
348 ;;; **CHARACTER-LOW-PAGES**, which is an array of 16-bit values. Each entry in
349 ;;; the array consists of two such values, the misc entry number and the
350 ;;; decomposition index. To find the misc entry number, index into
351 ;;; **CHARACTER-LOW-PAGES** using the value retreived from
352 ;;; **CHARACTER-HIGH-PAGES** (shifted left 8 bits) plus the low 8 bits of the
353 ;;; codepoint, all times two to account for the widtth of the entries. The
354 ;;; value in **CHARACTER-LOW-PAGES** at this point is the misc entry number. To
355 ;;; transform a misc entry number into an index into
356 ;;; **CHARACTER-MISC-DATABASE**, multiply it by +misc-width*. This gives the
357 ;;; index of the start of the charater's misc entry in
358 ;;; **CHARACTER-MISC-DATABASE**.
360 ;;; To look up a character's decomposition, first retreive its
361 ;;; decomposition-info from the misc database as described above. If the
362 ;;; decomposition info is not 0, the character has a decomposition with a
363 ;;; length given by the decomposition info with the high bit (which indicates
364 ;;; compatibility/canonical status) cleared. To find the decomposition, move
365 ;;; one value past the character's misc entry number in
366 ;;; **CHARACTER-LOW-DATABASE**, which gives an index into
367 ;;; **CHARACTER-DECOMPOSITIONS**. The next LENGTH values in
368 ;;; **CHARACTER-DECOMPOSITIONS** (an array of codepoints), starting at this
369 ;;; index, are the decomposition of the character. This proceduce does not
370 ;;; apply to Hangul syllables, which have their own decomposition algorithm.
372 ;;; Case information is stored in **CHARACTER-CASES**, a hash table that maps a
373 ;;; character's codepoint to (cons uppercase lowercase). Uppercase and
374 ;;; lowercase are either a single codepoint, which is the upper- or lower-case
375 ;;; of the given character, or a list of codepoints which taken as a whole are
376 ;;; the upper- or lower-case. These case lists are only used in Unicode case
377 ;;; transformations, not in Common Lisp ones.
379 ;;; Similarly, composition information is stored in **CHARACTER-COMPOSITIONS**,
380 ;;; which is a hash table of codepoints indexed by (+ (ash codepoint1 21)
383 (defun clear-flag (bit integer
)
384 (logandc2 integer
(ash 1 bit
)))
386 (defconstant +misc-width
+ 9)
388 (declaim (ftype (sfunction (t) (unsigned-byte 16)) misc-index
))
389 (defun misc-index (char)
390 (let* ((cp (char-code char
))
391 (cp-high (ash cp -
8))
392 (high-index (aref **character-high-pages
** cp-high
)))
393 (if (logbitp 15 high-index
)
394 (* +misc-width
+ (clear-flag 15 high-index
))
396 (aref **character-low-pages
**
397 (* 2 (+ (ldb (byte 8 0) cp
) (ash high-index
8))))))))
399 (declaim (ftype (sfunction (t) (unsigned-byte 8)) ucd-general-category
))
400 (defun ucd-general-category (char)
401 (aref **character-misc-database
** (misc-index char
)))
403 (defun ucd-decimal-digit (char)
404 (let ((digit (aref **character-misc-database
**
405 (+ 3 (misc-index char
)))))
406 (when (logbitp 6 digit
) ; decimalp flag
407 (ldb (byte 4 0) digit
))))
409 (defun char-code (char)
411 "Return the integer code of CHAR."
414 (defun char-int (char)
416 "Return the integer code of CHAR. (In SBCL this is the same as CHAR-CODE, as
417 there are no character bits or fonts.)"
420 (defun code-char (code)
422 "Return the character with the code CODE."
425 (defun character (object)
427 "Coerce OBJECT into a CHARACTER if possible. Legal inputs are characters,
428 strings and symbols of length 1."
429 (flet ((do-error (control args
)
430 (error 'simple-type-error
432 ;;?? how to express "symbol with name of length 1"?
433 :expected-type
'(or character
(string 1))
434 :format-control control
435 :format-arguments args
)))
438 (string (if (= 1 (length (the string object
)))
441 "String is not of length one: ~S" (list object
))))
442 (symbol (if (= 1 (length (symbol-name object
)))
443 (schar (symbol-name object
) 0)
445 "Symbol name is not of length one: ~S" (list object
))))
446 (t (do-error "~S cannot be coerced to a character." (list object
))))))
448 (defun char-name (char)
450 "Return the name (a STRING) for a CHARACTER object."
451 (let ((char-code (char-code char
)))
452 (or (second (assoc char-code
*base-char-name-alist
*))
453 (let ((h-code (cdr (binary-search char-code
454 (car **unicode-character-name-database
**)
458 (huffman-decode h-code
**unicode-character-name-huffman-tree
**))
460 (format nil
"U~X" char-code
)))))))
462 (defun name-char (name)
464 "Given an argument acceptable to STRING, NAME-CHAR returns a character whose
465 name is that string, if one exists. Otherwise, NIL is returned."
466 (or (let ((char-code (car (rassoc-if (lambda (names)
467 (member name names
:test
#'string-equal
))
468 *base-char-name-alist
*))))
470 (code-char char-code
)))
471 (let* ((%name
(string-upcase name
))
472 (encoding (huffman-encode (if (string= "U+" (subseq %name
0 2))
473 (remove #\
+ %name
:count
1)
475 **unicode-character-name-huffman-tree
**)))
479 (car (binary-search encoding
480 (cdr **unicode-character-name-database
**)
482 (car (binary-search encoding
483 (cdr **unicode-1-character-name-database
**)
485 (name-string (string name
))
486 (name-length (length name-string
)))
489 (code-char char-code
))
490 ((and (> name-length
1)
491 (char-equal (char name-string
0) #\U
)
493 (if (char-equal (char name-string
1) #\
+) 2 1)
495 always
(digit-char-p (char name-string i
) 16)))
496 (code-char (parse-integer name-string
:start
1 :radix
16)))
502 (defun standard-char-p (char)
504 "The argument must be a character object. STANDARD-CHAR-P returns T if the
505 argument is a standard character -- one of the 95 ASCII printing characters or
507 (and (typep char
'base-char
)
508 (let ((n (char-code (the base-char char
))))
512 (defun %standard-char-p
(thing)
514 "Return T if and only if THING is a standard-char. Differs from
515 STANDARD-CHAR-P in that THING doesn't have to be a character."
516 (and (characterp thing
) (standard-char-p thing
)))
518 (defun graphic-char-p (char)
520 "The argument must be a character object. GRAPHIC-CHAR-P returns T if the
521 argument is a printing character (space through ~ in ASCII), otherwise returns
523 (let ((n (char-code char
)))
527 (defun alpha-char-p (char)
529 "The argument must be a character object. ALPHA-CHAR-P returns T if the
530 argument is an alphabetic character, A-Z or a-z; otherwise NIL."
531 (< (ucd-general-category char
) 5))
533 (defun both-case-p (char)
535 "The argument must be a character object. BOTH-CASE-P returns T if the
536 argument is an alphabetic character and if the character exists in both upper
537 and lower case. For ASCII, this is the same as ALPHA-CHAR-P."
538 (logbitp 7 (aref **character-misc-database
** (+ 5 (misc-index char
)))))
540 (defun upper-case-p (char)
542 "The argument must be a character object; UPPER-CASE-P returns T if the
543 argument is an upper-case character, NIL otherwise."
544 (and (both-case-p char
) (= (ucd-general-category char
) 0)))
546 (defun lower-case-p (char)
548 "The argument must be a character object; LOWER-CASE-P returns T if the
549 argument is a lower-case character, NIL otherwise."
550 (and (both-case-p char
) (= (ucd-general-category char
) 1)))
552 (defun digit-char-p (char &optional
(radix 10.
))
554 "If char is a digit in the specified radix, returns the fixnum for which
555 that digit stands, else returns NIL."
556 (let ((m (- (char-code char
) 48)))
558 (cond ((and (<= radix
10.
) (<= m
79.
))
559 ;; Special-case ASCII digits in decimal and smaller radices.
560 (if (and (>= m
0) (< m radix
)) m nil
))
561 ;; Digits 0 - 9 are used as is, since radix is larger.
562 ((and (>= m
0) (< m
10)) m
)
563 ;; Check for upper case A - Z.
564 ((and (>= (setq m
(- m
7)) 10) (< m radix
)) m
)
565 ;; Also check lower case a - z.
566 ((and (>= (setq m
(- m
32)) 10) (< m radix
)) m
)
568 (t (let ((number (ucd-decimal-digit char
)))
569 (when (and number
(< number radix
))
572 (defun alphanumericp (char)
574 "Given a character-object argument, ALPHANUMERICP returns T if the argument
575 is either numeric or alphabetic."
576 (let ((gc (ucd-general-category char
)))
580 (defun char= (character &rest more-characters
)
582 "Return T if all of the arguments are the same character."
583 (declare (truly-dynamic-extent more-characters
))
584 (dolist (c more-characters t
)
585 (declare (type character c
))
586 (unless (eq c character
) (return nil
))))
588 (defun char/= (character &rest more-characters
)
590 "Return T if no two of the arguments are the same character."
591 (declare (truly-dynamic-extent more-characters
))
592 (do* ((head character
(car list
))
593 (list more-characters
(cdr list
)))
595 (declare (type character head
))
597 (declare (type character c
))
598 (when (eq head c
) (return-from char
/= nil
)))))
600 (defun char< (character &rest more-characters
)
602 "Return T if the arguments are in strictly increasing alphabetic order."
603 (declare (truly-dynamic-extent more-characters
))
604 (do* ((c character
(car list
))
605 (list more-characters
(cdr list
)))
607 (unless (< (char-int c
)
608 (char-int (car list
)))
611 (defun char> (character &rest more-characters
)
613 "Return T if the arguments are in strictly decreasing alphabetic order."
614 (declare (truly-dynamic-extent more-characters
))
615 (do* ((c character
(car list
))
616 (list more-characters
(cdr list
)))
618 (unless (> (char-int c
)
619 (char-int (car list
)))
622 (defun char<= (character &rest more-characters
)
624 "Return T if the arguments are in strictly non-decreasing alphabetic order."
625 (declare (truly-dynamic-extent more-characters
))
626 (do* ((c character
(car list
))
627 (list more-characters
(cdr list
)))
629 (unless (<= (char-int c
)
630 (char-int (car list
)))
633 (defun char>= (character &rest more-characters
)
635 "Return T if the arguments are in strictly non-increasing alphabetic order."
636 (declare (truly-dynamic-extent more-characters
))
637 (do* ((c character
(car list
))
638 (list more-characters
(cdr list
)))
640 (unless (>= (char-int c
)
641 (char-int (car list
)))
644 ;;; EQUAL-CHAR-CODE is used by the following functions as a version of CHAR-INT
645 ;;; which loses font, bits, and case info.
647 (defmacro equal-char-code
(character)
649 `(let ((,ch
,character
))
650 (if (both-case-p ,ch
)
651 (cdr (gethash (char-code ,ch
) **character-cases
**))
654 (defun two-arg-char-equal (c1 c2
)
655 (flet ((base-char-equal-p ()
656 (let* ((code1 (char-code c1
))
657 (code2 (char-code c2
))
658 (sum (logxor code1 code2
)))
660 (let ((sum (+ code1 code2
)))
661 (or (and (> sum
161) (< sum
213))
662 (and (> sum
415) (< sum
461))
663 (and (> sum
463) (< sum
477))))))))
664 (declare (inline base-char-equal-p
))
671 (and (base-char-p c2
)
672 (base-char-equal-p)))
674 (= (equal-char-code c1
) (equal-char-code c2
)))))))
676 (defun char-equal-constant (x char reverse-case-char
)
677 (declare (type character x
))
679 (eq reverse-case-char x
)))
681 (defun char-equal (character &rest more-characters
)
683 "Return T if all of the arguments are the same character.
685 (declare (truly-dynamic-extent more-characters
))
686 (do ((clist more-characters
(cdr clist
)))
688 (unless (two-arg-char-equal (car clist
) character
)
691 (defun two-arg-char-not-equal (c1 c2
)
692 (/= (equal-char-code c1
) (equal-char-code c2
)))
694 (defun char-not-equal (character &rest more-characters
)
696 "Return T if no two of the arguments are the same character.
698 (declare (truly-dynamic-extent more-characters
))
699 (do* ((head character
(car list
))
700 (list more-characters
(cdr list
)))
702 (unless (do* ((l list
(cdr l
)))
704 (if (two-arg-char-equal head
(car l
))
708 (defun two-arg-char-lessp (c1 c2
)
709 (< (equal-char-code c1
) (equal-char-code c2
)))
711 (defun char-lessp (character &rest more-characters
)
713 "Return T if the arguments are in strictly increasing alphabetic order.
715 (declare (truly-dynamic-extent more-characters
))
716 (do* ((c character
(car list
))
717 (list more-characters
(cdr list
)))
719 (unless (two-arg-char-lessp c
(car list
))
722 (defun two-arg-char-greaterp (c1 c2
)
723 (> (equal-char-code c1
) (equal-char-code c2
)))
725 (defun char-greaterp (character &rest more-characters
)
727 "Return T if the arguments are in strictly decreasing alphabetic order.
729 (declare (truly-dynamic-extent more-characters
))
730 (do* ((c character
(car list
))
731 (list more-characters
(cdr list
)))
733 (unless (two-arg-char-greaterp c
(car list
))
736 (defun two-arg-char-not-greaterp (c1 c2
)
737 (<= (equal-char-code c1
) (equal-char-code c2
)))
739 (defun char-not-greaterp (character &rest more-characters
)
741 "Return T if the arguments are in strictly non-decreasing alphabetic order.
743 (declare (truly-dynamic-extent more-characters
))
744 (do* ((c character
(car list
))
745 (list more-characters
(cdr list
)))
747 (unless (two-arg-char-not-greaterp c
(car list
))
750 (defun two-arg-char-not-lessp (c1 c2
)
751 (>= (equal-char-code c1
) (equal-char-code c2
)))
753 (defun char-not-lessp (character &rest more-characters
)
755 "Return T if the arguments are in strictly non-increasing alphabetic order.
757 (declare (truly-dynamic-extent more-characters
))
758 (do* ((c character
(car list
))
759 (list more-characters
(cdr list
)))
761 (unless (two-arg-char-not-lessp c
(car list
))
764 ;;;; miscellaneous functions
766 (defun char-upcase (char)
768 "Return CHAR converted to upper-case if that is possible. Don't convert
769 lowercase eszet (U+DF)."
770 (if (both-case-p char
)
771 (code-char (car (gethash (char-code char
) **character-cases
**)))
774 (defun char-downcase (char)
776 "Return CHAR converted to lower-case if that is possible."
777 (if (both-case-p char
)
778 (code-char (cdr (gethash (char-code char
) **character-cases
**)))
781 (defun digit-char (weight &optional
(radix 10))
783 "All arguments must be integers. Returns a character object that represents
784 a digit of the given weight in the specified radix. Returns NIL if no such
786 (and (typep weight
'fixnum
)
787 (>= weight
0) (< weight radix
) (< weight
36)
788 (code-char (if (< weight
10) (+ 48 weight
) (+ 55 weight
)))))