1 ;;;; character functions
3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!IMPL")
14 ;;; We compile some trivial character operations via inline expansion.
16 (declaim (inline standard-char-p graphic-char-p alpha-char-p
18 (declaim (maybe-inline upper-case-p lower-case-p both-case-p
22 `(integer 0 (,sb
!xc
:char-code-limit
)))
24 (defglobal **unicode-character-name-huffman-tree
** ())
26 (declaim (inline pack-3-codepoints
))
27 (defun pack-3-codepoints (first &optional
(second 0) (third 0))
28 (declare (type (unsigned-byte 21) first second third
))
29 (sb!c
::mask-signed-field
63 (logior first
(ash second
21) (ash third
42))))
32 (flet ((coerce-it (array)
33 (!coerce-to-specialized array
'(unsigned-byte 8)))
35 (let ((dir (sb-cold:prepend-genfile-path
"output/")))
36 (make-pathname :directory
(pathname-directory (merge-pathnames dir
))
37 :name name
:type type
)))
38 (read-ub8-vector (pathname)
39 (with-open-file (stream pathname
40 :element-type
'(unsigned-byte 8))
41 (let* ((length (file-length stream
))
43 length
:element-type
'(unsigned-byte 8))))
44 (read-sequence array stream
)
46 (init-global (name type
&optional length
)
49 ,(if (eql type
'hash-table
)
51 `(make-array ,length
:element-type
',type
)))
52 (declaim (type ,(if (eql type
'hash-table
)
54 `(simple-array ,type
(,length
))) ,name
)))))
55 (let ((misc-database (coerce-it (read-ub8-vector (file "ucdmisc" "dat"))))
56 (ucd-high-pages (coerce-it (read-ub8-vector (file "ucdhigh" "dat"))))
57 (ucd-low-pages (coerce-it (read-ub8-vector (file "ucdlow" "dat"))))
58 (decompositions (coerce-it (read-ub8-vector (file "decomp" "dat"))))
59 (primary-compositions (coerce-it (read-ub8-vector (file "comp" "dat"))))
60 (case-data (coerce-it (read-ub8-vector (file "case" "dat"))))
61 (case-pages (coerce-it (read-ub8-vector (file "casepages" "dat"))))
62 (collations (coerce-it (read-ub8-vector (file "collation" "dat")))))
64 ;; KLUDGE: All temporary values, fixed up in cold-load
65 ,(init-global '**character-misc-database
** '(unsigned-byte 8)
66 (length misc-database
))
67 ,(init-global '**character-high-pages
** '(unsigned-byte 16)
68 (/ (length ucd-high-pages
) 2))
69 ,(init-global '**character-low-pages
** '(unsigned-byte 16)
70 (/ (length ucd-low-pages
) 2))
71 ,(init-global '**character-decompositions
** '(unsigned-byte 21)
72 (/ (length decompositions
) 3))
73 ,(init-global '**character-case-pages
** '(unsigned-byte 8)
75 ,(init-global '**character-primary-compositions
** 'hash-table
)
76 ,(init-global '**character-unicode-cases
** t
77 (* 64 (1+ (aref case-pages
78 (1- (length case-pages
))))))
79 ,(init-global '**character-cases
** '(unsigned-byte 32)
80 (* 2 64 (1+ (aref case-pages
81 (1- (length case-pages
))))))
82 ,(init-global '**character-collations
** 'hash-table
)
84 (defun !character-database-cold-init
()
85 (flet ((make-ubn-vector (raw-bytes n
)
88 (/ (length raw-bytes
) n
)
89 :element-type
(list 'unsigned-byte
(* 8 n
)))))
90 (loop for i from
0 below
(length raw-bytes
) by n
92 (loop for offset from
0 below n do
94 (ash (aref raw-bytes
(+ i offset
))
95 (* 8 (- n offset
1)))))
96 (setf (aref new-array
(/ i n
)) element
))
98 (setf **character-misc-database
**
100 **character-high-pages
**
101 (make-ubn-vector ,ucd-high-pages
2)
102 **character-low-pages
**
103 (make-ubn-vector ,ucd-low-pages
2)
104 **character-case-pages
**
105 ,(coerce-it case-pages
)
106 **character-decompositions
**
107 (make-ubn-vector ,decompositions
3))
109 (setf **character-primary-compositions
**
110 (let ((table (make-hash-table))
111 (info (make-ubn-vector ,primary-compositions
3)))
112 (dotimes (i (/ (length info
) 3))
113 (setf (gethash (dpb (aref info
(* 3 i
)) (byte 21 21)
114 (aref info
(1+ (* 3 i
))))
116 (aref info
(+ (* 3 i
) 2))))
119 (let* ((unicode-table
121 (* 64 (1+ (aref **character-case-pages
**
122 (1- (length **character-case-pages
**)))))))
124 (* 2 (length unicode-table
))
125 :element-type
'(unsigned-byte 32)))
128 (length (length info
)))
129 (labels ((read-codepoint ()
130 (let* ((b1 (aref info index
))
131 (b2 (aref info
(incf index
)))
132 (b3 (aref info
(incf index
))))
135 (dpb b2
(byte 8 8) b3
))))
136 (read-length-tagged ()
137 (let ((len (aref info index
)) ret
)
143 (push (read-codepoint) ret
))
145 (loop until
(>= index length
)
146 for key
= (read-codepoint)
147 for upper
= (read-length-tagged)
148 for lower
= (read-length-tagged)
149 for page
= (aref **character-case-pages
** (ash key -
6))
150 for i
= (+ (ash page
6) (ldb (byte 6 0) key
))
152 (setf (aref unicode-table i
) (cons upper lower
))
156 (logbitp 7 (aref **character-misc-database
**
157 (+ 5 (misc-index (code-char code
)))))))
160 ;; Some characters are only equal under unicode rules,
161 ;; e.g. #\MICRO_SIGN and #\GREEK_CAPITAL_LETTER_MU
165 (both-case-p upper
)))
167 (setf (aref table
(* i
2)) lower
168 (aref table
(1+ (* i
2))) upper
)))
169 (setf **character-unicode-cases
** unicode-table
)
170 (setf **character-cases
** table
))
172 (setf **character-collations
**
173 (let* ((table (make-hash-table))
175 (info (make-ubn-vector ,collations
4))
177 (loop while
(< index len
) do
178 (let* ((entry-head (aref info index
))
179 (cp-length (ldb (byte 4 28) entry-head
))
180 (key-length (ldb (byte 5 23) entry-head
))
183 :element-type
'(unsigned-byte 32)))
185 (assert (and (/= cp-length
0) (/= key-length
0)))
186 (loop repeat cp-length do
187 (push (dpb 0 (byte 10 22) (aref info index
))
190 (setf codepoints
(nreverse codepoints
))
191 (dotimes (i key-length
)
192 (setf (aref key i
) (aref info index
))
195 (apply #'pack-3-codepoints codepoints
)
196 table
) (logically-readonlyize key
))))
200 (stream (file "ucd-names" "lisp-expr"))
201 (with-open-file (u1-stream (file "ucd1-names" "lisp-expr"))
202 (flet ((convert-to-double-vector (vector &optional reversed
)
203 (let ((result (make-array (* (length vector
) 2))))
204 (loop for
(code . name
) across vector
209 (setf (aref result i
) code
210 (aref result
(1+ i
)) name
))
212 (let ((names (make-hash-table))
213 (u1-names (make-hash-table)))
215 for code-point
= (read stream nil nil
)
216 for char-name
= (string-upcase (read stream nil nil
))
218 do
(setf (gethash code-point names
) char-name
))
220 for code-point
= (read u1-stream nil nil
)
221 for char-name
= (string-upcase (read u1-stream nil nil
))
223 do
(setf (gethash code-point u1-names
) char-name
))
227 (maphash (lambda (code name
)
228 (declare (ignore code
))
231 (maphash (lambda (code u1-name
)
232 (declare (ignore code
))
237 (make-array (hash-table-count names
)
241 (make-array (hash-table-count u1-names
)
244 (maphash (lambda (code name
)
246 (cons code
(huffman-encode name tree
))
249 (maphash (lambda (code name
)
251 (cons code
(huffman-encode name tree
))
255 (sort (copy-seq code-
>name
) #'< :key
#'cdr
)
257 (sort (copy-seq name-
>code
) #'< :key
#'car
)
259 (sort (copy-seq code-
>u1-name
) #'< :key
#'cdr
)
261 (sort (copy-seq u1-name-
>code
) #'< :key
#'car
))
263 ,(init-global '**unicode-char-name-database
** t
264 (* 2 (length code-
>name
)))
265 ,(init-global '**unicode-name-char-database
** t
266 (* 2 (length name-
>code
)))
267 ,(init-global '**unicode-1-char-name-database
** t
268 (* 2 (length code-
>u1-name
)))
269 ,(init-global '**unicode-1-name-char-database
** t
270 (* 2 (length u1-name-
>code
)))
271 (defun !character-name-database-cold-init
()
272 (setf **unicode-character-name-huffman-tree
** ',tree
273 **unicode-char-name-database
**
274 ',(convert-to-double-vector code-
>name
)
275 **unicode-name-char-database
**
276 ',(convert-to-double-vector name-
>code t
)
277 **unicode-1-char-name-database
**
278 ',(convert-to-double-vector code-
>u1-name
)
279 **unicode-1-name-char-database
**
280 ',(convert-to-double-vector u1-name-
>code t
))))))))))))))
283 #+sb-xc-host
(!character-name-database-cold-init
)
285 (defglobal *base-char-name-alist
*
286 ;; Note: The *** markers here indicate character names which are
287 ;; required by the ANSI specification of #'CHAR-NAME. For the others,
288 ;; we prefer the ASCII standard name.
289 '((#x00
"Nul" "Null" "^@")
296 ;; Don't alias to Bell, another Unicode character has that name.
298 (#x08
"Backspace" "^h" "Bs") ; *** See Note above
299 (#x09
"Tab" "^i" "Ht") ; *** See Note above
300 (#x0A
"Newline" "Linefeed" "^j" "Lf" "Nl") ; *** See Note above
302 (#x0C
"Page" "^l" "Form" "Formfeed" "Ff" "Np") ; *** See Note above
303 (#x0D
"Return" "^m" "Cr") ; *** See Note above
317 (#x1B
"Esc" "Escape" "^[" "Altmode" "Alt")
322 (#x20
"Space" "Sp") ; *** See Note above
323 (#x7f
"Rubout" "Delete" "Del")
326 (#x82
"Break-Permitted")
327 (#x83
"No-Break-Permitted")
330 (#x86
"Start-Selected-Area")
331 (#x87
"End-Selected-Area")
332 (#x88
"Character-Tabulation-Set")
333 (#x89
"Character-Tabulation-With-Justification")
334 (#x8A
"Line-Tabulation-Set")
335 (#x8B
"Partial-Line-Forward")
336 (#x8C
"Partial-Line-Backward")
337 (#x8D
"Reverse-Linefeed")
338 (#x8E
"Single-Shift-Two")
339 (#x8F
"Single-Shift-Three")
340 (#x90
"Device-Control-String")
341 (#x91
"Private-Use-One")
342 (#x92
"Private-Use-Two")
343 (#x93
"Set-Transmit-State")
344 (#x94
"Cancel-Character")
345 (#x95
"Message-Waiting")
346 (#x96
"Start-Guarded-Area")
347 (#x97
"End-Guarded-Area")
348 (#x98
"Start-String")
350 (#x9A
"Single-Character-Introducer")
351 (#x9B
"Control-Sequence-Introducer")
352 (#x9C
"String-Terminator")
353 (#x9D
"Operating-System-Command")
354 (#x9E
"Privacy-Message")
355 (#x9F
"Application-Program-Command"))) ; *** See Note above
357 ;;;; UCD accessor functions
359 ;;; The character database is made of several arrays.
360 ;;; **CHARACTER-MISC-DATABASE** is an array of bytes that encode character
361 ;;; attributes. Each entry in the misc database is +misc-width+ (currently 8)
362 ;;; bytes wide. Within each entry, the bytes represent: general category, BIDI
363 ;;; class, canonical combining class, digit value, decomposition info, other
364 ;;; flags, script, line break class, and age, respectively. Several of the
365 ;;; entries have additional information encoded in them at the bit level. The
366 ;;; digit value field is equal to 128 (has only its high bit set) if characters
367 ;;; with that set of attribute are not digits. Bit 6 is set if that entry
368 ;;; encodes decimal digits, that is, characters that are DIGIT-CHAR-P. The rest
369 ;;; of the value is the digit value of characters with that entry. Decomposition
370 ;;; info contains the length of the decomposition of characters with that entry,
371 ;;; and also sets its high bit if the decompositions are compatibility
372 ;;; decompositions. The other flags byte encodes boolean properties. Bit 7 is
373 ;;; set if the entry's characters are BOTH-CASE-P in the Common Lisp sense. Bit
374 ;;; 6 is set if the entry's characters hav a defined case transformation in
375 ;;; Unicode. Bit 5 is set if the characters have the property BIDI_Mirrored=Y.
376 ;;; Bits 3-0 encode the entry's East Asian Width. Bit 4 is unused. Age stores
377 ;;; the minor version in bits 0-2, and the major version in the remaining 5
380 ;;; To find which entry in **CHARACTER-MISC-DATABASE** encodes a character's
381 ;;; attributes, first index **CHARACTER-HIGH-PAGES** (an array of 16-bit
382 ;;; values) with the high 13 bits of the character's codepoint. If the result
383 ;;; value has its high bit set, the character is in a "compressed page". To
384 ;;; find the misc entry number, simply clear the high bit. If the high bit is
385 ;;; not set, the misc entry number must be looked up in
386 ;;; **CHARACTER-LOW-PAGES**, which is an array of 16-bit values. Each entry in
387 ;;; the array consists of two such values, the misc entry number and the
388 ;;; decomposition index. To find the misc entry number, index into
389 ;;; **CHARACTER-LOW-PAGES** using the value retreived from
390 ;;; **CHARACTER-HIGH-PAGES** (shifted left 8 bits) plus the low 8 bits of the
391 ;;; codepoint, all times two to account for the widtth of the entries. The
392 ;;; value in **CHARACTER-LOW-PAGES** at this point is the misc entry number. To
393 ;;; transform a misc entry number into an index into
394 ;;; **CHARACTER-MISC-DATABASE**, multiply it by +misc-width*. This gives the
395 ;;; index of the start of the charater's misc entry in
396 ;;; **CHARACTER-MISC-DATABASE**.
398 ;;; To look up a character's decomposition, first retreive its
399 ;;; decomposition-info from the misc database as described above. If the
400 ;;; decomposition info is not 0, the character has a decomposition with a
401 ;;; length given by the decomposition info with the high bit (which indicates
402 ;;; compatibility/canonical status) cleared. To find the decomposition, move
403 ;;; one value past the character's misc entry number in
404 ;;; **CHARACTER-LOW-DATABASE**, which gives an index into
405 ;;; **CHARACTER-DECOMPOSITIONS**. The next LENGTH values in
406 ;;; **CHARACTER-DECOMPOSITIONS** (an array of codepoints), starting at this
407 ;;; index, are the decomposition of the character. This proceduce does not
408 ;;; apply to Hangul syllables, which have their own decomposition algorithm.
410 ;;; Case information is stored in **CHARACTER-UNICODE-CASES**, an array that
411 ;;; indirectly maps a character's codepoint to (cons uppercase
412 ;;; lowercase). Uppercase and lowercase are either a single codepoint,
413 ;;; which is the upper- or lower-case of the given character, or a
414 ;;; list of codepoints which taken as a whole are the upper- or
415 ;;; lower-case. These case lists are only used in Unicode case
416 ;;; transformations, not in Common Lisp ones.
418 ;;; **CHARACTER-CASES** is similar to the above but it stores codes in
419 ;;; a flat array twice as large, and it includes only the standard casing rules,
420 ;;; so there's always just two characters.
422 ;;; Similarly, composition information is stored in **CHARACTER-COMPOSITIONS**,
423 ;;; which is a hash table of codepoints indexed by (+ (ash codepoint1 21)
426 (declaim (inline clear-flag
))
427 (defun clear-flag (bit integer
)
428 (logandc2 integer
(ash 1 bit
)))
430 (defconstant +misc-width
+ 9)
432 (declaim (ftype (sfunction (t) (unsigned-byte 16)) misc-index
))
433 (defun misc-index (char)
434 (let* ((cp (char-code char
))
435 (cp-high (ash cp -
8))
436 (high-index (aref **character-high-pages
** cp-high
)))
437 (if (logbitp 15 high-index
)
438 (* +misc-width
+ (clear-flag 15 high-index
))
440 (aref **character-low-pages
**
441 (* 2 (+ (ldb (byte 8 0) cp
) (ash high-index
8))))))))
443 (declaim (ftype (sfunction (t) (unsigned-byte 8)) ucd-general-category
)
444 (inline ucd-general-category
))
445 (defun ucd-general-category (char)
446 (aref **character-misc-database
** (misc-index char
)))
448 (defun ucd-decimal-digit (char)
449 (let ((digit (aref **character-misc-database
**
450 (+ 3 (misc-index char
)))))
451 (when (logbitp 6 digit
) ; decimalp flag
452 (ldb (byte 4 0) digit
))))
454 (defun char-code (char)
455 "Return the integer code of CHAR."
458 (defun char-int (char)
459 "Return the integer code of CHAR. (In SBCL this is the same as CHAR-CODE, as
460 there are no character bits or fonts.)"
463 (defun code-char (code)
464 "Return the character with the code CODE."
467 (defun character (object)
468 "Coerce OBJECT into a CHARACTER if possible. Legal inputs are characters,
469 strings and symbols of length 1."
470 (flet ((do-error (control args
)
471 (declare (optimize allow-non-returning-tail-call
))
472 (error 'simple-type-error
474 ;;?? how to express "symbol with name of length 1"?
475 :expected-type
'(or character
(string 1))
476 :format-control control
477 :format-arguments args
)))
480 (string (if (= 1 (length (the string object
)))
483 "String is not of length one: ~S" (list object
))))
484 (symbol (if (= 1 (length (symbol-name object
)))
485 (schar (symbol-name object
) 0)
487 "Symbol name is not of length one: ~S" (list object
))))
488 (t (do-error "~S cannot be coerced to a character." (list object
))))))
490 (defun char-name (char)
491 "Return the name (a STRING) for a CHARACTER object."
492 (let ((char-code (char-code char
)))
493 (or (second (assoc char-code
*base-char-name-alist
*))
494 (let ((h-code (double-vector-binary-search char-code
495 **unicode-char-name-database
**)))
498 (huffman-decode h-code
**unicode-character-name-huffman-tree
**))
500 (format nil
"U~X" char-code
)))))))
502 (defun name-char (name)
503 "Given an argument acceptable to STRING, NAME-CHAR returns a character whose
504 name is that string, if one exists. Otherwise, NIL is returned."
505 (let ((char-code (car (rassoc-if (lambda (names)
506 (member name names
:test
#'string-equal
))
507 *base-char-name-alist
*))))
509 (code-char char-code
))
510 ((let ((start (cond ((eql (string-not-equal "U+" name
) 2)
512 ((eql (string-not-equal "U" name
) 1)
515 (loop for i from start
517 always
(digit-char-p (char name i
) 16))
518 (code-char (parse-integer name
:start start
:radix
16)))))
520 (let ((encoding (huffman-encode (string-upcase name
)
521 **unicode-character-name-huffman-tree
**)))
525 (double-vector-binary-search encoding
526 **unicode-name-char-database
**)
527 (double-vector-binary-search encoding
528 **unicode-1-name-char-database
**))))
530 (code-char char-code
)))))))))
534 (defun standard-char-p (char)
535 "The argument must be a character object. STANDARD-CHAR-P returns T if the
536 argument is a standard character -- one of the 95 ASCII printing characters or
538 (and (typep char
'base-char
)
539 (let ((n (char-code (the base-char char
))))
543 (defun %standard-char-p
(thing)
544 "Return T if and only if THING is a standard-char. Differs from
545 STANDARD-CHAR-P in that THING doesn't have to be a character."
546 (and (characterp thing
) (standard-char-p thing
)))
548 (defun graphic-char-p (char)
549 "The argument must be a character object. GRAPHIC-CHAR-P returns T if the
550 argument is a printing character (space through ~ in ASCII), otherwise returns
552 (let ((n (char-code char
)))
556 (defun alpha-char-p (char)
557 "The argument must be a character object. ALPHA-CHAR-P returns T if the
558 argument is an alphabetic character, A-Z or a-z; otherwise NIL."
559 (< (ucd-general-category char
) 5))
561 (defmacro with-case-info
((char index-var cases-var
564 (let ((code-var (gensym "CODE"))
565 (shifted-var (gensym "SHIFTED"))
566 (page-var (gensym "PAGE")))
569 (declare (optimize (sb!c
::insert-array-bounds-checks
0)))
570 (let ((,code-var
(char-code ,char
)))
571 (let* ((,shifted-var
(ash ,code-var -
6))
572 (,page-var
(if (>= ,shifted-var
(length **character-case-pages
**))
574 (aref **character-case-pages
** ,shifted-var
))))
575 (if (= ,page-var
255)
577 (let ((,index-var
(* (+ (ash ,page-var
6)
578 (ldb (byte 6 0) ,code-var
))
580 (,cases-var
**character-cases
**))
583 (defun both-case-p (char)
584 "The argument must be a character object. BOTH-CASE-P returns T if the
585 argument is an alphabetic character and if the character exists in both upper
586 and lower case. For ASCII, this is the same as ALPHA-CHAR-P."
587 (with-case-info (char index cases
)
588 (plusp (aref cases index
))))
590 (defun upper-case-p (char)
591 "The argument must be a character object; UPPER-CASE-P returns T if the
592 argument is an upper-case character, NIL otherwise."
593 (with-case-info (char index cases
)
594 (= (aref cases
(1+ index
))
597 (defun lower-case-p (char)
598 "The argument must be a character object; LOWER-CASE-P returns T if the
599 argument is a lower-case character, NIL otherwise."
600 (with-case-info (char index cases
)
601 (= (aref cases index
)
604 (defun char-upcase (char)
605 "Return CHAR converted to upper-case if that is possible. Don't convert
606 lowercase eszet (U+DF)."
607 (with-case-info (char index cases
609 (let ((code (aref cases
(1+ index
))))
614 (defun char-downcase (char)
615 "Return CHAR converted to lower-case if that is possible."
616 (with-case-info (char index cases
618 (let ((code (aref cases index
)))
623 (defun alphanumericp (char)
624 "Given a character-object argument, ALPHANUMERICP returns T if the argument
625 is either numeric or alphabetic."
626 (let ((gc (ucd-general-category char
)))
630 ;;; EQUAL-CHAR-CODE is used by the following functions as a version of CHAR-INT
631 ;;; which loses font, bits, and case info.
633 ;;; Return a cons with (upper-case . lower-case), where it either can
634 ;;; be a character code or a list of character codes if the character
635 ;;; donwcases or upcases into multiple characters.
636 (declaim (inline char-case-info
))
637 (defun char-case-info (character)
638 (let* ((code (char-code character
))
639 (page (aref **character-case-pages
** (ash code -
6))))
640 ;; Pages with 255 means the character is not both-case.
641 ;; **character-cases** has 0 for those characters.
642 (aref **character-unicode-cases
**
644 (ldb (byte 6 0) code
)))))
646 ;;; Returns the downcased code or the character code
647 (declaim (inline equal-char-code
))
648 (defun equal-char-code (char)
649 (let* ((code (char-code char
))
650 (shifted (ash code -
6))
651 (page (if (>= shifted
(length **character-case-pages
**))
652 (return-from equal-char-code code
)
653 (aref **character-case-pages
** shifted
))))
657 (aref **character-cases
**
659 (ldb (byte 6 0) code
))
661 (if (zerop down-code
)
665 (declaim (inline two-arg-char-equal-inline
))
666 (defun two-arg-char-equal-inline (c1 c2
)
667 (flet ((base-char-equal-p ()
668 (let* ((code1 (char-code c1
))
669 (code2 (char-code c2
))
670 (sum (logxor code1 code2
)))
672 (let ((sum (+ code1 code2
)))
673 (or (and (< 161 sum
213))
674 (and (< 415 sum
461))
675 (and (< 463 sum
477))))))))
676 (declare (inline base-char-equal-p
))
683 (and (base-char-p c2
)
684 (base-char-equal-p)))
690 (with-case-info (c1 index cases
)
691 (or (= (aref cases index
) (char-code c2
)) ;; lower case
692 (= (aref cases
(1+ index
)) (char-code c2
))))))))
694 ;;; There are transforms on two-arg-char-equal, don't make it inlinable itself.
695 (defun two-arg-char-equal (c1 c2
)
696 (two-arg-char-equal-inline c1 c2
))
698 (defun two-arg-char-not-equal (c1 c2
)
699 (not (two-arg-char-equal-inline c1 c2
)))
701 (macrolet ((def (name test doc
)
702 `(defun ,name
(character &rest more-characters
)
705 (do ((c character
(nth i more-characters
))
707 ((>= i
(length more-characters
)) t
)
708 (do-rest-arg ((c2) more-characters i
)
710 (return-from ,name nil
))))
711 ;; CHAR-NOT-EQUAL has explicit check attribute
712 (progn (the character character
) t
)))))
713 (def char
/= (eq c
(the character c2
))
714 "Return T if no two of the arguments are the same character.")
715 (def char-not-equal
(two-arg-char-equal c c2
)
716 "Return T if no two of the arguments are the same character.
719 (defun two-arg-char-lessp (c1 c2
)
720 (< (equal-char-code c1
) (equal-char-code c2
)))
722 (defun two-arg-char-greaterp (c1 c2
)
723 (> (equal-char-code c1
) (equal-char-code c2
)))
725 (defun two-arg-char-not-greaterp (c1 c2
)
726 (<= (equal-char-code c1
) (equal-char-code c2
)))
728 (defun two-arg-char-not-lessp (c1 c2
)
729 (>= (equal-char-code c1
) (equal-char-code c2
)))
731 (macrolet ((def (op test doc
&optional explicit-check
)
732 `(defun ,op
(character &rest more-characters
)
734 ,@(when explicit-check
`((declare (explicit-check))))
735 (let ((c1 character
))
736 (declare (character c1
))
737 (do-rest-arg ((c2 i
) more-characters
0 t
)
740 (return (do-rest-arg ((c) more-characters
(1+ i
))
741 (the character c
))))))))) ; for effect
743 (def char
= (eq c1
(the character c2
))
744 "Return T if all of the arguments are the same character.")
745 (def char
< (< (char-int c1
) (char-int c2
))
746 "Return T if the arguments are in strictly increasing alphabetic order.")
747 (def char
> (> (char-int c1
) (char-int c2
))
748 "Return T if the arguments are in strictly decreasing alphabetic order.")
749 (def char
<= (<= (char-int c1
) (char-int c2
))
750 "Return T if the arguments are in strictly non-decreasing alphabetic order.")
751 (def char
>= (>= (char-int c1
) (char-int c2
))
752 "Return T if the arguments are in strictly non-increasing alphabetic order.")
755 (def char-equal
(two-arg-char-equal c1 c2
)
756 "Return T if all of the arguments are the same character.
758 (def char-lessp
(two-arg-char-lessp c1 c2
)
759 "Return T if the arguments are in strictly increasing alphabetic order.
761 (def char-greaterp
(two-arg-char-greaterp c1 c2
)
762 "Return T if the arguments are in strictly decreasing alphabetic order.
764 (def char-not-greaterp
(two-arg-char-not-greaterp c1 c2
)
765 "Return T if the arguments are in strictly non-decreasing alphabetic order.
767 (def char-not-lessp
(two-arg-char-not-lessp c1 c2
)
768 "Return T if the arguments are in strictly non-increasing alphabetic order.
769 Case is ignored." t
))
772 (defun digit-char-p (char &optional
(radix 10.
))
773 "If char is a digit in the specified radix, returns the fixnum for which
774 that digit stands, else returns NIL."
775 (if (<= (char-code char
) 127)
776 (let ((weight (- (char-code char
) 48)))
777 (cond ((minusp weight
) nil
)
779 ;; Special-case ASCII digits in decimal and smaller radices.
780 (if (< weight radix
) weight nil
))
781 ;; Digits 0 - 9 are used as is, since radix is larger.
782 ((< weight
10) weight
)
783 ;; Check for upper case A - Z.
784 ((and (>= (decf weight
7) 10) (< weight radix
)) weight
)
785 ;; Also check lower case a - z.
786 ((and (>= (decf weight
32) 10) (< weight radix
)) weight
)))
787 (let ((number (ucd-decimal-digit char
)))
788 (when (and number
(< (truly-the fixnum number
) radix
))
791 (defun digit-char (weight &optional
(radix 10))
792 "All arguments must be integers. Returns a character object that represents
793 a digit of the given weight in the specified radix. Returns NIL if no such
795 (and (typep weight
'fixnum
)
796 (>= weight
0) (< weight radix
) (< weight
36)
797 (code-char (if (< weight
10) (+ 48 weight
) (+ 55 weight
)))))