1.0.12.23: Optimize STRING-*-TRIM
[sbcl/nikodemus.git] / src / code / string.lisp
blobdede9485c6828f13b0a96bf78fdf8c9d130bfde8
1 ;;;; This software is part of the SBCL system. See the README file for
2 ;;;; more information.
3 ;;;;
4 ;;;; This software is derived from the CMU CL system, which was
5 ;;;; written at Carnegie Mellon University and released into the
6 ;;;; public domain. The software is in the public domain and is
7 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
8 ;;;; files for more information.
10 (in-package "SB!IMPL")
(defun string (x)
  #!+sb-doc
  "Coerces the string designator X into a string. If X is a string, X
  itself is returned. If X is a symbol, its name is returned. If X is a
  character, a fresh one-element string containing that character is
  returned. Otherwise a SIMPLE-TYPE-ERROR is signalled."
  (cond ((stringp x) x)
        ((symbolp x) (symbol-name x))
        ((characterp x)
         (let ((res (make-string 1)))
           (setf (schar res 0) x)
           res))
        ;; X is not a string designator.
        (t
         (error 'simple-type-error
                :datum x
                :expected-type 'string-designator
                :format-control "~S cannot be coerced to a string."
                :format-arguments (list x)))))
;;; %CHECK-VECTOR-SEQUENCE-BOUNDS verifies that START and END are valid
;;; bounding indices for VECTOR.
;;;
;;; NOTE(review): the body is a call to this same name with the same
;;; arguments, so the definition presumably relies on the compiler
;;; open-coding the inner call -- confirm against the compiler sources.
(defun %check-vector-sequence-bounds (vector start end)
  (%check-vector-sequence-bounds vector start end))
35 (eval-when (:compile-toplevel)
;;; WITH-ONE-STRING sets up the usual string-hacking environment around
;;; FORMS. The variable named by STRING is first coerced with STRING
;;; unless it already holds a string; then STRING, START and END are
;;; rebound through WITH-ARRAY-DATA (with fill-pointer checking) so that
;;; FORMS can treat the string as a simple-string.
(sb!xc:defmacro with-one-string ((string start end) &body forms)
  (let ((coerced `(if (stringp ,string) ,string (string ,string))))
    `(let ((,string ,coerced))
       (with-array-data ((,string ,string)
                         (,start ,start)
                         (,end ,end)
                         :check-fill-pointer t)
         ,@forms))))
;;; WITH-STRING is the keyword-less sibling of WITH-ONE-STRING: the
;;; variable named by STRING is coerced to a string and rebound through
;;; WITH-ARRAY-DATA, which also introduces the (non-hygienic) variables
;;; START and END for use by FORMS.
(sb!xc:defmacro with-string (string &rest forms)
  (let ((coerced `(if (stringp ,string) ,string (string ,string))))
    `(let ((,string ,coerced))
       (with-array-data ((,string ,string)
                         (start)
                         (end)
                         :check-fill-pointer t)
         ,@forms))))
;;; WITH-TWO-STRINGS sets up string comparison operations. Both string
;;; variables are coerced with STRING when they do not already hold
;;; strings, then each string and its START/END pair is rebound through
;;; WITH-ARRAY-DATA (with fill-pointer checking). CUM-OFFSET-1 is passed
;;; as the :OFFSET-VAR of the first string's WITH-ARRAY-DATA clause.
(sb!xc:defmacro with-two-strings (string1 string2 start1 end1 cum-offset-1
                                  start2 end2 &rest forms)
  (flet ((coercion (var)
           `(if (stringp ,var) ,var (string ,var))))
    `(let ((,string1 ,(coercion string1))
           (,string2 ,(coercion string2)))
       (with-array-data ((,string1 ,string1 :offset-var ,cum-offset-1)
                         (,start1 ,start1)
                         (,end1 ,end1)
                         :check-fill-pointer t)
         (with-array-data ((,string2 ,string2)
                           (,start2 ,start2)
                           (,end2 ,end2)
                           :check-fill-pointer t)
           ,@forms)))))
69 ) ; EVAL-WHEN
;;; NOTE(review): the body calls CHAR itself, so this presumably relies
;;; on the compiler open-coding the inner call -- confirm.
(defun char (string index)
  #!+sb-doc
  "Returns the character object stored at position INDEX of STRING. INDEX
  must be a non-negative integer less than the length of the string."
  (declare (optimize (safety 1)))
  (char string index))
;;; Setter worker for CHAR: stores NEW-EL at position INDEX of STRING and
;;; returns the value of the SETF form.
(defun %charset (string index new-el)
  (declare (optimize (safety 1)))
  (setf (char string index) new-el))
;;; NOTE(review): the body calls SCHAR itself, so this presumably relies
;;; on the compiler open-coding the inner call -- confirm.
(defun schar (string index)
  #!+sb-doc
  "Returns the character object at position INDEX of STRING, exactly as
  CHAR does, except that STRING must be a simple-string."
  (declare (optimize (safety 1)))
  (schar string index))
;;; Setter worker for SCHAR: stores NEW-EL at position INDEX of the
;;; simple-string STRING and returns the value of the SETF form.
(defun %scharset (string index new-el)
  (declare (optimize (safety 1)))
  (setf (schar string index) new-el))
;;; Positional-argument worker for STRING=: true exactly when
;;; %SP-STRING-COMPARE returns NIL for the two bounded strings.
(defun string=* (string1 string2 start1 end1 start2 end2)
  (with-two-strings string1 string2 start1 end1 nil start2 end2
    (null (%sp-string-compare string1 start1 end1
                              string2 start2 end2))))
;;; Positional-argument worker for STRING/=: when %SP-STRING-COMPARE
;;; returns an index, that index is returned with OFFSET1 subtracted;
;;; otherwise the result is NIL.
(defun string/=* (string1 string2 start1 end1 start2 end2)
  (with-two-strings string1 string2 start1 end1 offset1 start2 end2
    (let ((mismatch (%sp-string-compare string1 start1 end1
                                        string2 start2 end2)))
      (when mismatch
        (- (the fixnum mismatch) offset1)))))
104 (eval-when (:compile-toplevel :execute)
;;; Expands into the shared body of STRING<*, STRING>*, STRING<=* and
;;; STRING>=*. LESSP is true when expanding for STRING<* or STRING<=*;
;;; EQUALP is true when expanding for STRING<=* or STRING>=*. The
;;; expansion refers to the variables STRING1, STRING2, START1, END1,
;;; START2 and END2 of the enclosing function.
(sb!xc:defmacro string<>=*-body (lessp equalp)
  (let* ((offset1 (gensym))
         ;; Form computing the mismatch position with the offset removed.
         (mismatch `(- (the fixnum index) ,offset1)))
    `(with-two-strings string1 string2 start1 end1 ,offset1 start2 end2
       (let ((index (%sp-string-compare string1 start1 end1
                                        string2 start2 end2)))
         (if index
             (cond ((= (the fixnum index) (the fixnum end1))
                    ;; STRING1 ran out first.
                    ,(if lessp mismatch nil))
                   ((= (+ (the fixnum index) (- start2 start1))
                       (the fixnum end2))
                    ;; STRING2 ran out first.
                    ,(if lessp nil mismatch))
                   ((,(if lessp 'char< 'char>)
                     (schar string1 index)
                     (schar string2 (+ (the fixnum index) (- start2 start1))))
                    ,mismatch)
                   (t nil))
             ;; No index came back from the comparison.
             ,(if equalp `(- (the fixnum end1) ,offset1) nil))))))
129 ) ; EVAL-WHEN
;;; Positional-argument worker for STRING<: the "less", non-"equal"
;;; expansion of STRING<>=*-BODY.
(defun string<* (string1 string2 start1 end1 start2 end2)
  (declare (fixnum start1 start2))
  (string<>=*-body t nil))
;;; Positional-argument worker for STRING>: the "greater", non-"equal"
;;; expansion of STRING<>=*-BODY.
(defun string>* (string1 string2 start1 end1 start2 end2)
  (declare (fixnum start1 start2))
  (string<>=*-body nil nil))
;;; Positional-argument worker for STRING<=: the "less", "equal"
;;; expansion of STRING<>=*-BODY.
(defun string<=* (string1 string2 start1 end1 start2 end2)
  (declare (fixnum start1 start2))
  (string<>=*-body t t))
;;; Positional-argument worker for STRING>=: the "greater", "equal"
;;; expansion of STRING<>=*-BODY.
(defun string>=* (string1 string2 start1 end1 start2 end2)
  (declare (fixnum start1 start2))
  (string<>=*-body nil t))
(defun string< (string1 string2 &key (start1 0) end1 (start2 0) end2)
  #!+sb-doc
  "Compares the two strings using CHAR= and CHAR<. If the first string is
  lexicographically less than the second string, returns the index into
  STRING1 at which the strings first differ (the end of their longest
  common prefix). Otherwise, returns ()."
  (string<* string1 string2 start1 end1 start2 end2))
(defun string> (string1 string2 &key (start1 0) end1 (start2 0) end2)
  #!+sb-doc
  "Compares the two strings using CHAR= and CHAR>. If the first string is
  lexicographically greater than the second string, returns the index into
  STRING1 at which the strings first differ (the end of their longest
  common prefix). Otherwise, returns ()."
  (string>* string1 string2 start1 end1 start2 end2))
(defun string<= (string1 string2 &key (start1 0) end1 (start2 0) end2)
  #!+sb-doc
  "Compares the two strings using CHAR= and CHAR<. If the first string is
  lexicographically less than or equal to the second string, returns the
  index into STRING1 marking the end of their longest common prefix.
  Otherwise, returns ()."
  (string<=* string1 string2 start1 end1 start2 end2))
(defun string>= (string1 string2 &key (start1 0) end1 (start2 0) end2)
  ;; The docstring is guarded by #!+SB-DOC like that of every other
  ;; comparison function in this file.
  #!+sb-doc
  "Given two strings, if the first string is lexicographically greater
  than or equal to the second string, returns the index into STRING1
  marking the end of their longest common prefix (using char=).
  Otherwise, returns ()."
  (string>=* string1 string2 start1 end1 start2 end2))
174 ;;; Note: (STRING= "PREFIX" "SHORT" :END2 (LENGTH "PREFIX")) gives
175 ;;; an error instead of returning NIL as I would have expected.
176 ;;; The ANSI spec for STRING= itself doesn't seem to clarify this
177 ;;; much, but the SUBSEQ-OUT-OF-BOUNDS writeup seems to say that
178 ;;; this is conforming (and required) behavior, because any index
179 ;;; out of range is an error. (So there seems to be no concise and
180 ;;; efficient way to test for strings which begin with a particular
181 ;;; pattern. Alas..) -- WHN 19991206
(defun string= (string1 string2 &key (start1 0) end1 (start2 0) end2)
  #!+sb-doc
  "Compares the characters of STRING1 bounded by START1 and END1 with the
  characters of STRING2 bounded by START2 and END2, using CHAR=. Returns
  true if no difference is found, and false otherwise."
  (string=* string1 string2 start1 end1 start2 end2))
(defun string/= (string1 string2 &key (start1 0) end1 (start2 0) end2)
  #!+sb-doc
  "Compares the two strings using CHAR=. If they are not equal, returns the
  index into STRING1 at which they first differ (the end of their longest
  common prefix). Otherwise, returns ()."
  (string/=* string1 string2 start1 end1 start2 end2))
196 (eval-when (:compile-toplevel :execute)
;;; STRING-NOT-EQUAL-LOOP generates the character comparison loops used
;;; by STRING-EQUAL and STRING-NOT-EQUAL. END selects which end test
;;; terminates the loop (1 for INDEX1 reaching END1, anything else for
;;; INDEX2 reaching END2) and END-VALUE is the loop's result form. When
;;; ABORT-VALUE is supplied, a CHAR-EQUAL mismatch returns it from inside
;;; the loop body; when it is not supplied, a mismatch is folded into the
;;; termination test instead. The expansion refers to STRING1, STRING2,
;;; START1, END1, START2 and END2 of the surrounding code.
(sb!xc:defmacro string-not-equal-loop (end
                                       end-value
                                       &optional (abort-value nil abortp))
  (declare (fixnum end))
  (let ((end-test (if (= end 1)
                      `(= index1 (the fixnum end1))
                      `(= index2 (the fixnum end2))))
        (differ `(not (char-equal (schar string1 index1)
                                  (schar string2 index2)))))
    `(do ((index1 start1 (1+ index1))
          (index2 start2 (1+ index2)))
         (,(if abortp
               end-test
               `(or ,end-test ,differ))
          ,end-value)
       (declare (fixnum index1 index2))
       ,@(when abortp
           `((if ,differ
                 (return ,abort-value)))))))
221 ) ; EVAL-WHEN
(defun string-equal (string1 string2 &key (start1 0) end1 (start2 0) end2)
  #!+sb-doc
  "Compares the characters of STRING1 bounded by START1 and END1 with the
  characters of STRING2 bounded by START2 and END2, using CHAR-EQUAL.
  Returns true if the two ranges have the same length and no difference
  is found, and () otherwise."
  (declare (fixnum start1 start2))
  (with-two-strings string1 string2 start1 end1 nil start2 end2
    (let ((slen1 (- (the fixnum end1) start1))
          (slen2 (- (the fixnum end2) start2)))
      (declare (fixnum slen1 slen2))
      ;; Ranges of different lengths can never match: yield () at once.
      (when (= slen1 slen2)
        (string-not-equal-loop 1 t nil)))))
(defun string-not-equal (string1 string2 &key (start1 0) end1 (start2 0) end2)
  #!+sb-doc
  "Given two strings, if the first string is not lexicographically equal
  to the second string, returns the index into STRING1 marking the end of
  their longest common prefix (using char-equal). Otherwise, returns ()."
  (with-two-strings string1 string2 start1 end1 offset1 start2 end2
    (let ((slen1 (- end1 start1))
          (slen2 (- end2 start2)))
      (declare (fixnum slen1 slen2))
      (cond ((= slen1 slen2)
             ;; Same length: () when the loop reaches the end, otherwise
             ;; the position of the first mismatch.
             (string-not-equal-loop 1 nil (- index1 offset1)))
            ((< slen1 slen2)
             ;; STRING1 is shorter: stop at its end or at a mismatch.
             (string-not-equal-loop 1 (- index1 offset1)))
            (t
             ;; STRING2 is shorter: stop at its end or at a mismatch.
             (string-not-equal-loop 2 (- index1 offset1)))))))
253 (eval-when (:compile-toplevel :execute)
;;; STRING-LESS-GREATER-EQUAL-TESTS returns two values for the macro
;;; STRING-LESS-GREATER-EQUAL: the operator used to compare the lengths
;;; of string1 and string2, and a test form on the current characters
;;; CHAR1 and CHAR2.
(defun string-less-greater-equal-tests (lessp equalp)
  (cond ((and lessp equalp)
         ;; STRING-NOT-GREATERP
         (values '<= `(not (char-greaterp char1 char2))))
        (lessp
         ;; STRING-LESSP
         (values '< `(char-lessp char1 char2)))
        (equalp
         ;; STRING-NOT-LESSP
         (values '>= `(not (char-lessp char1 char2))))
        (t
         ;; STRING-GREATERP
         (values '> `(char-greaterp char1 char2)))))
;;; Expands into the shared body of STRING-LESSP*, STRING-GREATERP*,
;;; STRING-NOT-LESSP* and STRING-NOT-GREATERP*. The two strings are
;;; walked in parallel: at the first pair of characters that is not
;;; CHAR-EQUAL the character test decides between the mismatch position
;;; and (); if either range is exhausted first, the length test decides.
;;; The expansion refers to STRING1, STRING2, START1, END1, START2 and
;;; END2 of the enclosing function and binds OFFSET1 non-hygienically.
(sb!xc:defmacro string-less-greater-equal (lessp equalp)
  (multiple-value-bind (length-test character-test)
      (string-less-greater-equal-tests lessp equalp)
    `(with-two-strings string1 string2 start1 end1 offset1 start2 end2
       (let ((slen1 (- (the fixnum end1) start1))
             (slen2 (- (the fixnum end2) start2)))
         (declare (fixnum slen1 slen2))
         (do ((index1 start1 (1+ index1))
              (index2 start2 (1+ index2))
              (char1)
              (char2))
             ((or (= index1 (the fixnum end1)) (= index2 (the fixnum end2)))
              (when (,length-test slen1 slen2)
                (- index1 offset1)))
           (declare (fixnum index1 index2))
           (setq char1 (schar string1 index1)
                 char2 (schar string2 index2))
           (unless (char-equal char1 char2)
             (return (if ,character-test
                         (- index1 offset1)
                         nil))))))))
292 ) ; EVAL-WHEN
;;; Positional-argument worker for STRING-LESSP: the "less", non-"equal"
;;; expansion of STRING-LESS-GREATER-EQUAL.
(defun string-lessp* (string1 string2 start1 end1 start2 end2)
  (declare (fixnum start1 start2))
  (string-less-greater-equal t nil))
;;; Positional-argument worker for STRING-GREATERP: the "greater",
;;; non-"equal" expansion of STRING-LESS-GREATER-EQUAL.
(defun string-greaterp* (string1 string2 start1 end1 start2 end2)
  (declare (fixnum start1 start2))
  (string-less-greater-equal nil nil))
;;; Positional-argument worker for STRING-NOT-LESSP: the "greater",
;;; "equal" expansion of STRING-LESS-GREATER-EQUAL.
(defun string-not-lessp* (string1 string2 start1 end1 start2 end2)
  (declare (fixnum start1 start2))
  (string-less-greater-equal nil t))
;;; Positional-argument worker for STRING-NOT-GREATERP: the "less",
;;; "equal" expansion of STRING-LESS-GREATER-EQUAL.
(defun string-not-greaterp* (string1 string2 start1 end1 start2 end2)
  (declare (fixnum start1 start2))
  (string-less-greater-equal t t))
(defun string-lessp (string1 string2 &key (start1 0) end1 (start2 0) end2)
  #!+sb-doc
  "Compares the two strings ignoring case (using CHAR-EQUAL and CHAR-LESSP).
  If the first string is lexicographically less than the second string,
  returns the index into STRING1 marking the end of their longest common
  prefix. Otherwise, returns ()."
  (string-lessp* string1 string2 start1 end1 start2 end2))
(defun string-greaterp (string1 string2 &key (start1 0) end1 (start2 0) end2)
  #!+sb-doc
  "Compares the two strings ignoring case (using CHAR-EQUAL and
  CHAR-GREATERP). If the first string is lexicographically greater than
  the second string, returns the index into STRING1 marking the end of
  their longest common prefix. Otherwise, returns ()."
  (string-greaterp* string1 string2 start1 end1 start2 end2))
(defun string-not-lessp (string1 string2 &key (start1 0) end1 (start2 0) end2)
  #!+sb-doc
  "Compares the two strings ignoring case (using CHAR-EQUAL and CHAR-LESSP).
  If the first string is lexicographically greater than or equal to the
  second string, returns the index into STRING1 marking the end of their
  longest common prefix. Otherwise, returns ()."
  (string-not-lessp* string1 string2 start1 end1 start2 end2))
(defun string-not-greaterp (string1 string2 &key (start1 0) end1 (start2 0)
                            end2)
  #!+sb-doc
  "Compares the two strings ignoring case (using CHAR-EQUAL and
  CHAR-GREATERP). If the first string is lexicographically less than or
  equal to the second string, returns the index into STRING1 marking the
  end of their longest common prefix. Otherwise, returns ()."
  (string-not-greaterp* string1 string2 start1 end1 start2 end2))
;;; NOTE(review): both branches call MAKE-STRING itself, so this
;;; presumably relies on the compiler open-coding those calls -- confirm.
(defun make-string (count &key
                    (element-type 'character)
                    ((:initial-element fill-char)))
  #!+sb-doc
  "Makes and returns a new string COUNT characters long with elements of
  type ELEMENT-TYPE. When a non-NIL :INITIAL-ELEMENT is given, the string
  is filled with that character."
  (declare (fixnum count))
  (cond (fill-char
         (make-string count
                      :element-type element-type
                      :initial-element fill-char))
        (t
         (make-string count :element-type element-type))))
;;; %UPCASE destructively replaces every character of STRING between
;;; START and END with its CHAR-UPCASE and returns the string it was
;;; given. STRING-UPCASE applies it to a copy of the string designator;
;;; NSTRING-UPCASE applies it to its argument directly.
(flet ((%upcase (string start end)
         (declare (string string) (index start) (type sequence-end end))
         ;; Keep hold of the array we were handed: WITH-ONE-STRING rebinds
         ;; STRING inside its body, and the original is what gets returned.
         (let ((original string))
           (with-one-string (string start end)
             (do ((pos start (1+ pos)))
                 ((= pos (the fixnum end)))
               (declare (fixnum pos))
               (let ((ch (schar string pos)))
                 (setf (schar string pos) (char-upcase ch)))))
           original)))
  (defun string-upcase (string &key (start 0) end)
    (%upcase (copy-seq (string string)) start end))
  (defun nstring-upcase (string &key (start 0) end)
    (%upcase string start end))
  ) ; FLET
;;; %DOWNCASE destructively replaces every character of STRING between
;;; START and END with its CHAR-DOWNCASE and returns the string it was
;;; given. STRING-DOWNCASE applies it to a copy of the string designator;
;;; NSTRING-DOWNCASE applies it to its argument directly.
(flet ((%downcase (string start end)
         (declare (string string) (index start) (type sequence-end end))
         ;; Keep hold of the array we were handed: WITH-ONE-STRING rebinds
         ;; STRING inside its body, and the original is what gets returned.
         (let ((original string))
           (with-one-string (string start end)
             (do ((pos start (1+ pos)))
                 ((= pos (the fixnum end)))
               (declare (fixnum pos))
               (let ((ch (schar string pos)))
                 (setf (schar string pos) (char-downcase ch)))))
           original)))
  (defun string-downcase (string &key (start 0) end)
    (%downcase (copy-seq (string string)) start end))
  (defun nstring-downcase (string &key (start 0) end)
    (%downcase string start end))
  ) ; FLET
;;; %CAPITALIZE destructively capitalizes STRING between START and END:
;;; the first character of every run of alphanumeric characters is
;;; upcased and the remaining characters of the run are downcased.
;;; Non-alphanumeric characters are left alone and start a new word. The
;;; string that was passed in is returned. STRING-CAPITALIZE applies it
;;; to a copy of the string designator; NSTRING-CAPITALIZE applies it to
;;; its argument directly.
(flet ((%capitalize (string start end)
         (declare (string string) (index start) (type sequence-end end))
         ;; WITH-ONE-STRING rebinds STRING inside its body, so remember
         ;; the array we were handed in order to return it.
         (let ((saved-header string))
           (with-one-string (string start end)
             (do ((index start (1+ index))
                  (new-word? t)
                  (char nil))
                 ((= index (the fixnum end)))
               (declare (fixnum index))
               (setq char (schar string index))
               (cond ((not (alphanumericp char))
                      (setq new-word? t))
                     (new-word?
                      ;; CHAR is the first alphanumeric character after a
                      ;; sequence of non-alphanumeric characters (or at
                      ;; the start of the range).
                      (setf (schar string index) (char-upcase char))
                      (setq new-word? nil))
                     (t
                      ;; CHAR is inside a word.
                      (setf (schar string index) (char-downcase char))))))
           saved-header)))
  (defun string-capitalize (string &key (start 0) end)
    (%capitalize (copy-seq (string string)) start end))
  (defun nstring-capitalize (string &key (start 0) end)
    (%capitalize string start end))
  ) ; FLET
;;; GENERIC-STRING-TRIM is the common worker for STRING-LEFT-TRIM,
;;; STRING-RIGHT-TRIM and STRING-TRIM. Characters found in CHAR-BAG
;;; (compared with CHAR=) are stripped from the left end of STRING when
;;; LEFT-P is true and from the right end when RIGHT-P is true. If
;;; nothing needs trimming the (coerced) string itself is returned,
;;; otherwise a fresh subsequence is returned.
(defun generic-string-trim (char-bag string left-p right-p)
  ;; HEADER is the string as the caller sees it. WITH-ARRAY-DATA rebinds
  ;; STRING below, and START/END are then positions within that rebound
  ;; vector. All bounds must therefore be expressed through START and END
  ;; rather than 0 and (LENGTH STRING), and the untrimmed case must
  ;; return HEADER rather than the rebound vector.
  (let ((header (if (stringp string) string (string string))))
    (with-array-data ((string header)
                      (start)
                      (end)
                      :check-fill-pointer t)
      (let* ((left-end (if left-p
                           ;; Scan forward past characters in CHAR-BAG.
                           (do ((index start (1+ index)))
                               ((or (= index (the fixnum end))
                                    (not (find (schar string index)
                                               char-bag
                                               :test #'char=)))
                                index)
                             (declare (fixnum index)))
                           start))
             (right-end (if right-p
                            ;; Scan backward, never crossing LEFT-END.
                            (do ((index (1- (the fixnum end)) (1- index)))
                                ((or (< index left-end)
                                     (not (find (schar string index)
                                                char-bag
                                                :test #'char=)))
                                 (1+ index))
                              (declare (fixnum index)))
                            end)))
        (if (and (eql left-end start)
                 (eql right-end end))
            header
            (subseq (the simple-string string) left-end right-end))))))
;;; Trims characters found in CHAR-BAG from the left end of STRING only
;;; (LEFT-P true, RIGHT-P false).
(defun string-left-trim (char-bag string)
  (generic-string-trim char-bag string t nil))
;;; Trims characters found in CHAR-BAG from the right end of STRING only
;;; (LEFT-P false, RIGHT-P true).
(defun string-right-trim (char-bag string)
  (generic-string-trim char-bag string nil t))
;;; Trims characters found in CHAR-BAG from both ends of STRING (LEFT-P
;;; and RIGHT-P both true).
(defun string-trim (char-bag string)
  (generic-string-trim char-bag string t t))