Improve SPLIT-SEQUENCE.
[iolib.git] / base / split-sequence.lisp
blob84c6e6f5c76358604a58cbcfab08d040d3edca8c
1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; indent-tabs-mode: nil -*-
2 ;;;
3 ;;; --- SPLIT-SEQUENCE
4 ;;;
5 ;;; This code was based on Arthur Lemmens' in
6 ;;; <URL:http://groups.google.com/groups?as_umsgid=39F36F1A.B8F19D20%40simplex.nl>;
7 ;;;
8 ;;; Examples:
9 ;;;
10 ;;; * (split-sequence #\; "a;;b;c")
11 ;;; -> ("a" "" "b" "c"), 6
12 ;;;
13 ;;; * (split-sequence #\; "a;;b;c" :from-end t)
14 ;;; -> ("a" "" "b" "c"), 0
15 ;;;
16 ;;; * (split-sequence #\; "a;;b;c" :from-end t :count 1)
17 ;;; -> ("c"), 4
18 ;;;
19 ;;; * (split-sequence #\; "a;;b;c" :remove-empty-subseqs t)
20 ;;; -> ("a" "b" "c"), 6
21 ;;;
22 ;;; * (split-sequence-if (lambda (x) (member x '(#\a #\b))) "abracadabra")
23 ;;; -> ("" "" "r" "c" "d" "" "r" ""), 11
24 ;;;
25 ;;; * (split-sequence-if-not (lambda (x) (member x '(#\a #\b))) "abracadabra")
26 ;;; -> ("ab" "a" "a" "ab" "a"), 11
27 ;;;
28 ;;; * (split-sequence #\; ";oo;bar;ba;" :start 1 :end 9)
29 ;;; -> ("oo" "bar" "b"), 9
31 (in-package :iolib.base)
33 (defun split-sequence (delimiter sequence &key (start 0) (end nil) (from-end nil)
34 (count nil) (remove-empty-subseqs nil)
35 (test #'eql) (test-not nil) (key #'identity))
36 "Return a list of subsequences in seq delimited by delimiter.
38 If :remove-empty-subseqs is NIL, empty subsequences will be included
39 in the result; otherwise they will be discarded. All other keywords
40 work analogously to those for CL:SUBSTITUTE. In particular, the
41 behaviour of :from-end is possibly different from other versions of
42 this function; :from-end values of NIL and T are equivalent unless
43 :count is supplied. The second return value is an index suitable as an
44 argument to CL:SUBSEQ into the sequence indicating where processing
45 stopped."
46 (check-bounds sequence start end)
47 (cond
48 ((and (not from-end) (null test-not))
49 (split-from-start (lambda (sequence start)
50 (position delimiter sequence :start start :key key :test test))
51 sequence start end count remove-empty-subseqs))
52 ((and (not from-end) test-not)
53 (split-from-start (lambda (sequence start)
54 (position delimiter sequence :start start :key key :test-not test-not))
55 sequence start end count remove-empty-subseqs))
56 ((and from-end (null test-not))
57 (split-from-end (lambda (sequence end)
58 (position delimiter sequence :end end :from-end t :key key :test test))
59 sequence start end count remove-empty-subseqs))
60 ((and from-end test-not)
61 (split-from-end (lambda (sequence end)
62 (position delimiter sequence :end end :from-end t :key key :test-not test-not))
63 sequence start end count remove-empty-subseqs))))
65 (defun split-sequence-if (predicate sequence &key (start 0) (end nil) (from-end nil)
66 (count nil) (remove-empty-subseqs nil) (key #'identity))
67 "Return a list of subsequences in seq delimited by items satisfying
68 predicate.
70 If :remove-empty-subseqs is NIL, empty subsequences will be included
71 in the result; otherwise they will be discarded. All other keywords
72 work analogously to those for CL:SUBSTITUTE-IF. In particular, the
73 behaviour of :from-end is possibly different from other versions of
74 this function; :from-end values of NIL and T are equivalent unless
75 :count is supplied. The second return value is an index suitable as an
76 argument to CL:SUBSEQ into the sequence indicating where processing
77 stopped."
78 (check-bounds sequence start end)
79 (if from-end
80 (split-from-end (lambda (sequence end)
81 (position-if predicate sequence :end end :from-end t :key key))
82 sequence start end count remove-empty-subseqs)
83 (split-from-start (lambda (sequence start)
84 (position-if predicate sequence :start start :key key))
85 sequence start end count remove-empty-subseqs)))
87 (defun split-sequence-if-not (predicate sequence &key (count nil) (remove-empty-subseqs nil)
88 (from-end nil) (start 0) (end nil) (key #'identity))
89 "Return a list of subsequences in seq delimited by items satisfying
90 \(CL:COMPLEMENT predicate).
92 If :remove-empty-subseqs is NIL, empty subsequences will be included
93 in the result; otherwise they will be discarded. All other keywords
94 work analogously to those for CL:SUBSTITUTE-IF-NOT. In particular,
95 the behaviour of :from-end is possibly different from other versions
96 of this function; :from-end values of NIL and T are equivalent unless
97 :count is supplied. The second return value is an index suitable as an
98 argument to CL:SUBSEQ into the sequence indicating where processing
99 stopped."
100 (check-bounds sequence start end)
101 (if from-end
102 (split-from-end (lambda (sequence end)
103 (position-if-not predicate sequence :end end :from-end t :key key))
104 sequence start end count remove-empty-subseqs)
105 (split-from-start (lambda (sequence start)
106 (position-if-not predicate sequence :start start :key key))
107 sequence start end count remove-empty-subseqs)))
109 (defun split-from-end (position-fn sequence start end count remove-empty-subseqs)
110 (loop
111 :for right := end :then left
112 :for left := (max (or (funcall position-fn sequence right) -1)
113 (1- start))
114 :unless (and (= right (1+ left))
115 remove-empty-subseqs) ; empty subseq we don't want
116 :if (and count (>= nr-elts count))
117 ;; We can't take any more. Return now.
118 :return (values (nreverse subseqs) right)
119 :else
120 :collect (subseq sequence (1+ left) right) into subseqs
121 :and :sum 1 :into nr-elts
122 :until (< left start)
123 :finally (return (values (nreverse subseqs) (1+ left)))))
125 (defun split-from-start (position-fn sequence start end count remove-empty-subseqs)
126 (let ((length (length sequence)))
127 (loop
128 :for left := start :then (+ right 1)
129 :for right := (min (or (funcall position-fn sequence left) length)
130 end)
131 :unless (and (= right left)
132 remove-empty-subseqs) ; empty subseq we don't want
133 :if (and count (>= nr-elts count))
134 ;; We can't take any more. Return now.
135 :return (values subseqs left)
136 :else
137 :collect (subseq sequence left right) :into subseqs
138 :and :sum 1 :into nr-elts
139 :until (>= right end)
140 :finally (return (values subseqs right)))))