f6c86f2a5ff6fdd32d671270ee092c74bba69708
[lice.git] / search.lisp
blobf6c86f2a5ff6fdd32d671270ee092c74bba69708
1 (in-package :lice)
3 ;; because gnu emacs' match-data is not reentrant we create this
4 ;; structure that is returned for all searching functions. It is
5 ;; passed into the match-data related functions.
6 (defstruct match-data
7 obj start end reg-starts reg-ends)
9 (defvar *match-data* nil
10 "store the match data for searches.")
12 (defvar *with-match-data* nil
13 "Set to true when inside a match-data block. If this is NIL
14 during one of the searches, a warning is signaled because it's
15 not thread safe. But, lots of code uses the search functions so
16 it's useful, at least now to be compatible with gnu emacs, even
17 if it's not thread safe. Never set this variable directly.")
19 (defmacro with-match-data (&body body)
20 `(let ((*with-match-data* t)
21 (*match-data* nil))
22 ,@body))
24 (defun match-end (idx &optional (data *match-data*))
25 "Return position of start of text matched by last search.
26 SUBEXP, a number, specifies which parenthesized expression in the last
27 regexp.
28 Value is nil if SUBEXPth pair didn't match, or there were less than
29 SUBEXP pairs.
30 Zero means the entire text matched by the whole regexp or whole string."
31 (if (zerop idx)
32 (match-data-end data)
33 (aref (match-data-reg-ends data) (1- idx))))
35 (defun match-beginning (idx &optional (data *match-data*))
36 "Return position of start of text matched by last search.
37 SUBEXP, a number, specifies which parenthesized expression in the last
38 regexp.
39 Value is nil if SUBEXPth pair didn't match, or there were less than
40 SUBEXP pairs.
41 Zero means the entire text matched by the whole regexp or whole string."
42 (if (zerop idx)
43 (match-data-start data)
44 (aref (match-data-reg-starts data) (1- idx))))
46 ;; FIXME: needs a formatter and the search string
47 (define-condition search-failed (lice-condition)
48 () (:documentation "raised when a search failed to match"))
50 (define-condition thread-unsafe (style-warning)
51 () (:documentation "Raised when a search is not threadsafe. See also `*with-match-data*'"))
53 (defun check-search-thread-safe ()
54 "Report a warning if the search is unsafe for threads."
55 (unless *with-match-data*
56 (signal 'thread-unsafe)))
58 (defun string-search-command (string bound error count direction)
59 (check-search-thread-safe)
60 (gap-move-to (current-buffer) (buffer-point-aref (current-buffer)))
61 ;; normalize vars
62 (setf count (* count direction)
63 bound (if (minusp count)
64 (if bound (max bound (begv)) (begv))
65 (if bound (min bound (zv)) (zv))))
66 (let* ((buffer (current-buffer))
67 pos
68 (start-aref (buffer-point-aref buffer))
69 (bound-aref (buffer-char-to-aref buffer bound))
70 (n (if (minusp count)
71 (loop for i from 0 below (- count)
72 do (setf pos (search string (buffer-data buffer) :from-end t :end2 start-aref :start2 bound-aref))
73 while pos
74 count i)
75 (loop for i from 0 below count
76 do (setf pos (search string (buffer-data buffer) :start2 start-aref :end2 bound-aref))
77 while pos
78 count i))))
79 (if (/= n (abs count))
80 (cond
81 ((eq error t)
82 (signal 'search-failed))
83 ((null error)
84 nil)
85 (bound
86 (goto-char bound buffer)
87 nil)
88 (t nil))
89 (progn
90 (if (minusp count)
91 (goto-char (+ (buffer-aref-to-char buffer pos) (length string)))
92 (goto-char (buffer-aref-to-char buffer pos)))
93 (values (point)
94 (setf *match-data*
95 (make-match-data :obj buffer
96 :start (buffer-aref-to-char buffer pos)
97 :end (+ (buffer-aref-to-char buffer pos) (length string))
98 :reg-starts #()
99 :reg-ends #())))))))
101 (defun search-forward (string &key bound (error t) (count 1))
102 "Search forward from point for string.
103 Set point to the end of the occurrence found, and return point.
104 An optional second argument bounds the search; it is a buffer position.
105 The match found must not extend after that position. nil is equivalent
106 to (point-max).
107 Optional third argument, if t, means if fail just return nil (no error).
108 If not nil and not t, move to limit of search and return nil.
109 Optional fourth argument is repeat count--search for successive occurrences.
111 Search case-sensitivity is determined by the value of the variable
112 `case-fold-search', which see.
114 See also the functions `match-beginning', `match-end' and `replace-match'."
115 (string-search-command string bound error count 1))
117 (defun search-backward (string &key bound (error t) (count 1))
118 "Search backward from point for STRING.
119 Set point to the beginning of the occurrence found, and return point.
120 An optional second argument bounds the search; it is a buffer position.
121 The match found must not extend before that position.
122 Optional third argument, if t, means if fail just return nil (no error).
123 If not nil and not t, position at limit of search and return nil.
124 Optional fourth argument is repeat count--search for successive occurrences.
126 Search case-sensitivity is determined by the value of the variable
127 `case-fold-search', which see.
129 See also the functions `match-beginning', `match-end' and `replace-match'."
130 (string-search-command string bound error count -1))
132 ;; TODO: create compiler-macros for regex functions so the regexps can
133 ;; be compiled at compile time.
135 (defun looking-at (regexp &optional (buffer (current-buffer)))
136 "Return the match-data if text after point matches regular expression regexp."
137 (check-search-thread-safe)
138 ;; get the gap outta the way. It sucks we have to do this. Really we
139 ;; should modify ppcre to generate scanner functions that hop the
140 ;; gap. Meantime...
141 (when (< (buffer-char-to-aref buffer (point buffer))
142 (buffer-gap-start buffer))
143 (gap-move-to-point buffer))
144 (multiple-value-bind (start end reg-starts reg-ends)
145 (ppcre:scan (ppcre:create-scanner regexp :multi-line-mode t) (buffer-data buffer)
146 :start (buffer-char-to-aref buffer (point buffer))
147 :real-start-pos 0)
148 (when (and start
149 (= start (buffer-char-to-aref buffer (point buffer))))
150 (values t
151 (setf *match-data*
152 (make-match-data :obj buffer
153 :start (buffer-aref-to-char buffer start)
154 :end (buffer-aref-to-char buffer end)
155 :reg-starts (map 'vector (lambda (n)
156 (buffer-aref-to-char buffer n))
157 reg-starts)
158 :reg-ends (map 'vector (lambda (n)
159 (buffer-aref-to-char buffer n))
160 reg-ends)))))))
162 (defun re-search-forward (regexp &key (bound (zv)) (error t) count &aux (buffer (current-buffer)))
163 "Search forward from point for regular expression regexp.
164 Set point to the end of the occurrence found, and return match-data structure.
165 BOUND bounds the search; it is a buffer position.
166 The match found must not extend after that position.
167 ERROR, if nil, means if fail just return nil (no error).
168 If not nil and not t, move to limit of search and return nil.
169 COUNT is repeat count--search for successive occurrences.
170 See also the functions `match-beginning', `match-end', `match-string',
171 and `replace-match'."
172 (declare (ignore count))
173 (check-search-thread-safe)
174 (when (< (buffer-char-to-aref buffer (point buffer))
175 (buffer-gap-start buffer))
176 (gap-move-to-point buffer))
177 (multiple-value-bind (start end reg-starts reg-ends)
178 (ppcre:scan (ppcre:create-scanner regexp :multi-line-mode t) (buffer-data buffer)
179 :start (buffer-char-to-aref buffer (point buffer))
180 :end (buffer-char-to-aref buffer bound)
181 :real-start-pos 0)
182 (cond (start
183 (goto-char (buffer-aref-to-char buffer end) buffer)
184 (values (point)
185 (setf *match-data*
186 (make-match-data :obj buffer
187 :start (buffer-aref-to-char buffer start)
188 :end (buffer-aref-to-char buffer end)
189 :reg-starts (map 'vector (lambda (n)
190 (buffer-aref-to-char buffer n))
191 reg-starts)
192 :reg-ends (map 'vector (lambda (n)
193 (buffer-aref-to-char buffer n))
194 reg-ends)))))
195 ((eq error t)
196 (signal 'search-failed))
197 ((null error)
198 nil)
199 (bound
200 (goto-char bound buffer)
201 nil)
202 (t nil))))
204 (defun re-search-backward (regexp &key (bound (begv)) (error t) count &aux (buffer (current-buffer)))
205 "Search backward from point for match for regular expression regexp.
206 Set point to the beginning of the match, and return match-data.
207 The match found is the one starting last in the buffer
208 and yet ending before the origin of the search.
209 BOUND bounds the search; it is a buffer position.
210 The match found must start at or after that position.
211 ERROR, if nil, means if fail just return nil (no error).
212 If not nil and not t, move to limit of search and return nil.
213 COUNT is repeat count--search for successive occurrences.
214 See also the functions `match-beginning', `match-end', `match-string',
215 and `replace-match'."
216 (declare (ignore count))
217 (check-search-thread-safe)
218 ;;(message "re-search-backward ~s ~d" regexp (point))
219 (when (> (buffer-gap-start buffer)
220 (buffer-char-to-aref buffer (point buffer)))
221 (gap-move-to buffer (buffer-char-to-aref buffer (1+ (point buffer)))))
222 ;; start search from point and keep walking back til we match something
223 (let* ((start-aref (buffer-char-to-aref buffer (point buffer)))
224 (pt-aref start-aref)
225 (stop (buffer-char-to-aref buffer bound))
226 (scanner (ppcre:create-scanner regexp :multi-line-mode t)))
227 (loop
228 (multiple-value-bind (start end reg-starts reg-ends)
229 (ppcre:scan scanner (buffer-data buffer) :start start-aref :end pt-aref :real-start-pos 0)
230 (when start
231 (goto-char (buffer-aref-to-char buffer start) buffer)
232 (return (values (point)
233 (setf *match-data*
234 (make-match-data :obj buffer
235 :start (buffer-aref-to-char buffer start)
236 :end (buffer-aref-to-char buffer end)
237 :reg-starts (map 'vector (lambda (n)
238 (buffer-aref-to-char buffer n))
239 reg-starts)
240 :reg-ends (map 'vector (lambda (n)
241 (buffer-aref-to-char buffer n))
242 reg-ends))))))
243 (dec-aref start-aref buffer)
244 (when (< start-aref stop)
245 (cond ((eq error t)
246 ;; FIXME: we need a search condition
247 (signal 'search-failed))
248 ((null error)
249 (return nil))
251 (when bound
252 (goto-char bound buffer))
253 (return nil))))))))
255 (defun string-match (regexp string &key (start 0) (end (length string)))
256 "Return index of start of first match for regexp in string and match-data, or nil.
257 Matching ignores case if `case-fold-search' is non-nil.
258 START, start search at that index in string.
259 END, end search at that index in string.
260 **For index of first char beyond the match, do (match-end 0).
261 **`match-end' and `match-beginning' also give indices of substrings
262 **matched by parenthesis constructs in the pattern.
264 You can use the function `match-string' to extract the substrings
265 matched by the parenthesis constructions in regexp."
266 (check-search-thread-safe)
267 (multiple-value-bind (start end reg-starts reg-ends)
268 (ppcre:scan (ppcre:create-scanner regexp :multi-line-mode t)
269 string :start start :end end)
270 (when start
271 (values start
272 (setf *match-data*
273 (make-match-data :obj string
274 :start start
275 :end end
276 :reg-starts reg-starts
277 :reg-ends reg-ends))))))
279 (defun regexp-quote (string)
280 "Return a regexp string which matches exactly STRING and nothing else."
281 (check-type string string)
282 (coerce
283 (loop for c across string
284 when (find c "[*.\\?+^$" :test 'char=)
285 collect #\\
286 collect c)
287 'string))