3 ;; because gnu emacs' match-data is not reentrant we create this
4 ;; structure that is returned for all searching functions. It is
5 ;; passed into the match-data related functions.
7 obj start end reg-starts reg-ends
)
9 (defvar *match-data
* nil
10 "store the match data for searches.")
12 (defvar *with-match-data
* nil
13 "Set to true when inside a match-data block. If this is NIL
14 during one of the searches, a warning is signaled because it's
15 not thread safe. But, lots of code uses the search functions so
16 it's useful, at least now to be compatible with gnu emacs, even
17 if it's not thread safe. Never set this variable directly.")
19 (defmacro with-match-data
(&body body
)
20 `(let ((*with-match-data
* t
)
24 (defun match-end (idx &optional
(data *match-data
*))
25 "Return position of start of text matched by last search.
26 SUBEXP, a number, specifies which parenthesized expression in the last
28 Value is nil if SUBEXPth pair didn't match, or there were less than
30 Zero means the entire text matched by the whole regexp or whole string."
33 (aref (match-data-reg-ends data
) (1- idx
))))
35 (defun match-beginning (idx &optional
(data *match-data
*))
36 "Return position of start of text matched by last search.
37 SUBEXP, a number, specifies which parenthesized expression in the last
39 Value is nil if SUBEXPth pair didn't match, or there were less than
41 Zero means the entire text matched by the whole regexp or whole string."
43 (match-data-start data
)
44 (aref (match-data-reg-starts data
) (1- idx
))))
46 (defun match-string (num &optional string
)
47 "Return string of text matched by last search.
48 NUM specifies which parenthesized expression in the last regexp.
49 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
50 Zero means the entire text matched by the whole regexp or whole string.
51 STRING should be given if the last search was by `string-match' on STRING."
52 (if (match-beginning num
)
54 (substring string
(match-beginning num
) (match-end num
))
55 (buffer-substring (match-beginning num
) (match-end num
)))))
58 (defun match-string-no-properties (num &optional string
)
59 "Return string of text matched by last search, without text properties.
60 NUM specifies which parenthesized expression in the last regexp.
61 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
62 Zero means the entire text matched by the whole regexp or whole string.
63 STRING should be given if the last search was by `string-match' on STRING."
64 (if (match-beginning num
)
66 (substring-no-properties string
(match-beginning num
)
68 (buffer-substring-no-properties (match-beginning num
)
71 ;; FIXME: needs a formatter and the search string
72 (define-condition search-failed
(lice-condition)
73 () (:documentation
"raised when a search failed to match"))
75 (define-condition thread-unsafe
(style-warning)
76 () (:documentation
"Raised when a search is not threadsafe. See also `*with-match-data*'"))
78 (defun check-search-thread-safe ()
79 "Report a warning if the search is unsafe for threads."
80 (unless *with-match-data
*
81 (signal 'thread-unsafe
)))
83 (defun string-search-command (string bound error count direction
)
84 (check-search-thread-safe)
85 (gap-move-to (current-buffer) (buffer-point-aref (current-buffer)))
87 (setf count
(* count direction
)
88 bound
(if (minusp count
)
89 (if bound
(max bound
(begv)) (begv))
90 (if bound
(min bound
(zv)) (zv))))
91 (let* ((buffer (current-buffer))
93 (start-aref (buffer-point-aref buffer
))
94 (bound-aref (buffer-char-to-aref buffer bound
))
96 (loop for i from
0 below
(- count
)
97 do
(setf pos
(search string
(buffer-data buffer
) :from-end t
:end2 start-aref
:start2 bound-aref
))
100 (loop for i from
0 below count
101 do
(setf pos
(search string
(buffer-data buffer
) :start2 start-aref
:end2 bound-aref
))
104 (if (/= n
(abs count
))
107 (signal 'search-failed
))
111 (set-point bound buffer
)
116 (set-point (+ (buffer-aref-to-char buffer pos
) (length string
)))
117 (set-point (buffer-aref-to-char buffer pos
)))
120 (make-match-data :obj buffer
121 :start
(buffer-aref-to-char buffer pos
)
122 :end
(+ (buffer-aref-to-char buffer pos
) (length string
))
126 (defun search-forward (string &key bound
(error t
) (count 1))
127 "Search forward from point for string.
128 Set point to the end of the occurrence found, and return point.
129 An optional second argument bounds the search; it is a buffer position.
130 The match found must not extend after that position. nil is equivalent
132 Optional third argument, if t, means if fail just return nil (no error).
133 If not nil and not t, move to limit of search and return nil.
134 Optional fourth argument is repeat count--search for successive occurrences.
136 Search case-sensitivity is determined by the value of the variable
137 `case-fold-search', which see.
139 See also the functions `match-beginning', `match-end' and `replace-match'."
140 (string-search-command string bound error count
1))
142 (defun search-backward (string &key bound
(error t
) (count 1))
143 "Search backward from point for STRING.
144 Set point to the beginning of the occurrence found, and return point.
145 An optional second argument bounds the search; it is a buffer position.
146 The match found must not extend before that position.
147 Optional third argument, if t, means if fail just return nil (no error).
148 If not nil and not t, position at limit of search and return nil.
149 Optional fourth argument is repeat count--search for successive occurrences.
151 Search case-sensitivity is determined by the value of the variable
152 `case-fold-search', which see.
154 See also the functions `match-beginning', `match-end' and `replace-match'."
155 (string-search-command string bound error count -
1))
157 (defvar *regexp-cache
* (make-memoize-state :test
'string
=))
159 ;; TODO: create compiler-macros for regex functions so the regexps can
160 ;; be compiled at compile time.
162 (defun looking-at (regexp &optional
(buffer (current-buffer)))
163 "Return the match-data if text after point matches regular expression regexp."
164 (check-type regexp string
)
165 (check-search-thread-safe)
166 ;; get the gap outta the way. It sucks we have to do this. Really we
167 ;; should modify ppcre to generate scanner functions that hop the
169 (when (< (buffer-char-to-aref buffer
(pt buffer
))
170 (buffer-gap-start buffer
))
171 (gap-move-to-point buffer
))
172 (multiple-value-bind (start end reg-starts reg-ends
)
173 (ppcre:scan
(memoize *regexp-cache
* regexp
(ppcre:create-scanner regexp
:multi-line-mode t
)) (buffer-data buffer
)
174 :start
(buffer-char-to-aref buffer
(pt buffer
))
177 (= start
(buffer-char-to-aref buffer
(pt buffer
))))
180 (make-match-data :obj buffer
181 :start
(buffer-aref-to-char buffer start
)
182 :end
(buffer-aref-to-char buffer end
)
183 :reg-starts
(map 'vector
(lambda (n)
184 (buffer-aref-to-char buffer n
))
186 :reg-ends
(map 'vector
(lambda (n)
187 (buffer-aref-to-char buffer n
))
190 (defun re-search-forward (regexp &key
(bound (zv)) (error t
) count
&aux
(buffer (current-buffer)))
191 "Search forward from point for regular expression regexp.
192 Set point to the end of the occurrence found, and return match-data structure.
193 BOUND bounds the search; it is a buffer position.
194 The match found must not extend after that position.
195 ERROR, if nil, means if fail just return nil (no error).
196 If not nil and not t, move to limit of search and return nil.
197 COUNT is repeat count--search for successive occurrences.
198 See also the functions `match-beginning', `match-end', `match-string',
199 and `replace-match'."
200 (declare (ignore count
))
201 (check-search-thread-safe)
202 (when (< (buffer-char-to-aref buffer
(pt buffer
))
203 (buffer-gap-start buffer
))
204 (gap-move-to-point buffer
))
205 (multiple-value-bind (start end reg-starts reg-ends
)
206 (ppcre:scan
(memoize *regexp-cache
* regexp
(ppcre:create-scanner regexp
:multi-line-mode t
)) (buffer-data buffer
)
207 :start
(buffer-char-to-aref buffer
(pt buffer
))
208 :end
(buffer-char-to-aref buffer bound
)
211 (set-point (buffer-aref-to-char buffer end
) buffer
)
214 (make-match-data :obj buffer
215 :start
(buffer-aref-to-char buffer start
)
216 :end
(buffer-aref-to-char buffer end
)
217 :reg-starts
(map 'vector
(lambda (n)
218 (buffer-aref-to-char buffer n
))
220 :reg-ends
(map 'vector
(lambda (n)
221 (buffer-aref-to-char buffer n
))
224 (signal 'search-failed
))
228 (set-point bound buffer
)
232 (defun re-search-backward (regexp &key
(bound (begv)) (error t
) count
&aux
(buffer (current-buffer)))
233 "Search backward from point for match for regular expression regexp.
234 Set point to the beginning of the match, and return match-data.
235 The match found is the one starting last in the buffer
236 and yet ending before the origin of the search.
237 BOUND bounds the search; it is a buffer position.
238 The match found must start at or after that position.
239 ERROR, if nil, means if fail just return nil (no error).
240 If not nil and not t, move to limit of search and return nil.
241 COUNT is repeat count--search for successive occurrences.
242 See also the functions `match-beginning', `match-end', `match-string',
243 and `replace-match'."
244 (declare (ignore count
))
245 (check-search-thread-safe)
246 ;;(message "re-search-backward ~s ~d" regexp (point))
247 (when (> (buffer-gap-start buffer
)
248 (buffer-char-to-aref buffer
(pt buffer
)))
249 (gap-move-to buffer
(buffer-char-to-aref buffer
(1+ (pt buffer
)))))
250 ;; start search from point and keep walking back til we match something
251 (let* ((start-aref (buffer-char-to-aref buffer
(pt buffer
)))
253 (stop (buffer-char-to-aref buffer bound
))
254 (scanner (memoize *regexp-cache
* regexp
(ppcre:create-scanner regexp
:multi-line-mode t
))))
256 (multiple-value-bind (start end reg-starts reg-ends
)
257 (ppcre:scan scanner
(buffer-data buffer
) :start start-aref
:end pt-aref
:real-start-pos
0)
259 (set-point (buffer-aref-to-char buffer start
) buffer
)
262 (make-match-data :obj buffer
263 :start
(buffer-aref-to-char buffer start
)
264 :end
(buffer-aref-to-char buffer end
)
265 :reg-starts
(map 'vector
(lambda (n)
266 (buffer-aref-to-char buffer n
))
268 :reg-ends
(map 'vector
(lambda (n)
269 (buffer-aref-to-char buffer n
))
271 (dec-aref start-aref buffer
)
272 (when (< start-aref stop
)
274 ;; FIXME: we need a search condition
275 (signal 'search-failed
))
280 (set-point bound buffer
))
283 (defun string-match (regexp string
&key
(start 0) (end (length string
)))
284 "Return index of start of first match for regexp in string and match-data, or nil.
285 Matching ignores case if `case-fold-search' is non-nil.
286 START, start search at that index in string.
287 END, end search at that index in string.
288 **For index of first char beyond the match, do (match-end 0).
289 **`match-end' and `match-beginning' also give indices of substrings
290 **matched by parenthesis constructs in the pattern.
292 You can use the function `match-string' to extract the substrings
293 matched by the parenthesis constructions in regexp."
294 (check-search-thread-safe)
295 (multiple-value-bind (start end reg-starts reg-ends
)
296 (ppcre:scan
(memoize *regexp-cache
* regexp
(ppcre:create-scanner regexp
:multi-line-mode t
))
297 string
:start start
:end end
)
301 (make-match-data :obj string
304 :reg-starts reg-starts
305 :reg-ends reg-ends
))))))
307 (defun regexp-quote (string)
308 "Return a regexp string which matches exactly STRING and nothing else."
309 (check-type string string
)
311 (loop for c across string
312 when
(find c
"[*.\\?+^$" :test
'char
=)
317 (defun wordify (string)
318 "Given a string of words separated by word delimiters,
319 compute a regexp that matches those exact words
320 separated by arbitrary punctuation."
321 (error "unimplemented"))
323 (defun word-search-forward (string &key
(bound (begv)) (error t
) count
&aux
(buffer (current-buffer)))
324 (error "unimplemented"))
326 (defun scan-buffer (buffer target start end count
)
327 "Search for COUNT instances of the character TARGET between START and END.
329 If COUNT is positive, search forwards; END must be >= START.
330 If COUNT is negative, search backwards for the -COUNTth instance;
331 END must be <= START.
332 If COUNT is zero, do anything you please; run rogue, for all I care.
334 If END is NIL, use BEGV or ZV instead, as appropriate for the
335 direction indicated by COUNT.
337 If we find COUNT instances, return the
338 position past the COUNTth match and 0. Note that for reverse motion
339 this is not the same as the usual convention for Emacs motion commands.
341 If we don't find COUNT instances before reaching END, return END
342 and the number of TARGETs left unfound."
343 (let ((shortage (abs count
))
346 (setf end
(or end
(zv buffer
)))
347 (setf end
(or end
(begv buffer
))))
348 (setf start
(buffer-char-to-aref buffer start
)
349 end
(buffer-char-to-aref buffer end
))
350 (loop while
(and (> count
0)
353 (if (< start
(buffer-gap-start buffer
))
354 (or (position target
(buffer-data buffer
) :start start
:end
(min end
(buffer-gap-start buffer
)))
355 (and (> end
(gap-end buffer
))
356 (position target
(buffer-data buffer
) :start
(gap-end buffer
) :end end
)))
357 (position target
(buffer-data buffer
) :start start
:end end
)))
359 (setf start
(1+ start
)
362 shortage
(1- shortage
))
364 (loop while
(and (< count
0)
367 (if (> start
(buffer-gap-start buffer
))
368 (or (position target
(buffer-data buffer
) :start
(max end
(gap-end buffer
)) :end start
:from-end t
)
369 (and (< end
(buffer-gap-start buffer
))
370 (position target
(buffer-data buffer
) :start end
:end
(buffer-gap-start buffer
) :from-end t
)))
371 (position target
(buffer-data buffer
) :start end
:end start
:from-end t
)))
373 (setf last
(+ start
1) ; match emacs functionality
375 shortage
(1- shortage
))
378 (values (and last
(buffer-aref-to-char buffer last
)) 0)
379 (values (buffer-aref-to-char buffer end
) shortage
))))
381 (defun find-before-next-newline (from to cnt
)
382 "Like find_next_newline, but returns position before the newline,
383 not after, and only search up to TO. This isn't just
384 find_next_newline (...)-1, because you might hit TO."
385 (multiple-value-bind (pos shortage
) (scan-buffer (current-buffer) #\Newline from to cnt
)
386 (when (zerop shortage
)
390 (defun buffer-scan-newline (buf start limit count
)
391 "Search BUF for COUNT newlines with a limiting point at LIMIT,
392 starting at START. Returns the point of the last newline or limit and
393 number of newlines found. START and LIMIT are inclusive."
394 (declare (type buffer buf
)
395 (type integer start limit count
))
396 (labels ((buffer-scan-bk (buf start limit count
)
397 "count is always >=0. start >= limit."
398 (let* ((start-aref (buffer-char-to-aref buf start
))
399 (limit-aref (buffer-char-to-aref buf limit
))
400 (ceiling (if (>= start-aref
(gap-end buf
))
401 (max limit-aref
(gap-end buf
))
404 ;; :END is not inclusive but START is.
405 (start (1+ start-aref
))
408 ;; Always search at least once
409 (setf p
(position #\Newline
(buffer-data buf
)
410 :start ceiling
:end start
:from-end t
))
413 ;; Move start. Note that start isn't set to (1+ p)
414 ;; because we don't want to search p again.
418 ;; Have we found enough newlines?
420 (return-from buffer-scan-bk
(values (buffer-aref-to-char buf p
)
422 ;; Check if we've searched up to the limit
423 (if (= ceiling limit-aref
)
424 (return-from buffer-scan-bk
(values limit i
))
425 ;; if not, skip past the gap
427 (setf ceiling limit-aref
)
428 (setf start
(buffer-gap-start buf
))))))))
429 (buffer-scan-fw (buf start limit count
)
430 "count is always >=0. start >= limit."
431 (let* ((start-aref (buffer-char-to-aref buf start
))
432 (limit-aref (1+ (buffer-char-to-aref buf limit
)))
433 (ceiling (if (< start
(buffer-gap-start buf
))
434 (min limit-aref
(buffer-gap-start buf
))
440 ;; Always search at least once
441 (setf p
(position #\Newline
(buffer-data buf
) :start start
:end ceiling
))
444 ;; Move start. We don't want to search p again, thus the 1+.
448 ;; Have we found enough newlines?
450 (return-from buffer-scan-fw
(values (buffer-aref-to-char buf p
)
452 ;; Check if we've searched up to the limit
453 (if (= ceiling limit-aref
)
454 (return-from buffer-scan-fw
(values limit i
))
455 ;; if not, skip past the gap
457 (setf ceiling limit-aref
)
458 (setf start
(gap-end buf
)))))))))
459 ;; make sure start and limit are within the bounds
460 (setf start
(max 0 (min start
(1- (buffer-size buf
))))
461 limit
(max 0 (min limit
(1- (buffer-size buf
)))))
462 ;; the search always fails on an empty buffer
463 (when (= (buffer-size buf
) 0)
464 (return-from buffer-scan-newline
(values limit
0)))
466 (dformat +debug-vv
+ "scan-fw ~a ~a ~a~%" start limit count
)
467 (buffer-scan-fw buf start limit count
))
469 (dformat +debug-vv
+ "scan-bk ~a ~a ~a~%" start limit count
)
470 (buffer-scan-bk buf start limit
(abs count
)))
471 ;; 0 means the newline before the beginning of the current
472 ;; line. We need to handle the case where we are on a newline.
474 (dformat +debug-vv
+ "scan-0 ~a ~a ~a~%" start limit count
)
475 (if (char= (buffer-char-after buf start
) #\Newline
)
476 (buffer-scan-bk buf start limit
2)
477 (buffer-scan-bk buf start limit
1))))))