[lice @ indent-region (buggy) + eval-defun]
[lice.git] / syntax.lisp
blob5a378449dab6b159a23083cab3a5b57f070cb2a6
1 ;;; Cheap syntax functions
3 (in-package :lice)
5 (defparameter +syntax-classes+
6 '(:whitespace :punctuation :word-constituent :symbol-constituent :open :close :quote :string :math :escape
7 :character-quote :comment :end-comment :inherit :comment-fence :string-fence))
9 (deftype syntax-class ()
10 '(member :whitespace ; for a whitespace character
11 :punctuation ; for random punctuation characters
12 :word-constituent ; for a word constituent
13 :symbol-constituent ; symbol constituent but not word constituent
14 :open ; for a beginning delimiter
15 :close ; for an ending delimiter
16 :quote ; for a prefix character like Lisp '
17 :string ; for a string-grouping character like Lisp "
18 :math ; for delimiters like $ in Tex.
19 :escape ; for a character that begins a C-style escape
20 :character-quote ; for a character that quotes the
21 ; following character
22 :comment ; for a comment-starting character
23 :end-comment ; for a comment-ending character
24 :inherit ; use the standard syntax table for
25 ; this character
26 :comment-fence ; Starts/ends comment which is
27 ; delimited on the other side by any
28 ; char with the same syntaxcode.
29 :string-fence ; Starts/ends string which is delimited
30 ; on the other side by any char with
31 ; the same syntaxcode.
34 (defclass syntax-table ()
35 ((hash :initform (make-hash-table :test 'equal) :initarg :hash :accessor syntax-table-hash)
36 (parent :initform nil :initarg :parent :accessor syntax-table-parent))
37 (:documentation "A syntax table class."))
39 (defstruct syntax-descriptor
40 (class nil :type syntax-class)
41 ;; FIXME: this will be a bitvector at some point but for now just
42 ;; make it a list.
43 (flags nil :type list)
44 ;; this is where the terminator paren char etc is stored
45 extra)
47 (defun set-raw-syntax-entry (table char descriptor)
48 (setf (gethash char (syntax-table-hash table)) descriptor))
50 (defvar *syntax-code-object* (loop for i in +syntax-classes+
51 collect i
52 collect (make-syntax-descriptor :class i :flags nil))
53 "A pool of syntax descriptors to be shared in the standard
54 syntax table in an attempt to save memory. FIXME: premature
55 optimization?")
57 (defvar *standard-syntax-table*
58 (let ((table (make-instance 'syntax-table :parent nil))
59 tmp)
60 ;; Control characters should not be whitespace.
61 (setf tmp (getf *syntax-code-object* :punctuation))
62 (loop for i below (char-code #\Space) do
63 (set-raw-syntax-entry table (code-char i) tmp))
64 ;; Except that a few really are whitespace.
65 (setf tmp (getf *syntax-code-object* :whitespace))
66 (set-raw-syntax-entry table #\Space tmp)
67 (set-raw-syntax-entry table #\Tab tmp)
68 (set-raw-syntax-entry table #\Newline tmp)
69 (set-raw-syntax-entry table #\Return tmp)
70 (set-raw-syntax-entry table #\Page tmp)
72 (setf tmp (getf *syntax-code-object* :word-constituent))
73 (loop
74 for i from (char-code #\a) to (char-code #\z)
75 for j from (char-code #\A) to (char-code #\Z) do
76 (set-raw-syntax-entry table (code-char i) tmp)
77 (set-raw-syntax-entry table (code-char j) tmp))
78 (loop for i from (char-code #\0) to (char-code #\9) do
79 (set-raw-syntax-entry table (code-char i) tmp))
81 (set-raw-syntax-entry table #\$ tmp)
82 (set-raw-syntax-entry table #\% tmp)
84 (set-raw-syntax-entry table #\( (make-syntax-descriptor :class :open :extra ")"))
85 (set-raw-syntax-entry table #\) (make-syntax-descriptor :class :close :extra "("))
86 (set-raw-syntax-entry table #\[ (make-syntax-descriptor :class :open :extra "]"))
87 (set-raw-syntax-entry table #\] (make-syntax-descriptor :class :close :extra "["))
88 (set-raw-syntax-entry table #\{ (make-syntax-descriptor :class :open :extra "}"))
89 (set-raw-syntax-entry table #\} (make-syntax-descriptor :class :close :extra "{"))
91 (set-raw-syntax-entry table #\" (make-syntax-descriptor :class :string))
92 (set-raw-syntax-entry table #\\ (make-syntax-descriptor :class :escape))
94 (setf tmp (getf *syntax-code-object* :symbol-constituent))
95 (loop for i across "_-+*/&|<>=" do
96 (set-raw-syntax-entry table i tmp))
98 (setf tmp (getf *syntax-code-object* :punctuation))
99 (loop for i across ".,;:?!#@~^'`" do
100 (set-raw-syntax-entry table i tmp))
101 ;; TODO: i18n characters
102 table)
103 "The standard syntax table")
105 (defun make-syntax-table (&optional (parent *standard-syntax-table*))
106 "Return a new syntax table.
107 Create a syntax table which inherits from parent (if non-nil) or
108 from `standard-syntax-table' otherwise."
109 (make-instance 'syntax-table
110 :parent parent))
112 (defun copy-syntax-table (&optional (table *standard-syntax-table*))
113 "Construct a new syntax table and return it.
114 It is a copy of the TABLE, which defaults to the standard syntax table."
115 (let* ((hash (make-hash-table)))
116 (maphash (lambda (k v)
117 (setf (gethash k hash) v))
118 (syntax-table-hash table))
119 (make-instance 'syntax-table
120 :hash hash :parent (syntax-table-parent table))))
122 (defun modify-syntax-entry (char class &key flags extra (table (syntax-table)))
123 "Set syntax for character CHAR according to CLASS, FLAGS, and EXTRA."
124 (check-type char character)
125 (check-type class syntax-class)
126 (check-type flags list)
127 (check-type table syntax-table)
128 (set-raw-syntax-entry table char
129 (make-syntax-descriptor :class class :flags flags :extra extra)))
131 (defun char-syntax (character &optional (table (syntax-table)))
132 "Return the syntax code of CHARACTER, described by a character.
133 For example, if CHARACTER is a word constituent,
134 the symbol `:WORD-CONSTITUENT' is returned."
135 (let ((descr (gethash character (syntax-table-hash table))))
136 (when descr
137 (syntax-descriptor-class descr))))
139 (defparameter +word-constituents+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
141 (defvar *words-include-escapes* nil
142 "Non-nil means `forward-word', etc., should treat escape chars part of words.")
144 (defun syntax-after (pos &aux (buffer (current-buffer)))
145 "Return the raw syntax of the char after POS.
146 If POS is outside the buffer's accessible portion, return nil."
147 (unless (or (< pos (point-min)) (>= pos (point-max)))
148 (let* ((ch (buffer-char-after buffer pos))
149 (descr (and ch (gethash ch (syntax-table buffer)))))
150 descr)))
152 ;; FIXME: having the flags as a list is memory intensive. How about a
153 ;; bit vector or number and a function that converts between the two?
155 (defun syntax-table (&aux (buffer (current-buffer)))
156 (buffer-syntax-table buffer))
158 (defun (setf syntax-table) (value &aux (buffer (current-buffer)))
159 "Select a new syntax table for the current buffer. One argument, a syntax table."
160 (check-type value syntax-table)
161 (setf (buffer-syntax-table buffer) value))
163 ;; The above looks a bit weird so lets also have a set function.
164 (defun set-syntax-table (value)
165 "Select a new syntax table for the current buffer. One argument, a syntax table."
166 (setf (syntax-table) value))
168 (defun &syntax-with-flags (ch table &optional (default :whitespace))
169 (or (gethash ch (syntax-table-hash table))
170 ;; try the parent
171 (and (syntax-table-parent table)
172 (&syntax-with-flags ch (syntax-table-parent table) default))
173 ;; return the default
174 (make-syntax-descriptor :class default)))
176 (defun &syntax (ch table &optional (default :whitespace))
177 (let ((descr (&syntax-with-flags ch table default)))
178 (if descr
179 (syntax-descriptor-class descr)
180 default)))
182 (defun &syntax-flags-syntax (syntax)
183 (syntax-descriptor-class syntax))
185 (defun &syntax-comment-start-first (ch table)
186 (let ((descr (&syntax-with-flags ch table)))
187 (when descr
188 (and (find :comment-start-first (syntax-descriptor-flags descr)) t))))
190 (defun &syntax-flags-comment-start-first (syntax)
191 (and (find :comment-start-first (syntax-descriptor-flags syntax)) t))
193 (defun &syntax-comment-start-second (ch table)
194 (let ((descr (&syntax-with-flags ch table)))
195 (when descr
196 (and (find :comment-start-second (syntax-descriptor-flags descr)) t))))
198 (defun &syntax-flags-comment-start-second (syntax)
199 (and (find :comment-start-second (syntax-descriptor-flags syntax)) t))
201 (defun &syntax-comment-end-first (ch table)
202 (let ((descr (&syntax-with-flags ch table)))
203 (when descr
204 (and (find :comment-end-first (syntax-descriptor-flags descr)) t))))
206 (defun &syntax-flags-comment-end-first (syntax)
207 (and (find :comment-end-first (syntax-descriptor-flags syntax)) t))
209 (defun &syntax-comment-end-second (ch table)
210 (let ((descr (&syntax-with-flags ch table)))
211 (when descr
212 (and (find :comment-end-second (syntax-descriptor-flags descr)) t))))
214 (defun &syntax-flags-comment-end-second (syntax)
215 (and (find :comment-end-second (syntax-descriptor-flags syntax)) t))
217 (defun &syntax-prefix (ch table)
218 (let ((descr (&syntax-with-flags ch table)))
219 (when descr
220 (and (find :prefix (syntax-descriptor-flags descr)) t))))
222 (defun &syntax-flags-prefix (syntax)
223 (and (find :prefix (syntax-descriptor-flags syntax)) t))
225 (defun &syntax-comment-style (ch table)
226 (let ((descr (&syntax-with-flags ch table)))
227 (when descr
228 (and (find :comment-style (syntax-descriptor-flags descr)) t))))
230 (defun &syntax-flags-comment-style (syntax)
231 (and (find :comment-style (syntax-descriptor-flags syntax)) t))
233 (defun &syntax-comment-nested (ch table)
234 (let ((descr (&syntax-with-flags ch table)))
235 (when descr
236 (and (find :comment-nested (syntax-descriptor-flags descr)) t))))
238 (defun &syntax-flags-comment-nested (syntax)
239 (and (find :comment-nested (syntax-descriptor-flags syntax)) t))
241 (defmacro with-syntax-table (table &body body)
242 "Evaluate BODY with syntax table of current buffer set to TABLE.
243 The syntax table of the current buffer is saved, BODY is evaluated, and the
244 saved table is restored, even in case of an abnormal exit.
245 Value is what BODY returns."
246 (let ((old-table (gensym "TABLE"))
247 (old-buffer (gensym "BUFFER")))
248 `(let ((,old-table (syntax-table))
249 (,old-buffer (current-buffer)))
250 (unwind-protect
251 (progn
252 (set-syntax-table ,table)
253 ,@body)
254 (save-current-buffer
255 (set-buffer ,old-buffer)
256 (set-syntax-table ,old-table))))))
258 (defun syntax-class (syntax)
259 "Return the syntax class part of the syntax descriptor SYNTAX.
260 If SYNTAX is nil, return nil."
261 (&syntax-flags-syntax syntax))
263 (defun scan-words (from count)
264 "Return the position across COUNT words from FROM.
265 If that many words cannot be found before the end of the buffer,
266 return NIL. COUNT negative means scan backward and stop at word
267 beginning."
268 (let* ((buffer (current-buffer))
269 (beg (begv buffer))
270 (end (zv buffer))
271 (from-aref (buffer-char-to-aref buffer from))
272 (table (syntax-table))
273 ch code)
274 (while (> count 0)
275 (loop
276 (when (= from end)
277 (return-from scan-words nil))
278 (setf ch (buffer-fetch-char from-aref buffer)
279 code (&syntax ch table))
280 (inc-both from from-aref buffer)
281 (when (or (and *words-include-escapes*
282 (or (eq code :escape)
283 (eq code :character-quote)))
284 (eq code :word-constituent))
285 (return nil)))
286 (loop
287 (when (= from end)
288 (return nil))
289 (setf ch (buffer-fetch-char from-aref buffer)
290 code (&syntax from-aref table))
291 (when (and (and (not *words-include-escapes*)
292 (or (eq code :escape)
293 (eq code :character-quote)))
294 (or (not (eq code :word-constituent))))
295 ;; (word-boundary-p ..)
296 (return nil))
297 (inc-both from from-aref buffer))
298 (decf count))
299 (while (< count 0)
300 (loop
301 (when (= from beg)
302 (return-from scan-words nil))
303 (dec-both from from-aref buffer)
304 (setf ch (buffer-fetch-char from-aref buffer)
305 code (&syntax ch table))
306 (when (or (and *words-include-escapes*
307 (or (eq code :escape)
308 (eq code :character-quote)))
309 (eq code :word-constituent))
310 (return nil)))
311 (loop
312 (when (= from beg)
313 (return nil))
314 (setf ch (buffer-fetch-char from-aref buffer)
315 code (&syntax ch table))
316 (when (and (and (not *words-include-escapes*)
317 (or (eq code :escape)
318 (eq code :character-quote)))
319 (not (eq code :word-constituent)))
320 (return nil))
321 (dec-both from from-aref buffer))
322 (incf count))
323 from))
325 (defcommand forward-word ((n) :prefix)
326 "Move point forward ARG words (backward if ARG is negative).
327 Normally returns t.
328 If an edge of the buffer or a field boundary is reached, point is left there
329 and the function returns nil. Field boundaries are not noticed if
330 `inhibit-field-text-motion' is non-nil."
331 (labels ((isaword (c)
332 (find c +word-constituents+ :test #'char=)))
333 (let ((buffer (current-buffer)))
334 (cond ((> n 0)
335 (gap-move-to buffer (buffer-point-aref buffer))
336 ;; do it n times
337 (loop for i from 0 below n
338 while (let (p1 p2)
339 ;; search forward for a word constituent
340 (setf p1 (position-if #'isaword (buffer-data buffer)
341 :start (buffer-point-aref buffer)))
342 ;; search forward for a non word constituent
343 (when p1
344 (setf p2 (position-if (complement #'isaword) (buffer-data buffer) :start p1)))
345 (if p2
346 (goto-char (buffer-aref-to-char buffer p2))
347 (goto-char (point-max)))
348 p2)))
349 ((< n 0)
350 (setf n (- n))
351 (gap-move-to buffer (buffer-point-aref buffer))
352 ;; do it n times
353 (loop for i from 0 below n
354 for start = (buffer-gap-start buffer) then (buffer-point-aref buffer)
355 while (let (p1 p2)
356 ;; search backward for a word constituent
357 (setf p1 (position-if #'isaword (buffer-data buffer)
358 :from-end t
359 :end start))
360 ;; search backward for a non word constituent
361 (when p1
362 (setf p2 (position-if (complement #'isaword) (buffer-data buffer) :from-end t :end p1)))
363 (if p2
364 (goto-char (1+ (buffer-aref-to-char buffer p2)))
365 (goto-char (point-min)))
366 p2)))))))
368 (defcommand backward-word ((n) :prefix)
369 "Move point forward ARG words (backward if ARG is negative).
370 Normally returns t.
371 If an edge of the buffer or a field boundary is reached, point is left there
372 and the function returns nil. Field boundaries are not noticed if
373 `inhibit-field-text-motion' is non-nil."
374 (forward-word (- n)))
376 (defcommand kill-word ((arg)
377 :prefix)
378 "Kill characters forward until encountering the end of a word.
379 With argument, do this that many times."
380 (kill-region (point) (progn (forward-word arg) (point))))
382 (defcommand backward-kill-word ((arg)
383 :prefix)
384 "Kill characters backward until encountering the end of a word.
385 With argument, do this that many times."
386 (kill-word (- arg)))
388 (defvar *parse-sexp-ignore-comments* t
389 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.")
391 (defvar *open-paren-in-column-0-is-defun-start* t
392 "*Non-nil means an open paren in column 0 denotes the start of a defun.")
394 ;; Conditions used by the syntax code because the parsing is so bloody
395 ;; hairy.
396 (define-condition syntax-done () ())
397 (define-condition syntax-lossage () ())
399 (define-condition expression-ends-prematurely (lice-condition)
401 (define-condition unbalanced-parenthesis (lice-condition)
402 ((last-good :initarg :last-good :accessor unbalanced-parenthesis-last-good)
403 (from :initarg :from :accessor unbalanced-parenthesis-from)))
405 (defun &char-quoted (char-pos aref-pos buffer table)
406 "Returns TRUE if char at CHAR-POS is quoted.
407 Global syntax-table data should be set up already to be good at CHAR-POS
408 or after. On return global syntax data is good for lookup at CHAR-POS."
409 (let ((beg (begv buffer))
410 quoted)
411 (dec-both char-pos aref-pos buffer)
412 (while (>= char-pos beg)
413 (let* ((ch (buffer-fetch-char aref-pos buffer))
414 (code (&syntax ch table)))
415 (when (not (find code '(:character-quote :escape)))
416 (return nil))
417 (dec-both char-pos aref-pos buffer)
418 (setf quoted (not quoted))))
419 quoted))
421 (defun find-defun-start (pos pos-aref buffer table)
422 "Return a defun-start position before POS and not too far before.
423 It should be the last one before POS, or nearly the last.
425 When open_paren_in_column_0_is_defun_start is nonzero,
426 only the beginning of the buffer is treated as a defun-start.
428 We record the information about where the scan started
429 and what its result was, so that another call in the same area
430 can return the same value very quickly.
432 There is no promise at which position the global syntax data is
433 valid on return from the subroutine, so the caller should explicitly
434 update the global data."
435 (declare (ignore pos-aref))
436 (unless *open-paren-in-column-0-is-defun-start*
437 (return-from find-defun-start (make-parse-state :start-value (begv buffer)
438 :start-value-aref (begv-aref buffer))))
439 ;; Back up to start of line.
440 (let* ((begv (begv buffer))
441 (pt (buffer-scan-newline buffer pos (begv buffer) -1))
442 (pt-aref (buffer-char-to-aref buffer pt)))
443 (while (> pt begv)
444 (let ((ch (buffer-fetch-char pt-aref buffer)))
445 (when (eq (&syntax ch table) :open)
446 (return nil))
447 ;; Move to beg of previous line.
448 (setf pt (buffer-scan-newline buffer pt (begv buffer) -2)
449 pt-aref (buffer-char-to-aref buffer pt))))
450 ;; Return what we found
451 (make-parse-state :start-value pt
452 :start-value-aref pt-aref
453 :start-buffer buffer
454 ;; :modiff MODIFF
455 :start-begv (begv buffer)
456 :start-pos pos)))
459 (defun skip-chars (forwardp syntaxp string lim)
460 (declare (ignore syntaxp))
461 (labels ((match-char (c negate ranges chars)
462 ;; check ranges
463 (catch :continue
464 (loop for r in ranges do
465 (if negate
466 (when (<= (car r) (char-code c) (cdr r))
467 (throw :done nil))
468 (when (<= (car r) (char-code c) (cdr r))
469 (throw :continue nil))))
470 ;; check chars
471 (if negate
472 (when (find c chars :test 'char=)
473 (throw :done nil))
474 (when (find c chars :test 'char=)
475 (throw :continue nil)))
476 (unless negate
477 ;; if the char fell through all that then we're done
478 (throw :done nil)))))
479 (check-type string string)
480 (check-number-coerce-marker lim)
481 (let ((idx 0)
482 negate
483 ranges
484 chars
485 (start-point (point)))
486 ;; don't allow scan outside bounds of buffer.
487 (setf lim (min (max lim (begv)) (zv)))
489 (when (char= (char string 0) #\^)
490 (setf negate t)
491 (incf idx))
493 ;; compile the ranges and chars
494 (while (< idx (length string))
495 (let ((c (char string idx)))
497 ;; (if syntaxp
498 ;; ;; TODO: handle syntaxp
499 ;; nil
500 ;; (progn
501 ;; ;; TODO: handle iso classes
502 ;; ))
503 (when (char= c #\\)
504 (incf idx)
505 (when (= idx (length string))
506 (return nil))
507 (setf c (char string idx)))
508 (incf idx)
510 ;; Treat `-' as range character only if another character
511 ;; follows.
512 (if (and (< (1+ idx) (length string))
513 (char= (char string idx) #\-))
514 (progn
515 (incf idx)
516 (let* ((c2 (char string idx))
517 (code1 (char-code c))
518 (code2 (char-code c2)))
519 (when (<= code1 code2)
520 (push (cons code1 code2) ranges))
521 (incf idx)))
522 (progn
523 (push c chars)))))
524 ;; scan
525 (let* ((buffer (current-buffer))
526 (pos (point buffer))
527 (pos-aref (buffer-char-to-aref buffer pos)))
528 (catch :done
529 (if forwardp
530 (while (< pos lim)
531 (match-char (buffer-fetch-char pos-aref buffer)
532 negate ranges chars)
533 (inc-both pos pos-aref buffer))
534 (progn
535 ;; do a little dance to end up in the right spot
536 (dec-both pos pos-aref buffer)
537 (unwind-protect
538 (while (> pos lim)
539 (match-char (buffer-fetch-char pos-aref buffer)
540 negate ranges chars)
541 (dec-both pos pos-aref buffer))
542 (inc-both pos pos-aref buffer)))))
543 (set-point pos buffer)
544 ;; return the number of chars we scanned
545 (- pos start-point)))))
547 (defun skip-chars-forward (string &optional (lim (zv)))
548 "Move point forward, stopping before a char not in string."
549 (skip-chars t nil string lim))
551 (defun skip-chars-backward (string &optional (lim (begv)))
552 "Move point backward, stopping after a char not in string."
553 (skip-chars nil nil string lim))
555 (defun skip-whitespace-forward (&optional (lim (zv)))
556 "Move point forward, stopping before a char that is not a space or tab."
557 (skip-chars-forward (coerce '(#\Space #\Tab) 'string) lim))
559 (defun &back-comment (from from-aref stop comment-nested comment-style buffer table)
560 "Checks whether charpos FROM is at the end of a comment.
561 FROM_BYTE is the bytepos corresponding to FROM.
562 Do not move back before STOP.
564 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
565 return -1 otherwise.
567 If successful, return the charpos of the comment's beginning, and the aref pos.
569 **Global syntax data remains valid for backward search starting at
570 **the returned value (or at FROM, if the search was not successful)."
571 ;; Look back, counting the parity of string-quotes,
572 ;; and recording the comment-starters seen.
573 ;; When we reach a safe place, assume that's not in a string;
574 ;; then step the main scan to the earliest comment-starter seen
575 ;; an even number of string quotes away from the safe place.
577 ;; OFROM[I] is position of the earliest comment-starter seen
578 ;; which is I+2X quotes from the comment-end.
579 ;; PARITY is current parity of quotes from the comment end.
580 (let ((string-style :none)
581 (string-lossage nil)
582 ;; Not a real lossage: indicates that we have passed a matching comment
583 ;; starter plus a non-matching comment-ender, meaning that any matching
584 ;; comment-starter we might see later could be a false positive (hidden
585 ;; inside another comment).
586 ;; Test case: { a (* b } c (* d *)
587 (comment-lossage nil)
588 (comment-end from)
589 (comment-end-aref from-aref)
590 (comment-start-pos 0)
591 comment-start-aref
592 ;; Place where the containing defun starts,
593 ;; or nil if we didn't come across it yet.
594 defun-start
595 defun-start-aref
596 code
597 (nesting 1) ; current comment nesting
599 (syntax nil))
600 (handler-case
601 (progn
602 ;; FIXME: A }} comment-ender style leads to incorrect behavior
603 ;; in the case of {{ c }}} because we ignore the last two chars which are
604 ;; assumed to be comment-enders although they aren't.
606 ;; At beginning of range to scan, we're outside of strings;
607 ;; that determines quote parity to the comment-end.
608 (while (/= from stop)
609 (catch :continue
610 (let (temp-aref prev-syntax com2start com2end)
611 (dec-both from from-aref buffer)
612 (setf prev-syntax syntax
613 ch (buffer-fetch-char from-aref buffer)
614 syntax (&syntax-with-flags ch table)
615 code (&syntax ch table)
616 ;; Check for 2-char comment markers.
617 com2start (and (&syntax-flags-comment-start-first syntax)
618 (&syntax-flags-comment-start-second prev-syntax)
619 (eq comment-style (&syntax-flags-comment-style prev-syntax))
620 (eq (or (&syntax-flags-comment-nested prev-syntax)
621 (&syntax-flags-comment-nested syntax))
622 comment-nested))
623 com2end (and (&syntax-flags-comment-end-first syntax)
624 (&syntax-flags-comment-end-second prev-syntax)))
625 ;; Nasty cases with overlapping 2-char comment markers:
626 ;; - snmp-mode: -- c -- foo -- c --
627 ;; --- c --
628 ;; ------ c --
629 ;; - c-mode: *||*
630 ;; |* *|* *|
631 ;; |*| |* |*|
632 ;; /// */
634 ;; If a 2-char comment sequence partly overlaps with
635 ;; another, we don't try to be clever.
636 (when (and (> from stop)
637 (or com2end com2start))
638 (let ((next from)
639 (next-aref from-aref)
640 next-c
641 next-syntax)
642 (dec-both next next-aref buffer)
643 (setf next-c (buffer-fetch-char next-aref buffer)
644 next-syntax (&syntax-with-flags next-c table))
645 (when (or (and (or com2start comment-nested)
646 (&syntax-flags-comment-end-second syntax)
647 (&syntax-flags-comment-end-first next-syntax))
648 (and (or com2end comment-nested)
649 (&syntax-flags-comment-start-second syntax)
650 (eq comment-style (&syntax-flags-comment-style syntax))
651 (&syntax-flags-comment-start-first next-syntax)))
652 (signal 'syntax-lossage))))
654 (when (and com2start
655 (= comment-start-pos 0))
656 ;; We're looking at a comment starter. But it might be a comment
657 ;; ender as well (see snmp-mode). The first time we see one, we
658 ;; need to consider it as a comment starter,
659 ;; and the subsequent times as a comment ender.
660 (setf com2end 0))
662 ;; Turn a 2-char comment sequences into the appropriate syntax.
663 (cond (com2end
664 (setf code :end-comment))
665 (com2start
666 (setf code :comment))
667 ;; Ignore comment starters of a different style.
668 ((and (eq code :comment)
669 (or (not (eq comment-style (&syntax-flags-comment-style syntax)))
670 (not (eq comment-nested (&syntax-flags-comment-nested syntax)))))
671 (throw :continue nil)))
673 ;; Ignore escaped characters, except comment-enders.
674 (when (and (not (eq code :end-comment))
675 (&char-quoted from from-aref buffer table))
676 (throw :continue nil))
678 (case code
679 ((:string-fence :comment-fence :string)
680 (when (find code '(:string-fence :comment-fence))
681 (setf ch (if (eq code :string-fence)
682 :string-style
683 :comment-style)))
684 ;; Track parity of quotes.
685 (cond ((eq string-style :none)
686 ;; Entering a string.
687 (setf string-style ch))
688 ((eq string-style ch)
689 ;; leaving the string
690 (setf string-style :none))
692 ;; If we have two kinds of string delimiters.
693 ;; There's no way to grok this scanning backwards.
694 (setf string-lossage t))))
695 (:comment
696 ;; We've already checked that it is the relevant comstyle.
697 (when (or (eq string-style :none)
698 comment-lossage
699 string-lossage)
700 ;; There are odd string quotes involved, so let's be careful.
701 ;; Test case in Pascal: " { " a { " } */
702 (signal 'syntax-lossage))
703 (if (not comment-nested)
704 ;; Record best comment-starter so far.
705 (setf comment-start-pos from
706 comment-start-aref from-aref)
707 (progn
708 (decf nesting)
709 (when (<= nesting)
710 ;; nested comments have to be balanced, so we don't need to
711 ;; keep looking for earlier ones. We use here the same (slightly
712 ;; incorrect) reasoning as below: since it is followed by uniform
713 ;; paired string quotes, this comment-start has to be outside of
714 ;; strings, else the comment-end itself would be inside a string.
715 (signal 'syntax-done)))))
716 (:end-comment
717 (cond ((and (eq comment-style (&syntax-flags-comment-style syntax))
718 (or (and com2end
719 (&syntax-flags-comment-nested prev-syntax))
720 (eq comment-nested (&syntax-flags-comment-nested syntax))))
721 ;; This is the same style of comment ender as ours.
722 (if comment-nested
723 (incf nesting)
724 ;; Anything before that can't count because it would match
725 ;; this comment-ender rather than ours.
726 (setf from stop)))
727 ((or (/= comment-start-pos 0)
728 (char/= ch #\Newline))
729 ;; We're mixing comment styles here, so we'd better be careful.
730 ;; The (comstart_pos != 0 || c != '\n') check is not quite correct
731 ;; (we should just always set comment_lossage), but removing it
732 ;; would imply that any multiline comment in C would go through
733 ;; lossage, which seems overkill.
734 ;; The failure should only happen in the rare cases such as
735 ;; { (* } *)
736 (setf comment-lossage t))))
737 (:open
738 ;; Assume a defun-start point is outside of strings.
739 (when (and *open-paren-in-column-0-is-defun-start*
740 (or (= from stop)
741 (progn
742 (setf temp-aref (aref-minus-1 from-aref buffer))
743 (char= (buffer-fetch-char temp-aref buffer) #\Newline))))
744 (setf defun-start from
745 defun-start-aref from-aref
746 ;; Break out of the loop.
747 from stop)))))))
749 (if (= comment-start-pos 0)
750 (setf from comment-end
751 from-aref comment-end-aref)
752 ;; If comstart_pos is set and we get here (ie. didn't jump to `lossage'
753 ;; or `done'), then we've found the beginning of the non-nested comment.
754 (setf from comment-start-pos
755 from-aref comment-start-aref)))
756 (syntax-lossage ()
757 ;; We had two kinds of string delimiters mixed up
758 ;; together. Decode this going forwards.
759 ;; Scan fwd from a known safe place (beginning-of-defun)
760 ;; to the one in question; this records where we
761 ;; last passed a comment starter.
762 ;; If we did not already find the defun start, find it now.
763 (when (= defun-start 0)
764 (let ((ret (find-defun-start comment-end comment-end-aref buffer table)))
765 (setf defun-start (parse-state-start-value ret)
766 defun-start-aref (parse-state-start-value-aref ret))))
767 (loop do
768 (let ((state (scan-sexps-forward defun-start defun-start-aref
769 comment-end -10000 0 nil 0 buffer table)))
770 (setf defun-start comment-end)
771 (if (and (eq (parse-state-in-comment state) comment-nested)
772 (eq (parse-state-comment-style state) comment-style))
773 (setf from (parse-state-comment-string-start state))
774 (progn
775 (setf from comment-end)
776 (when (parse-state-in-comment state) ; XXX
777 ;; If comment_end is inside some other comment, maybe ours
778 ;; is nested, so we need to try again from within the
779 ;; surrounding comment. Example: { a (* " *)
780 (setf defun-start (+ (parse-state-comment-string-start state) 2)
781 defun-start-aref (buffer-char-to-aref buffer defun-start))))))
782 while (< defun-start comment-end))
783 (setf from-aref (buffer-char-to-aref buffer from))))
784 (values (if (= from comment-end) -1 from)
785 from-aref)))
787 (defun &forward-comment (from from-aref stop nesting style prev-syntax buffer table)
788 "Jump over a comment, assuming we are at the beginning of one.
789 FROM is the current position.
790 FROM_BYTE is the bytepos corresponding to FROM.
791 Do not move past STOP (a charpos).
792 The comment over which we have to jump is of style STYLE
793 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
794 NESTING should be positive to indicate the nesting at the beginning
795 for nested comments and should be zero or negative else.
796 ST_COMMENT_STYLE cannot be nested.
797 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
798 (or nil If the search cannot start in the middle of a two-character).
800 If successful, return 1 and store the charpos of the comment's end
801 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
802 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
803 corresponding bytepos into *BYTEPOS_PTR and the current nesting
804 (as defined for state.incomment) in *INCOMMENT_PTR.
806 The comment end is the last character of the comment rather than the
807 character just after the comment.
809 Global syntax data is assumed to initially be valid for FROM and
810 remains valid for forward search starting at the returned position."
811 (let (c
813 code
814 (syntax prev-syntax))
815 (labels ((forward ()
816 (when (= from stop)
817 (return-from &forward-comment
818 (values nil from from-aref nesting)))
820 (setf c (buffer-fetch-char from-aref buffer)
821 code (&syntax c table)
822 syntax (&syntax-with-flags c table))
824 (when (and (eq code :end-comment)
825 (eq (&syntax-flags-comment-style syntax) style)
826 (if (&syntax-flags-comment-nested syntax)
827 (and (> nesting 0)
828 (progn (decf nesting)
829 (zerop nesting)))
830 (< nesting 0)))
831 ;; we have encountered a comment end of the same
832 ;; style as the comment sequence which began this
833 ;; comment section.
834 (throw :done nil))
835 (when (and (eq code :comment-fence)
836 (eq style :st-comment-style))
837 ;; we have encountered a comment end of the same style
838 ;; as the comment sequence which began this comment
839 ;; section.
840 (throw :done nil))
841 (when (and (> nesting 0)
842 (eq code :comment)
843 (&syntax-flags-comment-nested syntax)
844 (eq (&syntax-flags-comment-style syntax) style))
845 ;; we have encountered a nested comment of the same style
846 ;; as the comment sequence which began this comment section
847 (incf nesting))
848 (inc-both from from-aref buffer))
849 (do-comment ()
850 (when (and (< from stop)
851 (&syntax-flags-comment-end-first syntax)
852 (eq (&syntax-flags-comment-style syntax) style)
853 (progn
854 (setf c1 (buffer-fetch-char from-aref buffer))
855 (&syntax-comment-end-second c1 table))
856 (if (or (&syntax-flags-comment-nested syntax)
857 (&syntax-comment-nested c1 table))
858 (> nesting 0)
859 (< nesting 0)))
860 (decf nesting)
861 (if (<= nesting 0)
862 ;; we have encountered a comment end of the same style
863 ;; as the comment sequence which began this comment
864 ;; section
865 (throw :done nil)
866 (inc-both from from-aref buffer)))
867 (when (and (> nesting 0)
868 (< from stop)
869 (&syntax-flags-comment-start-first syntax)
870 (progn
871 (setf c1 (buffer-fetch-char from-aref buffer))
872 (eq (&syntax-comment-style c1 table) style))
873 (or (&syntax-flags-comment-nested syntax)
874 (&syntax-comment-nested c1 table)))
875 ;; we have encountered a nested comment of the same style
876 ;; as the comment sequence which began this comment
877 ;; section
878 (inc-both from from-aref buffer)
879 (incf nesting))))
880 ;; normalize nesting
881 (cond ((or (null nesting)
882 (<= nesting 0))
883 (setf nesting -1))
884 ((not (numberp nesting))
885 (setf nesting 1)))
886 ;; Enter the loop in the middle so that we find
887 ;; a 2-char comment ender if we start in the middle of it.
888 (catch :done
889 (if syntax
890 (progn
891 (do-comment)
892 (loop
893 (forward)
894 (do-comment)))
895 (loop
896 (forward)
897 (do-comment))))
898 (values t from from-aref))))
900 (defun prev-char-comment-end-first (pos pos-aref buffer table)
901 "Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE."
902 (dec-both pos pos-aref buffer)
903 (&syntax-comment-end-first (buffer-fetch-char pos-aref buffer)
904 table))
906 (defun &scan-lists (from count depth sexpflag &aux (buffer (current-buffer)))
907 "This is from the emacs function"
908 ;; the big TODO here is to use the CL readtable
909 (labels ((lose (last-good from)
910 (signal 'unbalanced-parenthesis :last-good last-good :from from)))
911 (let ((stop (if (> count 0) (zv buffer) (begv buffer)))
912 (from-aref (buffer-char-to-aref buffer from))
913 (min-depth (min 0 depth))
914 (table (syntax-table))
915 (last-good from)
916 quoted
917 math-exit
918 comment-start-first
919 code
920 ch ch1
921 temp-code
922 temp-pos
923 comment-nested
924 comment-style
925 found
926 prefix)
927 ;; normalize FROM
928 (setf from (max (min (zv buffer) from)
929 (begv buffer)))
930 (while (> count 0)
931 ;; the code needs to be able to jump out of the mess it got
932 ;; in.
933 (handler-case
934 (progn
935 (while (< from stop)
936 (catch :continue
937 (setf ch (buffer-fetch-char from-aref buffer)
938 code (&syntax ch table)
939 comment-start-first (&syntax-comment-start-first ch table)
940 comment-nested (&syntax-comment-nested ch table)
941 comment-style (&syntax-comment-style ch table)
942 prefix (&syntax-prefix ch table))
943 (when (= depth min-depth)
944 (setf last-good from))
945 (inc-both from from-aref buffer)
946 (when (and (< from stop) comment-start-first
947 (progn (setf ch (buffer-fetch-char from-aref buffer))
948 (&syntax-comment-start-second ch table))
949 *parse-sexp-ignore-comments*)
950 ;; we have encountered a comment start sequence and
951 ;; we are ignoring all text inside comments. We
952 ;; must record the comment style this sequence
953 ;; begins so that later, only a comment end of the
954 ;; same style actually ends the comment section
955 (setf code :comment
956 ch1 (buffer-fetch-char from-aref buffer)
957 comment-style (&syntax-comment-style ch1 table)
958 comment-nested (or comment-nested
959 (&syntax-comment-nested ch1 table)))
960 (inc-both from from-aref buffer))
962 (when prefix
963 (throw :continue nil))
965 (when (or (eq code :escape)
966 (eq code :character-quote))
967 (when (= from stop) (lose last-good from))
968 (inc-both from from-aref buffer)
969 ;; treat following character as a word constituent
970 (setf code :word-constituent))
972 (case code
973 ((:word-constituent :symbol-constituent)
974 (unless (or (not (zerop depth))
975 (not sexpflag))
976 (let (temp)
977 (while (< from stop)
978 (setf ch (buffer-fetch-char from-aref buffer)
979 temp (&syntax ch table))
980 (case temp
981 ((:escape :character-quote)
982 (inc-both from from-aref buffer)
983 (when (= from stop) (lose last-good from)))
984 ((:word-constituent :symbol-constituent :quote))
986 (signal 'syntax-done)))
987 (inc-both from from-aref buffer)))
988 (signal 'syntax-done)))
989 ((:comment-fence :comment)
990 (when (eq code :comment-fence)
991 (setf comment-style :st-comment-style))
992 (multiple-value-setq (found from from-aref) (&forward-comment from from-aref stop comment-nested comment-style nil buffer table))
993 (unless found
994 (when (zerop depth) (signal 'syntax-done))
995 (lose last-good from))
996 (inc-both from from-aref buffer))
997 (:math
998 (when sexpflag
999 (when (and (/= from stop)
1000 (char= ch (buffer-fetch-char from-aref buffer)))
1001 (inc-both from from-aref buffer))
1002 (if math-exit
1003 (progn
1004 (setf math-exit nil)
1005 (decf depth)
1006 (when (zerop depth) (signal 'syntax-done))
1007 (when (< depth min-depth)
1008 (signal 'expression-ends-prematurely))) ; XXX
1009 (progn
1010 (setf math-exit t)
1011 (incf depth)
1012 (when (zerop depth) (signal 'syntax-done))))))
1013 (:open
1014 (incf depth)
1015 (when (zerop depth) (signal 'syntax-done)))
1016 (:close
1017 (decf depth)
1018 (when (zerop depth) (signal 'syntax-done))
1019 (when (< depth min-depth)
1020 (signal 'expression-ends-prematurely)))
1021 ((:string :string-fence)
1022 (let* ((tmp-pos (aref-minus-1 from-aref buffer))
1023 (string-term (buffer-fetch-char tmp-pos buffer))
1024 temp)
1025 (loop
1026 (when (>= from stop) (lose last-good from))
1027 (setf ch (buffer-fetch-char from-aref buffer))
1028 (when (if (eq code :string)
1029 (and (char= ch string-term)
1030 (eq (&syntax ch table) :string))
1031 (eq (&syntax ch table) :string-fence))
1032 (return nil))
1033 (setf temp (&syntax ch table))
1034 (case temp
1035 ((:character-quote :escape)
1036 (inc-both from from-aref buffer)))
1037 (inc-both from from-aref buffer))
1038 (inc-both from from-aref buffer)
1039 (when (and (zerop depth)
1040 sexpflag)
1041 (signal 'syntax-done))))
1043 ;; Ignore whitespace, punctuation, quote, endcomment.
1044 ))))
1045 (unless (zerop depth) (lose last-good from))
1046 (return-from &scan-lists nil))
1047 (syntax-done ()
1048 (decf count))))
1050 (while (< count 0)
1051 (handler-case
1052 (progn
1053 (while (> from stop)
1054 (catch :continue
1055 (dec-both from from-aref buffer)
1056 (setf ch (buffer-fetch-char from-aref buffer)
1057 code (&syntax ch table))
1058 (when (= depth min-depth)
1059 (setf last-good from))
1060 (setf comment-style nil
1061 comment-nested (&syntax-comment-nested ch table))
1062 (when (eq code :end-comment)
1063 (setf comment-style (&syntax-comment-style ch table)))
1064 (when (and (> from stop)
1065 (&syntax-comment-end-second ch table)
1066 (prev-char-comment-end-first from from-aref buffer table)
1067 *parse-sexp-ignore-comments*)
1068 ;; We must record the comment style
1069 ;; encountered so that later, we can match
1070 ;; only the proper comment begin sequence of
1071 ;; the same style.
1072 (dec-both from from-aref buffer)
1073 (setf code :end-comment
1074 ch1 (buffer-fetch-char from-aref buffer)
1075 comment-nested (or comment-nested
1076 (&syntax-comment-nested ch1 table))))
1077 ;; Quoting turns anything except a comment-ender
1078 ;; into a word character. Note that this cannot
1079 ;; be true if we decremented FROM in the
1080 ;; if-statement above.
1081 (cond
1082 ((and (not (eq code :end-comment))
1083 (&char-quoted from from-aref buffer table))
1084 (dec-both from from-aref buffer)
1085 (setf code :word))
1086 ((&syntax-prefix ch table)
1087 ;; loop around again. I think this is nasty but fuckit.
1088 (throw :continue nil)))
1089 (case code
1090 ((:word-constituent :symbol-constituent :escape :character-quote)
1091 (unless (or (not (zerop depth))
1092 (not sexpflag))
1093 ;; This word counts as a sexp; count
1094 ;; object finished after passing it.
1095 (while (> from stop)
1096 (setf temp-pos from-aref)
1097 (decf temp-pos)
1098 (setf ch1 (buffer-fetch-char temp-pos buffer)
1099 temp-code (&syntax ch1 table))
1100 ;; Don't allow comment-end to be quoted.
1101 (when (eq temp-code :end-comment)
1102 (signal 'syntax-done))
1103 (setf quoted (&char-quoted (1- from) temp-pos buffer table))
1104 (when quoted
1105 (dec-both from from-aref buffer)
1106 (setf temp-pos (aref-minus-1 temp-pos buffer)))
1107 (setf ch1 (buffer-fetch-char temp-pos buffer)
1108 temp-code (&syntax ch1 table))
1109 (when (not (or quoted
1110 (eq temp-code :word-constituent)
1111 (eq temp-code :symbol-constituent)
1112 (eq temp-code :quote)))
1113 (signal 'syntax-done))
1114 (dec-both from from-aref buffer))
1115 (signal 'syntax-done)))
1116 (:math
1117 (when sexpflag
1118 (setf temp-pos (aref-minus-1 from-aref buffer))
1119 (when (and (/= from stop)
1120 (char= ch (buffer-fetch-char temp-pos buffer)))
1121 (dec-both from from-aref buffer))
1122 (if math-exit
1123 (progn
1124 (setf math-exit nil)
1125 (decf depth)
1126 (when (zerop depth) (signal 'syntax-done))
1127 (when (< depth min-depth)
1128 (signal 'expression-ends-prematurely)))
1129 (progn
1130 (setf math-exit t)
1131 (incf depth)
1132 (when (zerop depth) (signal 'syntax-done))))))
1133 (:close
1134 (incf depth)
1135 (when (zerop depth) (signal 'syntax-done)))
1136 (:open
1137 (decf depth)
1138 (when (zerop depth) (signal 'syntax-done))
1139 (when (< depth min-depth)
1140 (signal 'expression-ends-prematurely)))
1141 (:end-comment
1142 (when *parse-sexp-ignore-comments*
1143 (multiple-value-bind (found char-pos aref-pos)
1144 (&back-comment from from-aref stop comment-nested comment-style buffer table)
1145 (when (eq found :not-comment-end)
1146 (setf from char-pos
1147 from-aref aref-pos)))))
1148 ((:comment-fence :string-fence)
1149 (loop
1150 (when (= from stop) (lose last-good from))
1151 (dec-both from from-aref buffer)
1152 (when (and (not (&char-quoted from from-aref buffer table))
1153 (progn
1154 (setf ch (buffer-fetch-char from-aref buffer))
1155 (eq (&syntax ch table) code)))
1156 (return nil)))
1157 (when (and (eq code :string-fence)
1158 (zerop depth)
1159 sexpflag)
1160 (signal 'syntax-done)))
1161 (:string
1162 (let ((string-term (buffer-fetch-char from-aref buffer)))
1163 (loop
1164 (when (= from stop) (lose last-good from))
1165 (dec-both from from-aref buffer)
1166 (when (and (not (&char-quoted from from-aref buffer table))
1167 (progn
1168 (setf ch (buffer-fetch-char from-aref buffer))
1169 (char= string-term ch))
1170 (eq (&syntax ch table) :string))
1171 (return nil)))
1172 (when (and (zerop depth)
1173 sexpflag)
1174 (signal 'syntax-done))))
1176 ;; Ignore whitespace, punctuation, quote, endcomment.
1177 ))))
1178 (when (not (zerop depth)) (lose last-good from))
1179 (return-from &scan-lists nil))
1180 (syntax-done ()
1181 (incf count))))
1182 from)))
1184 (defun scan-lists (from count depth)
1185 "Scan from character number FROM by COUNT lists.
1186 Returns the character number of the position thus found.
1188 If DEPTH is nonzero, paren depth begins counting from that value,
1189 only places where the depth in parentheses becomes zero
1190 are candidates for stopping; COUNT such places are counted.
1191 Thus, a positive value for DEPTH means go out levels.
1193 Comments are ignored if `*parse-sexp-ignore-comments*' is non-nil.
1195 If the beginning or end of (the accessible part of) the buffer is reached
1196 and the depth is wrong, an error is signaled.
1197 If the depth is right but the count is not used up, nil is returned."
1198 (check-type from number)
1199 (check-type count number)
1200 (check-type depth number)
1201 (&scan-lists from count depth nil))
1203 (defun scan-sexps (from count)
1204 "Scan from character number FROM by COUNT balanced expressions.
1205 If COUNT is negative, scan backwards.
1206 Returns the character number of the position thus found.
1208 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1210 If the beginning or end of (the accessible part of) the buffer is reached
1211 in the middle of a parenthetical grouping, an error is signaled.
1212 If the beginning or end is reached between groupings
1213 but before count is used up, nil is returned."
1214 (check-type from number)
1215 (check-type count number)
1216 (&scan-lists from count 0 t))
1218 (defun backward-prefix-chars (&aux (buffer (current-buffer)) (table (syntax-table)))
1219 "Move point backward over any number of chars with prefix syntax.
1220 This includes chars with \"quote\" or \"prefix\" syntax (' or p)."
1221 (let* ((beg (begv buffer))
1222 (pos (point buffer))
1223 (pos-aref (buffer-char-to-aref buffer pos))
1224 (opoint (point buffer))
1225 (opoint-aref (buffer-char-to-aref buffer pos))
1227 (when (<= pos beg)
1228 ;; SET_PT_BOTH (opoint, opoint_byte);
1229 (return-from backward-prefix-chars nil))
1231 (dec-both pos pos-aref buffer)
1232 (while (and (not (&char-quoted pos pos-aref buffer table))
1233 (progn
1234 (setf c (buffer-fetch-char pos-aref buffer))
1235 (or (eq (&syntax c table) :quote)
1236 (&syntax-prefix c table))))
1237 (setf opoint pos
1238 opoint-aref pos-aref)
1239 (when (> (1+ pos) beg)
1240 (dec-both pos pos-aref buffer)))
1241 (set-point-both buffer opoint opoint-aref)
1242 nil))
1244 (defstruct parse-state
1245 depth min-depth
1246 this-level-start
1247 prev-level-start
1248 location
1249 level-starts
1250 quoted
1251 in-comment
1252 comment-style
1253 comment-string-start
1254 in-string
1255 start-value
1256 start-value-aref)
1258 (defstruct syntax-level
1259 last prev)
1261 ;; this function cries out for continuations. you almost have to look
1262 ;; at the C code to understand what's going on here, i bet. Hell, I
1263 ;; don't even understand it.
1264 (defun scan-sexps-forward (from from-aref end target-depth stop-before old-state comment-stop buffer table)
1265 "Parse forward from FROM / FROM_BYTE to END,
1266 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1267 and return a description of the state of the parse at END.
1268 If STOPBEFORE is nonzero, stop at the start of an atom.
1269 If COMMENTSTOP is 1, stop at the start of a comment.
1270 If COMMENTSTOP is -1, stop at the start or end of a comment,
1271 after the beginning of a string, or after the end of a string."
1272 ;;(message "scan-sexps-forward ~@{~a ~}" from from-aref end target-depth stop-before old-state comment-stop buffer table)
1273 (let ((state (make-parse-state))
1274 (prev-from from)
1275 (prev-from-aref from-aref)
1276 prev-from-syntax
1277 (boundary-stop (null comment-stop))
1278 no-fence
1280 code
1281 comment-nested
1282 depth
1283 min-depth
1284 temp
1285 start-quoted
1286 levels)
1287 (labels ((inc-from ()
1288 (setf prev-from from
1289 prev-from-aref from-aref
1290 temp (buffer-fetch-char prev-from-aref buffer)
1291 prev-from-syntax (&syntax-with-flags temp table))
1292 (inc-both from from-aref buffer))
1293 (cur-level ()
1294 "Return the current level struct"
1295 (car levels))
1296 (do-start-in-comment ()
1297 ;; The (from == BEGV) test was to enter the loop in the middle so
1298 ;; that we find a 2-char comment ender even if we start in the
1299 ;; middle of it. We don't want to do that if we're just at the
1300 ;; beginning of the comment (think of (*) ... (*)).
1301 (multiple-value-bind (found out-char out-aref in-comment)
1302 (&forward-comment from from-aref end
1303 (parse-state-in-comment state)
1304 (parse-state-comment-style state)
1305 (if (or (eq from (begv buffer))
1306 (< from (+ (parse-state-comment-string-start state) 3)))
1307 nil prev-from-syntax)
1308 buffer table)
1309 (setf from out-char
1310 from-aref out-aref
1311 (parse-state-in-comment state) in-comment)
1312 ;; Beware! prev_from and friends are invalid now.
1313 ;; Luckily, the `done' doesn't use them and the INC_FROM
1314 ;; sets them to a sane value without looking at them.
1315 (unless found (throw :end :done))
1316 (inc-from)
1317 (setf (parse-state-in-comment state) nil
1318 (parse-state-comment-style state) nil) ; reset the comment style
1319 (when boundary-stop (throw :end :done))))
1320 (do-sym-done ()
1321 ;;(message "do-sym-done ~s" (parse-state-level-starts state))
1322 (setf (syntax-level-prev (cur-level)) (syntax-level-last (cur-level))))
1323 (do-sym-started ()
1324 ;; (message "do-sym-started")
1325 (while (< from end)
1326 (case (&syntax (buffer-fetch-char from-aref buffer) table)
1327 ((:escape :character-quote)
1328 (inc-from)
1329 (when (= from end)
1330 (throw :end :end-quoted)))
1331 ((:word-constituent :symbol-constituent :quote))
1333 (do-sym-done)
1334 (return nil)))
1335 (inc-from)))
1336 (do-start-quoted ()
1337 (when (= from end) (throw :end :end-quoted))
1338 (inc-from)
1339 (do-sym-started))
1340 (do-in-string-loop ()
1341 (loop
1342 (let (c)
1343 (when (>= from end) (throw :end :done))
1344 (setf c (buffer-fetch-char from-aref buffer)
1345 temp (&syntax c table))
1346 ;; Check TEMP here so that if the char has
1347 ;; a syntax-table property which says it is NOT
1348 ;; a string character, it does not end the string.
1349 (when (and no-fence
1350 (equal c (parse-state-in-string state))
1351 (eq temp :string))
1352 (return nil))
1353 (case temp
1354 (:string-fence
1355 (unless no-fence (return nil)))
1356 ((:character-quote :escape)
1357 (inc-from)
1358 (when (>= from end) (throw :end :end-quoted))))
1359 (inc-from))))
1360 (do-string-end ()
1361 ;;(message "do-string-end ~s" (parse-state-level-starts state))
1362 (setf (parse-state-in-string state) nil
1363 (syntax-level-prev (cur-level)) (syntax-level-last (cur-level)))
1364 (inc-from)
1365 (when boundary-stop (throw :end :done)))
1366 (do-start-in-string ()
1367 (setf no-fence (not (eq (parse-state-in-string state) :st-string-style)))
1368 (do-in-string-loop)
1369 (do-string-end))
1370 (do-start-quoted-in-string ()
1371 (when (>= from end) (throw :end :end-quoted))
1372 (inc-from)
1373 (do-in-string-loop)))
1375 (when (/= from (begv buffer))
1376 (dec-both prev-from prev-from-aref buffer))
1378 (if old-state
1379 (progn
1380 (setf state old-state
1381 start-quoted (parse-state-quoted state)
1382 depth (or (parse-state-depth state) 0)
1383 start-quoted (parse-state-quoted state))
1384 (dolist (i (parse-state-level-starts state))
1385 (push (make-syntax-level :last i) levels))
1386 ;; make sure we have at least one in the list
1387 (unless levels
1388 (push (make-syntax-level) levels)))
1389 (setf depth 0
1390 state (make-parse-state)
1391 levels (list (make-syntax-level))))
1393 ;;(message "top ~s" (parse-state-level-starts state))
1395 (setf (parse-state-quoted state) nil
1396 min-depth depth)
1398 (setf temp (buffer-fetch-char prev-from-aref buffer)
1399 prev-from-syntax (&syntax-with-flags temp table))
1401 ;; "Enter" the loop at a place appropriate for initial state. In
1402 ;; the C code this is a bunch of goto's. Here we call the
1403 ;; appropriate function that sync's us so we're ready to enter
1404 ;; the loop.
1405 (cond ((parse-state-in-comment state)
1406 (do-start-quoted))
1407 ((parse-state-in-string state)
1408 (setf no-fence (not (eq (parse-state-in-string state) :st-string-style)))
1409 (if start-quoted
1410 (do-start-quoted-in-string)
1411 (do-start-in-string)))
1412 (start-quoted
1413 (do-start-quoted)))
1414 ;; (message "sane here")
1415 (case
1416 (catch :end
1417 (while (< from end)
1418 (catch :continue
1419 (inc-from)
1420 (setf code (&syntax-flags-syntax prev-from-syntax))
1421 ;; (message "here the code is ~s" code)
1422 (cond ((and (< from end)
1423 (&syntax-flags-comment-start-first prev-from-syntax)
1424 (progn
1425 (setf c1 (buffer-fetch-char from-aref buffer))
1426 (&syntax-comment-start-second c1 table)))
1427 ;; (message "here 1")
1428 ;; Record the comment style we have entered so that only
1429 ;; the comment-end sequence of the same style actually
1430 ;; terminates the comment section.
1431 (setf (parse-state-comment-style state) (&syntax-comment-style c1 table)
1432 comment-nested (&syntax-flags-comment-nested prev-from-syntax)
1433 comment-nested (or comment-nested
1434 (&syntax-comment-nested c1 table))
1435 (parse-state-in-comment state) comment-nested
1436 (parse-state-comment-string-start state) prev-from)
1437 (inc-from)
1438 (setf code :comment))
1439 ((eq code :comment-fence)
1440 ;; (message "here 2")
1441 ;; Record the comment style we have entered so that only
1442 ;; the comment-end sequence of the same style actually
1443 ;; terminates the comment section.
1444 (setf (parse-state-comment-style state) :st-comment-style
1445 (parse-state-in-comment state) -1 ; XXX
1446 (parse-state-comment-string-start state) prev-from
1447 code :comment))
1448 ((eq code :comment)
1449 ;; (message "here 3")
1450 (setf (parse-state-comment-style state) (&syntax-flags-comment-style prev-from-syntax)
1451 (parse-state-in-comment state) (&syntax-flags-comment-nested prev-from-syntax)
1452 (parse-state-comment-string-start state) prev-from)))
1454 (when (&syntax-flags-prefix prev-from-syntax)
1455 (throw :continue nil))
1457 ;;(message "code: ~s" code)
1458 (case code
1459 ((:escape :character-quote)
1460 ;; this arg means stop at sexp start
1461 (when stop-before (throw :end :stop))
1462 ;;(message ":escae ~s" (parse-state-level-starts state))
1463 (setf (syntax-level-last (cur-level)) prev-from)
1464 (do-start-quoted))
1466 ((:word-constituent :symbol-constituent)
1467 (when stop-before (throw :end :stop))
1468 ;;(message ":word-con ~s" (parse-state-level-starts state))
1469 (setf (syntax-level-last (cur-level)) prev-from)
1470 (do-sym-started))
1472 ((:comment-fence :comment)
1473 (when (or comment-stop
1474 boundary-stop)
1475 (throw :end :done))
1476 (do-start-in-comment))
1478 (:open
1479 (when stop-before (throw :end :stop))
1480 (incf depth)
1481 ;;(message ":open ~s" (parse-state-level-starts state))
1482 (setf (syntax-level-last (cur-level)) prev-from)
1483 ;; (message ":open ~a" (parse-state-level-starts state))
1484 (push (make-syntax-level) levels)
1485 ;; (when (> (length level-list) 100) ; XXX hardcoded
1486 ;; (error "nesting too deep for parser"))
1487 (when (= target-depth depth) (throw :end :done)))
1489 (:close
1490 (decf depth)
1491 (when (< depth min-depth)
1492 (setf min-depth depth))
1493 (unless (= (length levels) 1)
1494 (message "XXX: popping when levels is 1!")
1495 (pop levels))
1496 (setf (syntax-level-prev (cur-level)) (syntax-level-last (cur-level)))
1497 (when (= target-depth depth)
1498 (throw :end :done)))
1500 ((:string :string-fence)
1501 (setf (parse-state-comment-string-start state) (1- from))
1502 (when stop-before
1503 (throw :end :stop))
1504 (setf (syntax-level-last (cur-level)) prev-from)
1505 (setf (parse-state-in-string state) (if (eq code :string)
1506 (buffer-fetch-char prev-from-aref buffer)
1507 :st-string-style))
1508 (when boundary-stop
1509 (throw :end :done))
1510 (do-start-in-string))
1512 (:math
1513 ;; FIXME: We should do something with it.
1516 ;; Ignore whitespace, punctuation, quote, endcomment.
1517 ))))
1518 :done)
1519 (:stop
1520 ;; Here if stopping before start of sexp.
1521 ;; We have just fetched the char that starts it
1522 ;; but return the position before it.
1523 (setf from prev-from))
1524 (:end-quoted
1525 (setf (parse-state-quoted state) t)))
1527 ;;(message ":end ~s" (parse-state-level-starts state))
1528 ;; done
1529 (setf (parse-state-depth state) depth
1530 (parse-state-min-depth state) min-depth
1531 (parse-state-this-level-start state) (syntax-level-prev (cur-level))
1532 (parse-state-prev-level-start state) (if (<= (length levels) 1)
1533 nil (syntax-level-last (second levels)))
1534 (parse-state-location state) from
1535 (parse-state-level-starts state) (mapcar 'syntax-level-last (cdr levels)))
1536 state)))
1538 (defun parse-partial-sexp (from to &key (target-depth -100000) stop-before old-state comment-stop &aux (buffer (current-buffer)) (table (syntax-table)))
1539 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1540 Parsing stops at TO or when certain criteria are met;
1541 point is set to where parsing stops.
1542 If fifth arg OLDSTATE is omitted or nil,
1543 parsing assumes that FROM is the beginning of a function.
1544 Value is a list of elements describing final state of parsing:
1545 0. depth in parens. parse-state-depth
1546 1. character address of start of innermost containing list; nil if none. parse-state-prev-level-start
1547 2. character address of start of last complete sexp terminated. parse-state-this-level-start
1548 3. non-nil if inside a string.
1549 (it is the character that will terminate the string,
1550 or t if the string should be terminated by a generic string delimiter.) parse-state-in-string
1551 4. nil if outside a comment, t if inside a non-nestable comment,
1552 else an integer (the current comment nesting). parse-state-in-comment
1553 5. t if following a quote character. parse-state-quoted
1554 6. the minimum paren-depth encountered during this scan. parse-state-min-depth
1555 7. t if in a comment of style b; symbol `syntax-table' if the comment
1556 should be terminated by a generic comment delimiter. parse-state-comment-style
1557 8. character address of start of comment or string; nil if not in one. parse-state-in-comment
1558 9. Intermediate data for continuation of parsing (subject to change). parse-state-level-starts
1559 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1560 in parentheses becomes equal to TARGETDEPTH.
1561 Fourth arg STOPBEFORE non-nil means stop when come to
1562 any character that starts a sexp.
1563 Fifth arg OLDSTATE is a list like what this function returns.
1564 It is used to initialize the state of the parse. Elements number 1, 2, 6
1565 and 8 are ignored.
1566 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1567 If it is symbol `syntax-table', stop after the start of a comment or a
1568 string, or after end of a comment or a string."
1569 (check-type target-depth number)
1570 (multiple-value-setq (from to) (validate-region from to buffer))
1571 (let ((state (scan-sexps-forward from (buffer-char-to-aref buffer from) to
1572 target-depth (not (null stop-before)) old-state
1573 ;; XXX
1574 (if comment-stop
1575 (if (eq comment-stop 'syntax-table) -1 1)
1577 buffer table)))
1578 (goto-char (parse-state-location state) buffer)
1579 state))