70cdb3c79eb2960639b81a28b2e8f592d36c0a37
[lice.git] / syntax.lisp
blob70cdb3c79eb2960639b81a28b2e8f592d36c0a37
1 ;;; Cheap syntax functions
3 (in-package :lice)
5 (defparameter +syntax-classes+
6 '(:whitespace :punctuation :word-constituent :symbol-constituent :open :close :quote :string :math :escape
7 :character-quote :comment :end-comment :inherit :comment-fence :string-fence))
9 (deftype syntax-class ()
10 '(member :whitespace ; for a whitespace character
11 :punctuation ; for random punctuation characters
12 :word-constituent ; for a word constituent
13 :symbol-constituent ; symbol constituent but not word constituent
14 :open ; for a beginning delimiter
15 :close ; for an ending delimiter
16 :quote ; for a prefix character like Lisp '
17 :string ; for a string-grouping character like Lisp "
18 :math ; for delimiters like $ in Tex.
19 :escape ; for a character that begins a C-style escape
20 :character-quote ; for a character that quotes the
21 ; following character
22 :comment ; for a comment-starting character
23 :end-comment ; for a comment-ending character
24 :inherit ; use the standard syntax table for
25 ; this character
26 :comment-fence ; Starts/ends comment which is
27 ; delimited on the other side by any
28 ; char with the same syntaxcode.
29 :string-fence ; Starts/ends string which is delimited
30 ; on the other side by any char with
31 ; the same syntaxcode.
34 (defclass syntax-table ()
35 ((hash :initform (make-hash-table :test 'equal) :initarg :hash :accessor syntax-table-hash)
36 (parent :initform nil :initarg :parent :accessor syntax-table-parent))
37 (:documentation "A syntax table class."))
39 (defstruct syntax-descriptor
40 (class nil :type syntax-class)
41 ;; FIXME: this will be a bitvector at some point but for now just
42 ;; make it a list.
43 (flags nil :type list)
44 ;; this is where the terminator paren char etc is stored
45 extra)
47 (defun set-raw-syntax-entry (table char descriptor)
48 (setf (gethash char (syntax-table-hash table)) descriptor))
50 (defvar *syntax-code-object* (loop for i in +syntax-classes+
51 collect i
52 collect (make-syntax-descriptor :class i :flags nil))
53 "A pool of syntax descriptors to be shared in the standard
54 syntax table in an attempt to save memory. FIXME: premature
55 optimization?")
57 (defvar *standard-syntax-table*
58 (let ((table (make-instance 'syntax-table :parent nil))
59 tmp)
60 ;; Control characters should not be whitespace.
61 (setf tmp (getf *syntax-code-object* :punctuation))
62 (loop for i below (char-code #\Space) do
63 (set-raw-syntax-entry table (code-char i) tmp))
64 ;; Except that a few really are whitespace.
65 (setf tmp (getf *syntax-code-object* :whitespace))
66 (set-raw-syntax-entry table #\Space tmp)
67 (set-raw-syntax-entry table #\Tab tmp)
68 (set-raw-syntax-entry table #\Newline tmp)
69 (set-raw-syntax-entry table #\Return tmp)
70 (set-raw-syntax-entry table #\Page tmp)
72 (setf tmp (getf *syntax-code-object* :word-constituent))
73 (loop
74 for i from (char-code #\a) to (char-code #\z)
75 for j from (char-code #\A) to (char-code #\Z) do
76 (set-raw-syntax-entry table (code-char i) tmp)
77 (set-raw-syntax-entry table (code-char j) tmp))
78 (loop for i from (char-code #\0) to (char-code #\9) do
79 (set-raw-syntax-entry table (code-char i) tmp))
81 (set-raw-syntax-entry table #\$ tmp)
82 (set-raw-syntax-entry table #\% tmp)
84 (set-raw-syntax-entry table #\( (make-syntax-descriptor :class :open :extra ")"))
85 (set-raw-syntax-entry table #\) (make-syntax-descriptor :class :close :extra "("))
86 (set-raw-syntax-entry table #\[ (make-syntax-descriptor :class :open :extra "]"))
87 (set-raw-syntax-entry table #\] (make-syntax-descriptor :class :close :extra "["))
88 (set-raw-syntax-entry table #\{ (make-syntax-descriptor :class :open :extra "}"))
89 (set-raw-syntax-entry table #\} (make-syntax-descriptor :class :close :extra "{"))
91 (set-raw-syntax-entry table #\" (make-syntax-descriptor :class :string))
92 (set-raw-syntax-entry table #\\ (make-syntax-descriptor :class :escape))
94 (setf tmp (getf *syntax-code-object* :symbol-constituent))
95 (loop for i across "_-+*/&|<>=" do
96 (set-raw-syntax-entry table i tmp))
98 (setf tmp (getf *syntax-code-object* :punctuation))
99 (loop for i across ".,;:?!#@~^'`" do
100 (set-raw-syntax-entry table i tmp))
101 ;; TODO: i18n characters
102 table)
103 "The standard syntax table")
105 (defun make-syntax-table (&optional (parent *standard-syntax-table*))
106 "Return a new syntax table.
107 Create a syntax table which inherits from parent (if non-nil) or
108 from `standard-syntax-table' otherwise."
109 (make-instance 'syntax-table
110 :parent parent))
112 (defun copy-syntax-table (&optional (table *standard-syntax-table*))
113 "Construct a new syntax table and return it.
114 It is a copy of the TABLE, which defaults to the standard syntax table."
115 (let* ((hash (make-hash-table)))
116 (maphash (lambda (k v)
117 (setf (gethash k hash) v))
118 (syntax-table-hash table))
119 (make-instance 'syntax-table
120 :hash hash :parent (syntax-table-parent table))))
122 (defun modify-syntax-entry (char class &key flags extra (table (syntax-table)))
123 "Set syntax for character CHAR according to CLASS, FLAGS, and EXTRA."
124 (check-type char character)
125 (check-type class syntax-class)
126 (check-type flags list)
127 (check-type table syntax-table)
128 (set-raw-syntax-entry table char
129 (make-syntax-descriptor :class class :flags flags :extra extra)))
131 (defun char-syntax (character &optional (table (syntax-table)))
132 "Return the syntax code of CHARACTER, described by a character.
133 For example, if CHARACTER is a word constituent,
134 the symbol `:WORD-CONSTITUENT' is returned."
135 (let ((descr (gethash character (syntax-table-hash table))))
136 (when descr
137 (syntax-descriptor-class descr))))
139 (defparameter +word-constituents+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
141 (defvar *words-include-escapes* nil
142 "Non-nil means `forward-word', etc., should treat escape chars part of words.")
144 (defun syntax-after (pos &aux (buffer (current-buffer)))
145 "Return the raw syntax of the char after POS.
146 If POS is outside the buffer's accessible portion, return nil."
147 (unless (or (< pos (point-min)) (>= pos (point-max)))
148 (let* ((ch (buffer-char-after buffer pos))
149 (descr (and ch (gethash ch (syntax-table buffer)))))
150 descr)))
152 ;; FIXME: having the flags as a list is memory intensive. How about a
153 ;; bit vector or number and a function that converts between the two?
155 (defun buffer-syntax-table (buffer)
156 (major-mode-syntax-table (buffer-major-mode buffer)))
158 (defun syntax-table (&aux (buffer (current-buffer)))
159 (buffer-syntax-table buffer))
161 ;; (defun (setf syntax-table) (value &aux (buffer (current-buffer)))
162 ;; "Select a new syntax table for the current buffer. One argument, a syntax table."
163 ;; (check-type value syntax-table)
164 ;; (setf (buffer-syntax-table buffer) value))
166 ;; ;; The above looks a bit weird so lets also have a set function.
167 ;; (defun set-syntax-table (value)
168 ;; "Select a new syntax table for the current buffer. One argument, a syntax table."
169 ;; (setf (syntax-table) value))
171 (defun &syntax-with-flags (ch table &optional (default :whitespace))
172 (or (gethash ch (syntax-table-hash table))
173 ;; try the parent
174 (and (syntax-table-parent table)
175 (&syntax-with-flags ch (syntax-table-parent table) default))
176 ;; return the default
177 (make-syntax-descriptor :class default)))
179 (defun &syntax (ch table &optional (default :whitespace))
180 (let ((descr (&syntax-with-flags ch table default)))
181 (if descr
182 (syntax-descriptor-class descr)
183 default)))
185 (defun &syntax-flags-syntax (syntax)
186 (syntax-descriptor-class syntax))
188 (defun &syntax-comment-start-first (ch table)
189 (let ((descr (&syntax-with-flags ch table)))
190 (when descr
191 (and (find :comment-start-first (syntax-descriptor-flags descr)) t))))
193 (defun &syntax-flags-comment-start-first (syntax)
194 (and (find :comment-start-first (syntax-descriptor-flags syntax)) t))
196 (defun &syntax-comment-start-second (ch table)
197 (let ((descr (&syntax-with-flags ch table)))
198 (when descr
199 (and (find :comment-start-second (syntax-descriptor-flags descr)) t))))
201 (defun &syntax-flags-comment-start-second (syntax)
202 (and (find :comment-start-second (syntax-descriptor-flags syntax)) t))
204 (defun &syntax-comment-end-first (ch table)
205 (let ((descr (&syntax-with-flags ch table)))
206 (when descr
207 (and (find :comment-end-first (syntax-descriptor-flags descr)) t))))
209 (defun &syntax-flags-comment-end-first (syntax)
210 (and (find :comment-end-first (syntax-descriptor-flags syntax)) t))
212 (defun &syntax-comment-end-second (ch table)
213 (let ((descr (&syntax-with-flags ch table)))
214 (when descr
215 (and (find :comment-end-second (syntax-descriptor-flags descr)) t))))
217 (defun &syntax-flags-comment-end-second (syntax)
218 (and (find :comment-end-second (syntax-descriptor-flags syntax)) t))
220 (defun &syntax-prefix (ch table)
221 (let ((descr (&syntax-with-flags ch table)))
222 (when descr
223 (and (find :prefix (syntax-descriptor-flags descr)) t))))
225 (defun &syntax-flags-prefix (syntax)
226 (and (find :prefix (syntax-descriptor-flags syntax)) t))
228 (defun &syntax-comment-style (ch table)
229 (let ((descr (&syntax-with-flags ch table)))
230 (when descr
231 (and (find :comment-style (syntax-descriptor-flags descr)) t))))
233 (defun &syntax-flags-comment-style (syntax)
234 (and (find :comment-style (syntax-descriptor-flags syntax)) t))
236 (defun &syntax-comment-nested (ch table)
237 (let ((descr (&syntax-with-flags ch table)))
238 (when descr
239 (and (find :comment-nested (syntax-descriptor-flags descr)) t))))
241 (defun &syntax-flags-comment-nested (syntax)
242 (and (find :comment-nested (syntax-descriptor-flags syntax)) t))
244 (defmacro with-syntax-table (table &body body)
245 "Evaluate BODY with syntax table of current buffer set to TABLE.
246 The syntax table of the current buffer is saved, BODY is evaluated, and the
247 saved table is restored, even in case of an abnormal exit.
248 Value is what BODY returns."
249 (let ((old-table (gensym "TABLE"))
250 (old-buffer (gensym "BUFFER")))
251 `(let ((,old-table (syntax-table))
252 (,old-buffer (current-buffer)))
253 (unwind-protect
254 (progn
255 (set-syntax-table ,table)
256 ,@body)
257 (save-current-buffer
258 (set-buffer ,old-buffer)
259 (set-syntax-table ,old-table))))))
261 (defun syntax-class (syntax)
262 "Return the syntax class part of the syntax descriptor SYNTAX.
263 If SYNTAX is nil, return nil."
264 (&syntax-flags-syntax syntax))
266 (defun scan-words (from count)
267 "Return the position across COUNT words from FROM.
268 If that many words cannot be found before the end of the buffer,
269 return NIL. COUNT negative means scan backward and stop at word
270 beginning."
271 (let* ((buffer (current-buffer))
272 (beg (begv buffer))
273 (end (zv buffer))
274 (from-aref (buffer-char-to-aref buffer from))
275 (table (syntax-table))
276 ch code)
277 (while (> count 0)
278 (loop
279 (when (= from end)
280 (return-from scan-words nil))
281 (setf ch (buffer-fetch-char from-aref buffer)
282 code (&syntax ch table))
283 (inc-both from from-aref buffer)
284 (when (or (and *words-include-escapes*
285 (or (eq code :escape)
286 (eq code :character-quote)))
287 (eq code :word-constituent))
288 (return nil)))
289 (loop
290 (when (= from end)
291 (return nil))
292 (setf ch (buffer-fetch-char from-aref buffer)
293 code (&syntax from-aref table))
294 (when (and (and (not *words-include-escapes*)
295 (or (eq code :escape)
296 (eq code :character-quote)))
297 (or (not (eq code :word-constituent))))
298 ;; (word-boundary-p ..)
299 (return nil))
300 (inc-both from from-aref buffer))
301 (decf count))
302 (while (< count 0)
303 (loop
304 (when (= from beg)
305 (return-from scan-words nil))
306 (dec-both from from-aref buffer)
307 (setf ch (buffer-fetch-char from-aref buffer)
308 code (&syntax ch table))
309 (when (or (and *words-include-escapes*
310 (or (eq code :escape)
311 (eq code :character-quote)))
312 (eq code :word-constituent))
313 (return nil)))
314 (loop
315 (when (= from beg)
316 (return nil))
317 (setf ch (buffer-fetch-char from-aref buffer)
318 code (&syntax ch table))
319 (when (and (and (not *words-include-escapes*)
320 (or (eq code :escape)
321 (eq code :character-quote)))
322 (not (eq code :word-constituent)))
323 (return nil))
324 (dec-both from from-aref buffer))
325 (incf count))
326 from))
328 (defcommand forward-word ((n) :prefix)
329 "Move point forward ARG words (backward if ARG is negative).
330 Normally returns t.
331 If an edge of the buffer or a field boundary is reached, point is left there
332 and the function returns nil. Field boundaries are not noticed if
333 `inhibit-field-text-motion' is non-nil."
334 (labels ((isaword (c)
335 (find c +word-constituents+ :test #'char=)))
336 (let ((buffer (current-buffer)))
337 (cond ((> n 0)
338 (gap-move-to buffer (buffer-point-aref buffer))
339 ;; do it n times
340 (loop for i from 0 below n
341 while (let (p1 p2)
342 ;; search forward for a word constituent
343 (setf p1 (position-if #'isaword (buffer-data buffer)
344 :start (buffer-point-aref buffer)))
345 ;; search forward for a non word constituent
346 (when p1
347 (setf p2 (position-if (complement #'isaword) (buffer-data buffer) :start p1)))
348 (if p2
349 (goto-char (buffer-aref-to-char buffer p2))
350 (goto-char (point-max)))
351 p2)))
352 ((< n 0)
353 (setf n (- n))
354 (gap-move-to buffer (buffer-point-aref buffer))
355 ;; do it n times
356 (loop for i from 0 below n
357 for start = (buffer-gap-start buffer) then (buffer-point-aref buffer)
358 while (let (p1 p2)
359 ;; search backward for a word constituent
360 (setf p1 (position-if #'isaword (buffer-data buffer)
361 :from-end t
362 :end start))
363 ;; search backward for a non word constituent
364 (when p1
365 (setf p2 (position-if (complement #'isaword) (buffer-data buffer) :from-end t :end p1)))
366 (if p2
367 (goto-char (1+ (buffer-aref-to-char buffer p2)))
368 (goto-char (point-min)))
369 p2)))))))
371 (defcommand backward-word ((n) :prefix)
372 "Move point forward ARG words (backward if ARG is negative).
373 Normally returns t.
374 If an edge of the buffer or a field boundary is reached, point is left there
375 and the function returns nil. Field boundaries are not noticed if
376 `inhibit-field-text-motion' is non-nil."
377 (forward-word (- n)))
379 (defcommand kill-word ((arg)
380 :prefix)
381 "Kill characters forward until encountering the end of a word.
382 With argument, do this that many times."
383 (kill-region (point) (progn (forward-word arg) (point))))
385 (defcommand backward-kill-word ((arg)
386 :prefix)
387 "Kill characters backward until encountering the end of a word.
388 With argument, do this that many times."
389 (kill-word (- arg)))
391 (defvar *parse-sexp-ignore-comments* t
392 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.")
394 (defvar *open-paren-in-column-0-is-defun-start* t
395 "*Non-nil means an open paren in column 0 denotes the start of a defun.")
397 ;; Conditions used by the syntax code because the parsing is so bloody
398 ;; hairy.
399 (define-condition syntax-done () ())
400 (define-condition syntax-lossage () ())
402 (define-condition expression-ends-prematurely (lice-condition)
404 (define-condition unbalanced-parenthesis (lice-condition)
405 ((last-good :initarg :last-good :accessor unbalanced-parenthesis-last-good)
406 (from :initarg :from :accessor unbalanced-parenthesis-from)))
408 (defun &char-quoted (char-pos aref-pos buffer table)
409 "Returns TRUE if char at CHAR-POS is quoted.
410 Global syntax-table data should be set up already to be good at CHAR-POS
411 or after. On return global syntax data is good for lookup at CHAR-POS."
412 (let ((beg (begv buffer))
413 quoted)
414 (dec-both char-pos aref-pos buffer)
415 (while (>= char-pos beg)
416 (let* ((ch (buffer-fetch-char aref-pos buffer))
417 (code (&syntax ch table)))
418 (when (not (find code '(:character-quote :escape)))
419 (return nil))
420 (dec-both char-pos aref-pos buffer)
421 (setf quoted (not quoted))))
422 quoted))
424 (defun find-defun-start (pos pos-aref buffer table)
425 "Return a defun-start position before POS and not too far before.
426 It should be the last one before POS, or nearly the last.
428 When open_paren_in_column_0_is_defun_start is nonzero,
429 only the beginning of the buffer is treated as a defun-start.
431 We record the information about where the scan started
432 and what its result was, so that another call in the same area
433 can return the same value very quickly.
435 There is no promise at which position the global syntax data is
436 valid on return from the subroutine, so the caller should explicitly
437 update the global data."
438 (declare (ignore pos-aref))
439 (unless *open-paren-in-column-0-is-defun-start*
440 (return-from find-defun-start (make-parse-state :start-value (begv buffer)
441 :start-value-aref (begv-aref buffer))))
442 ;; Back up to start of line.
443 (let* ((begv (begv buffer))
444 (pt (buffer-scan-newline buffer pos (begv buffer) -1))
445 (pt-aref (buffer-char-to-aref buffer pt)))
446 (while (> pt begv)
447 (let ((ch (buffer-fetch-char pt-aref buffer)))
448 (when (eq (&syntax ch table) :open)
449 (return nil))
450 ;; Move to beg of previous line.
451 (setf pt (buffer-scan-newline buffer pt (begv buffer) -2)
452 pt-aref (buffer-char-to-aref buffer pt))))
453 ;; Return what we found
454 (make-parse-state :start-value pt
455 :start-value-aref pt-aref
456 :start-buffer buffer
457 ;; :modiff MODIFF
458 :start-begv (begv buffer)
459 :start-pos pos)))
461 ;; FIXME: doesn't handle ^. Maybe if :not is the first symbol in the list?
462 (defun skip-syntax-forward (syntax-list &optional (lim (point-max)))
463 "Move point forward across chars in specified syntax classes.
464 SYNTAX-LIST is a string of syntax code characters.
465 Stop before a char whose syntax is not in SYNTAX-LIST, or at position LIM.
466 If SYNTAX-LIST starts with ^, skip characters whose syntax is NOT in SYNTAX-LIST.
467 This function returns the distance traveled, either zero or positive."
468 (check-type lim integer)
469 (let* ((buffer (current-buffer))
470 (table (syntax-table))
471 (pos (point))
472 (start pos)
473 (pos-aref (buffer-char-to-aref buffer pos))
474 ch syntax)
475 (while (< pos lim)
476 (setf ch (buffer-fetch-char pos-aref buffer)
477 syntax (&syntax ch table))
478 (unless (find syntax syntax-list)
479 (return nil))
480 (inc-both pos pos-aref buffer))
481 (goto-char pos)
482 (- pos start)))
484 (defun skip-chars (forwardp syntaxp string lim)
485 (declare (ignore syntaxp))
486 (labels ((match-char (c negate ranges chars)
487 ;; check ranges
488 (catch :continue
489 (loop for r in ranges do
490 (if negate
491 (when (<= (car r) (char-code c) (cdr r))
492 (throw :done nil))
493 (when (<= (car r) (char-code c) (cdr r))
494 (throw :continue nil))))
495 ;; check chars
496 (if negate
497 (when (find c chars :test 'char=)
498 (throw :done nil))
499 (when (find c chars :test 'char=)
500 (throw :continue nil)))
501 (unless negate
502 ;; if the char fell through all that then we're done
503 (throw :done nil)))))
504 (check-type string string)
505 (check-number-coerce-marker lim)
506 (let ((idx 0)
507 negate
508 ranges
509 chars
510 (start-point (point)))
511 ;; don't allow scan outside bounds of buffer.
512 (setf lim (min (max lim (begv)) (zv)))
514 (when (char= (char string 0) #\^)
515 (setf negate t)
516 (incf idx))
518 ;; compile the ranges and chars
519 (while (< idx (length string))
520 (let ((c (char string idx)))
522 ;; (if syntaxp
523 ;; ;; TODO: handle syntaxp
524 ;; nil
525 ;; (progn
526 ;; ;; TODO: handle iso classes
527 ;; ))
528 (when (char= c #\\)
529 (incf idx)
530 (when (= idx (length string))
531 (return nil))
532 (setf c (char string idx)))
533 (incf idx)
535 ;; Treat `-' as range character only if another character
536 ;; follows.
537 (if (and (< (1+ idx) (length string))
538 (char= (char string idx) #\-))
539 (progn
540 (incf idx)
541 (let* ((c2 (char string idx))
542 (code1 (char-code c))
543 (code2 (char-code c2)))
544 (when (<= code1 code2)
545 (push (cons code1 code2) ranges))
546 (incf idx)))
547 (progn
548 (push c chars)))))
549 ;; scan
550 (let* ((buffer (current-buffer))
551 (pos (point buffer))
552 (pos-aref (buffer-char-to-aref buffer pos)))
553 (catch :done
554 (if forwardp
555 (while (< pos lim)
556 (match-char (buffer-fetch-char pos-aref buffer)
557 negate ranges chars)
558 (inc-both pos pos-aref buffer))
559 (progn
560 ;; do a little dance to end up in the right spot
561 (dec-both pos pos-aref buffer)
562 (unwind-protect
563 (while (> pos lim)
564 (match-char (buffer-fetch-char pos-aref buffer)
565 negate ranges chars)
566 (dec-both pos pos-aref buffer))
567 (inc-both pos pos-aref buffer)))))
568 (set-point pos buffer)
569 ;; return the number of chars we scanned
570 (- pos start-point)))))
572 (defun skip-chars-forward (string &optional (lim (zv)))
573 "Move point forward, stopping before a char not in string."
574 (skip-chars t nil string lim))
576 (defun skip-chars-backward (string &optional (lim (begv)))
577 "Move point backward, stopping after a char not in string."
578 (skip-chars nil nil string lim))
580 (defun skip-whitespace-forward (&optional (lim (zv)))
581 "Move point forward, stopping before a char that is not a space or tab."
582 (skip-chars-forward (coerce '(#\Space #\Tab) 'string) lim))
584 (defun &back-comment (from from-aref stop comment-nested comment-style buffer table)
585 "Checks whether charpos FROM is at the end of a comment.
586 FROM_BYTE is the bytepos corresponding to FROM.
587 Do not move back before STOP.
589 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
590 return -1 otherwise.
592 If successful, return the charpos of the comment's beginning, and the aref pos.
594 **Global syntax data remains valid for backward search starting at
595 **the returned value (or at FROM, if the search was not successful)."
596 ;; Look back, counting the parity of string-quotes,
597 ;; and recording the comment-starters seen.
598 ;; When we reach a safe place, assume that's not in a string;
599 ;; then step the main scan to the earliest comment-starter seen
600 ;; an even number of string quotes away from the safe place.
602 ;; OFROM[I] is position of the earliest comment-starter seen
603 ;; which is I+2X quotes from the comment-end.
604 ;; PARITY is current parity of quotes from the comment end.
605 (let ((string-style :none)
606 (string-lossage nil)
607 ;; Not a real lossage: indicates that we have passed a matching comment
608 ;; starter plus a non-matching comment-ender, meaning that any matching
609 ;; comment-starter we might see later could be a false positive (hidden
610 ;; inside another comment).
611 ;; Test case: { a (* b } c (* d *)
612 (comment-lossage nil)
613 (comment-end from)
614 (comment-end-aref from-aref)
615 (comment-start-pos 0)
616 comment-start-aref
617 ;; Place where the containing defun starts,
618 ;; or nil if we didn't come across it yet.
619 defun-start
620 defun-start-aref
621 code
622 (nesting 1) ; current comment nesting
624 (syntax nil))
625 (handler-case
626 (progn
627 ;; FIXME: A }} comment-ender style leads to incorrect behavior
628 ;; in the case of {{ c }}} because we ignore the last two chars which are
629 ;; assumed to be comment-enders although they aren't.
631 ;; At beginning of range to scan, we're outside of strings;
632 ;; that determines quote parity to the comment-end.
633 (while (/= from stop)
634 (catch :continue
635 (let (temp-aref prev-syntax com2start com2end)
636 (dec-both from from-aref buffer)
637 (setf prev-syntax syntax
638 ch (buffer-fetch-char from-aref buffer)
639 syntax (&syntax-with-flags ch table)
640 code (&syntax ch table)
641 ;; Check for 2-char comment markers.
642 com2start (and (&syntax-flags-comment-start-first syntax)
643 (&syntax-flags-comment-start-second prev-syntax)
644 (eq comment-style (&syntax-flags-comment-style prev-syntax))
645 (eq (or (&syntax-flags-comment-nested prev-syntax)
646 (&syntax-flags-comment-nested syntax))
647 comment-nested))
648 com2end (and (&syntax-flags-comment-end-first syntax)
649 (&syntax-flags-comment-end-second prev-syntax)))
650 ;; Nasty cases with overlapping 2-char comment markers:
651 ;; - snmp-mode: -- c -- foo -- c --
652 ;; --- c --
653 ;; ------ c --
654 ;; - c-mode: *||*
655 ;; |* *|* *|
656 ;; |*| |* |*|
657 ;; /// */
659 ;; If a 2-char comment sequence partly overlaps with
660 ;; another, we don't try to be clever.
661 (when (and (> from stop)
662 (or com2end com2start))
663 (let ((next from)
664 (next-aref from-aref)
665 next-c
666 next-syntax)
667 (dec-both next next-aref buffer)
668 (setf next-c (buffer-fetch-char next-aref buffer)
669 next-syntax (&syntax-with-flags next-c table))
670 (when (or (and (or com2start comment-nested)
671 (&syntax-flags-comment-end-second syntax)
672 (&syntax-flags-comment-end-first next-syntax))
673 (and (or com2end comment-nested)
674 (&syntax-flags-comment-start-second syntax)
675 (eq comment-style (&syntax-flags-comment-style syntax))
676 (&syntax-flags-comment-start-first next-syntax)))
677 (signal 'syntax-lossage))))
679 (when (and com2start
680 (= comment-start-pos 0))
681 ;; We're looking at a comment starter. But it might be a comment
682 ;; ender as well (see snmp-mode). The first time we see one, we
683 ;; need to consider it as a comment starter,
684 ;; and the subsequent times as a comment ender.
685 (setf com2end 0))
687 ;; Turn a 2-char comment sequences into the appropriate syntax.
688 (cond (com2end
689 (setf code :end-comment))
690 (com2start
691 (setf code :comment))
692 ;; Ignore comment starters of a different style.
693 ((and (eq code :comment)
694 (or (not (eq comment-style (&syntax-flags-comment-style syntax)))
695 (not (eq comment-nested (&syntax-flags-comment-nested syntax)))))
696 (throw :continue nil)))
698 ;; Ignore escaped characters, except comment-enders.
699 (when (and (not (eq code :end-comment))
700 (&char-quoted from from-aref buffer table))
701 (throw :continue nil))
703 (case code
704 ((:string-fence :comment-fence :string)
705 (when (find code '(:string-fence :comment-fence))
706 (setf ch (if (eq code :string-fence)
707 :string-style
708 :comment-style)))
709 ;; Track parity of quotes.
710 (cond ((eq string-style :none)
711 ;; Entering a string.
712 (setf string-style ch))
713 ((eq string-style ch)
714 ;; leaving the string
715 (setf string-style :none))
717 ;; If we have two kinds of string delimiters.
718 ;; There's no way to grok this scanning backwards.
719 (setf string-lossage t))))
720 (:comment
721 ;; We've already checked that it is the relevant comstyle.
722 (when (or (eq string-style :none)
723 comment-lossage
724 string-lossage)
725 ;; There are odd string quotes involved, so let's be careful.
726 ;; Test case in Pascal: " { " a { " } */
727 (signal 'syntax-lossage))
728 (if (not comment-nested)
729 ;; Record best comment-starter so far.
730 (setf comment-start-pos from
731 comment-start-aref from-aref)
732 (progn
733 (decf nesting)
734 (when (<= nesting)
735 ;; nested comments have to be balanced, so we don't need to
736 ;; keep looking for earlier ones. We use here the same (slightly
737 ;; incorrect) reasoning as below: since it is followed by uniform
738 ;; paired string quotes, this comment-start has to be outside of
739 ;; strings, else the comment-end itself would be inside a string.
740 (signal 'syntax-done)))))
741 (:end-comment
742 (cond ((and (eq comment-style (&syntax-flags-comment-style syntax))
743 (or (and com2end
744 (&syntax-flags-comment-nested prev-syntax))
745 (eq comment-nested (&syntax-flags-comment-nested syntax))))
746 ;; This is the same style of comment ender as ours.
747 (if comment-nested
748 (incf nesting)
749 ;; Anything before that can't count because it would match
750 ;; this comment-ender rather than ours.
751 (setf from stop)))
752 ((or (/= comment-start-pos 0)
753 (char/= ch #\Newline))
754 ;; We're mixing comment styles here, so we'd better be careful.
755 ;; The (comstart_pos != 0 || c != '\n') check is not quite correct
756 ;; (we should just always set comment_lossage), but removing it
757 ;; would imply that any multiline comment in C would go through
758 ;; lossage, which seems overkill.
759 ;; The failure should only happen in the rare cases such as
760 ;; { (* } *)
761 (setf comment-lossage t))))
762 (:open
763 ;; Assume a defun-start point is outside of strings.
764 (when (and *open-paren-in-column-0-is-defun-start*
765 (or (= from stop)
766 (progn
767 (setf temp-aref (aref-minus-1 from-aref buffer))
768 (char= (buffer-fetch-char temp-aref buffer) #\Newline))))
769 (setf defun-start from
770 defun-start-aref from-aref
771 ;; Break out of the loop.
772 from stop)))))))
774 (if (= comment-start-pos 0)
775 (setf from comment-end
776 from-aref comment-end-aref)
777 ;; If comstart_pos is set and we get here (ie. didn't jump to `lossage'
778 ;; or `done'), then we've found the beginning of the non-nested comment.
779 (setf from comment-start-pos
780 from-aref comment-start-aref)))
781 (syntax-lossage ()
782 ;; We had two kinds of string delimiters mixed up
783 ;; together. Decode this going forwards.
784 ;; Scan fwd from a known safe place (beginning-of-defun)
785 ;; to the one in question; this records where we
786 ;; last passed a comment starter.
787 ;; If we did not already find the defun start, find it now.
788 (when (= defun-start 0)
789 (let ((ret (find-defun-start comment-end comment-end-aref buffer table)))
790 (setf defun-start (parse-state-start-value ret)
791 defun-start-aref (parse-state-start-value-aref ret))))
792 (loop do
793 (let ((state (scan-sexps-forward defun-start defun-start-aref
794 comment-end -10000 0 nil 0 buffer table)))
795 (setf defun-start comment-end)
796 (if (and (eq (parse-state-in-comment state) comment-nested)
797 (eq (parse-state-comment-style state) comment-style))
798 (setf from (parse-state-comment-string-start state))
799 (progn
800 (setf from comment-end)
801 (when (parse-state-in-comment state) ; XXX
802 ;; If comment_end is inside some other comment, maybe ours
803 ;; is nested, so we need to try again from within the
804 ;; surrounding comment. Example: { a (* " *)
805 (setf defun-start (+ (parse-state-comment-string-start state) 2)
806 defun-start-aref (buffer-char-to-aref buffer defun-start))))))
807 while (< defun-start comment-end))
808 (setf from-aref (buffer-char-to-aref buffer from))))
809 (values (if (= from comment-end) -1 from)
810 from-aref)))
812 (defun &forward-comment (from from-aref stop nesting style prev-syntax buffer table)
813 "Jump over a comment, assuming we are at the beginning of one.
814 FROM is the current position.
815 FROM_BYTE is the bytepos corresponding to FROM.
816 Do not move past STOP (a charpos).
817 The comment over which we have to jump is of style STYLE
818 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
819 NESTING should be positive to indicate the nesting at the beginning
820 for nested comments and should be zero or negative else.
821 ST_COMMENT_STYLE cannot be nested.
822 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
823 (or nil If the search cannot start in the middle of a two-character).
825 If successful, return 1 and store the charpos of the comment's end
826 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
827 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
828 corresponding bytepos into *BYTEPOS_PTR and the current nesting
829 (as defined for state.incomment) in *INCOMMENT_PTR.
831 The comment end is the last character of the comment rather than the
832 character just after the comment.
834 Global syntax data is assumed to initially be valid for FROM and
835 remains valid for forward search starting at the returned position."
836 (let (c
838 code
839 (syntax prev-syntax))
840 (labels ((forward ()
841 (when (= from stop)
842 (return-from &forward-comment
843 (values nil from from-aref nesting)))
845 (setf c (buffer-fetch-char from-aref buffer)
846 code (&syntax c table)
847 syntax (&syntax-with-flags c table))
849 (when (and (eq code :end-comment)
850 (eq (&syntax-flags-comment-style syntax) style)
851 (if (&syntax-flags-comment-nested syntax)
852 (and (> nesting 0)
853 (progn (decf nesting)
854 (zerop nesting)))
855 (< nesting 0)))
856 ;; we have encountered a comment end of the same
857 ;; style as the comment sequence which began this
858 ;; comment section.
859 (throw :done nil))
860 (when (and (eq code :comment-fence)
861 (eq style :st-comment-style))
862 ;; we have encountered a comment end of the same style
863 ;; as the comment sequence which began this comment
864 ;; section.
865 (throw :done nil))
866 (when (and (> nesting 0)
867 (eq code :comment)
868 (&syntax-flags-comment-nested syntax)
869 (eq (&syntax-flags-comment-style syntax) style))
870 ;; we have encountered a nested comment of the same style
871 ;; as the comment sequence which began this comment section
872 (incf nesting))
873 (inc-both from from-aref buffer))
874 (do-comment ()
875 (when (and (< from stop)
876 (&syntax-flags-comment-end-first syntax)
877 (eq (&syntax-flags-comment-style syntax) style)
878 (progn
879 (setf c1 (buffer-fetch-char from-aref buffer))
880 (&syntax-comment-end-second c1 table))
881 (if (or (&syntax-flags-comment-nested syntax)
882 (&syntax-comment-nested c1 table))
883 (> nesting 0)
884 (< nesting 0)))
885 (decf nesting)
886 (if (<= nesting 0)
887 ;; we have encountered a comment end of the same style
888 ;; as the comment sequence which began this comment
889 ;; section
890 (throw :done nil)
891 (inc-both from from-aref buffer)))
892 (when (and (> nesting 0)
893 (< from stop)
894 (&syntax-flags-comment-start-first syntax)
895 (progn
896 (setf c1 (buffer-fetch-char from-aref buffer))
897 (eq (&syntax-comment-style c1 table) style))
898 (or (&syntax-flags-comment-nested syntax)
899 (&syntax-comment-nested c1 table)))
900 ;; we have encountered a nested comment of the same style
901 ;; as the comment sequence which began this comment
902 ;; section
903 (inc-both from from-aref buffer)
904 (incf nesting))))
905 ;; normalize nesting
906 (cond ((or (null nesting)
907 (<= nesting 0))
908 (setf nesting -1))
909 ((not (numberp nesting))
910 (setf nesting 1)))
911 ;; Enter the loop in the middle so that we find
912 ;; a 2-char comment ender if we start in the middle of it.
913 (catch :done
914 (if syntax
915 (progn
916 (do-comment)
917 (loop
918 (forward)
919 (do-comment)))
920 (loop
921 (forward)
922 (do-comment))))
923 (values t from from-aref))))
925 (defun prev-char-comment-end-first (pos pos-aref buffer table)
926 "Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE."
927 (dec-both pos pos-aref buffer)
928 (&syntax-comment-end-first (buffer-fetch-char pos-aref buffer)
929 table))
931 (defun &scan-lists (from count depth sexpflag &aux (buffer (current-buffer)))
932 "This is from the emacs function"
933 ;; the big TODO here is to use the CL readtable
934 (labels ((lose (last-good from)
935 (signal 'unbalanced-parenthesis :last-good last-good :from from)))
936 (let ((stop (if (> count 0) (zv buffer) (begv buffer)))
937 (from-aref (buffer-char-to-aref buffer from))
938 (min-depth (min 0 depth))
939 (table (syntax-table))
940 (last-good from)
941 quoted
942 math-exit
943 comment-start-first
944 code
945 ch ch1
946 temp-code
947 temp-pos
948 comment-nested
949 comment-style
950 found
951 prefix)
952 ;; normalize FROM
953 (setf from (max (min (zv buffer) from)
954 (begv buffer)))
955 (while (> count 0)
956 ;; the code needs to be able to jump out of the mess it got
957 ;; in.
958 (handler-case
959 (progn
960 (while (< from stop)
961 (catch :continue
962 (setf ch (buffer-fetch-char from-aref buffer)
963 code (&syntax ch table)
964 comment-start-first (&syntax-comment-start-first ch table)
965 comment-nested (&syntax-comment-nested ch table)
966 comment-style (&syntax-comment-style ch table)
967 prefix (&syntax-prefix ch table))
968 (when (= depth min-depth)
969 (setf last-good from))
970 (inc-both from from-aref buffer)
971 (when (and (< from stop) comment-start-first
972 (progn (setf ch (buffer-fetch-char from-aref buffer))
973 (&syntax-comment-start-second ch table))
974 *parse-sexp-ignore-comments*)
975 ;; we have encountered a comment start sequence and
976 ;; we are ignoring all text inside comments. We
977 ;; must record the comment style this sequence
978 ;; begins so that later, only a comment end of the
979 ;; same style actually ends the comment section
980 (setf code :comment
981 ch1 (buffer-fetch-char from-aref buffer)
982 comment-style (&syntax-comment-style ch1 table)
983 comment-nested (or comment-nested
984 (&syntax-comment-nested ch1 table)))
985 (inc-both from from-aref buffer))
987 (when prefix
988 (throw :continue nil))
990 (when (or (eq code :escape)
991 (eq code :character-quote))
992 (when (= from stop) (lose last-good from))
993 (inc-both from from-aref buffer)
994 ;; treat following character as a word constituent
995 (setf code :word-constituent))
997 (case code
998 ((:word-constituent :symbol-constituent)
999 (unless (or (not (zerop depth))
1000 (not sexpflag))
1001 (let (temp)
1002 (while (< from stop)
1003 (setf ch (buffer-fetch-char from-aref buffer)
1004 temp (&syntax ch table))
1005 (case temp
1006 ((:escape :character-quote)
1007 (inc-both from from-aref buffer)
1008 (when (= from stop) (lose last-good from)))
1009 ((:word-constituent :symbol-constituent :quote))
1011 (signal 'syntax-done)))
1012 (inc-both from from-aref buffer)))
1013 (signal 'syntax-done)))
1014 ((:comment-fence :comment)
1015 (when (eq code :comment-fence)
1016 (setf comment-style :st-comment-style))
1017 (multiple-value-setq (found from from-aref) (&forward-comment from from-aref stop comment-nested comment-style nil buffer table))
1018 (unless found
1019 (when (zerop depth) (signal 'syntax-done))
1020 (lose last-good from))
1021 (inc-both from from-aref buffer))
1022 (:math
1023 (when sexpflag
1024 (when (and (/= from stop)
1025 (char= ch (buffer-fetch-char from-aref buffer)))
1026 (inc-both from from-aref buffer))
1027 (if math-exit
1028 (progn
1029 (setf math-exit nil)
1030 (decf depth)
1031 (when (zerop depth) (signal 'syntax-done))
1032 (when (< depth min-depth)
1033 (signal 'expression-ends-prematurely))) ; XXX
1034 (progn
1035 (setf math-exit t)
1036 (incf depth)
1037 (when (zerop depth) (signal 'syntax-done))))))
1038 (:open
1039 (incf depth)
1040 (when (zerop depth) (signal 'syntax-done)))
1041 (:close
1042 (decf depth)
1043 (when (zerop depth) (signal 'syntax-done))
1044 (when (< depth min-depth)
1045 (signal 'expression-ends-prematurely)))
1046 ((:string :string-fence)
1047 (let* ((tmp-pos (aref-minus-1 from-aref buffer))
1048 (string-term (buffer-fetch-char tmp-pos buffer))
1049 temp)
1050 (loop
1051 (when (>= from stop) (lose last-good from))
1052 (setf ch (buffer-fetch-char from-aref buffer))
1053 (when (if (eq code :string)
1054 (and (char= ch string-term)
1055 (eq (&syntax ch table) :string))
1056 (eq (&syntax ch table) :string-fence))
1057 (return nil))
1058 (setf temp (&syntax ch table))
1059 (case temp
1060 ((:character-quote :escape)
1061 (inc-both from from-aref buffer)))
1062 (inc-both from from-aref buffer))
1063 (inc-both from from-aref buffer)
1064 (when (and (zerop depth)
1065 sexpflag)
1066 (signal 'syntax-done))))
1068 ;; Ignore whitespace, punctuation, quote, endcomment.
1069 ))))
1070 (unless (zerop depth) (lose last-good from))
1071 (return-from &scan-lists nil))
1072 (syntax-done ()
1073 (decf count))))
1075 (while (< count 0)
1076 (handler-case
1077 (progn
1078 (while (> from stop)
1079 (catch :continue
1080 (dec-both from from-aref buffer)
1081 (setf ch (buffer-fetch-char from-aref buffer)
1082 code (&syntax ch table))
1083 (when (= depth min-depth)
1084 (setf last-good from))
1085 (setf comment-style nil
1086 comment-nested (&syntax-comment-nested ch table))
1087 (when (eq code :end-comment)
1088 (setf comment-style (&syntax-comment-style ch table)))
1089 (when (and (> from stop)
1090 (&syntax-comment-end-second ch table)
1091 (prev-char-comment-end-first from from-aref buffer table)
1092 *parse-sexp-ignore-comments*)
1093 ;; We must record the comment style
1094 ;; encountered so that later, we can match
1095 ;; only the proper comment begin sequence of
1096 ;; the same style.
1097 (dec-both from from-aref buffer)
1098 (setf code :end-comment
1099 ch1 (buffer-fetch-char from-aref buffer)
1100 comment-nested (or comment-nested
1101 (&syntax-comment-nested ch1 table))))
1102 ;; Quoting turns anything except a comment-ender
1103 ;; into a word character. Note that this cannot
1104 ;; be true if we decremented FROM in the
1105 ;; if-statement above.
1106 (cond
1107 ((and (not (eq code :end-comment))
1108 (&char-quoted from from-aref buffer table))
1109 (dec-both from from-aref buffer)
1110 (setf code :word))
1111 ((&syntax-prefix ch table)
1112 ;; loop around again. I think this is nasty but fuckit.
1113 (throw :continue nil)))
1114 (case code
1115 ((:word-constituent :symbol-constituent :escape :character-quote)
1116 (unless (or (not (zerop depth))
1117 (not sexpflag))
1118 ;; This word counts as a sexp; count
1119 ;; object finished after passing it.
1120 (while (> from stop)
1121 (setf temp-pos from-aref)
1122 (decf temp-pos)
1123 (setf ch1 (buffer-fetch-char temp-pos buffer)
1124 temp-code (&syntax ch1 table))
1125 ;; Don't allow comment-end to be quoted.
1126 (when (eq temp-code :end-comment)
1127 (signal 'syntax-done))
1128 (setf quoted (&char-quoted (1- from) temp-pos buffer table))
1129 (when quoted
1130 (dec-both from from-aref buffer)
1131 (setf temp-pos (aref-minus-1 temp-pos buffer)))
1132 (setf ch1 (buffer-fetch-char temp-pos buffer)
1133 temp-code (&syntax ch1 table))
1134 (when (not (or quoted
1135 (eq temp-code :word-constituent)
1136 (eq temp-code :symbol-constituent)
1137 (eq temp-code :quote)))
1138 (signal 'syntax-done))
1139 (dec-both from from-aref buffer))
1140 (signal 'syntax-done)))
1141 (:math
1142 (when sexpflag
1143 (setf temp-pos (aref-minus-1 from-aref buffer))
1144 (when (and (/= from stop)
1145 (char= ch (buffer-fetch-char temp-pos buffer)))
1146 (dec-both from from-aref buffer))
1147 (if math-exit
1148 (progn
1149 (setf math-exit nil)
1150 (decf depth)
1151 (when (zerop depth) (signal 'syntax-done))
1152 (when (< depth min-depth)
1153 (signal 'expression-ends-prematurely)))
1154 (progn
1155 (setf math-exit t)
1156 (incf depth)
1157 (when (zerop depth) (signal 'syntax-done))))))
1158 (:close
1159 (incf depth)
1160 (when (zerop depth) (signal 'syntax-done)))
1161 (:open
1162 (decf depth)
1163 (when (zerop depth) (signal 'syntax-done))
1164 (when (< depth min-depth)
1165 (signal 'expression-ends-prematurely)))
1166 (:end-comment
1167 (when *parse-sexp-ignore-comments*
1168 (multiple-value-bind (found char-pos aref-pos)
1169 (&back-comment from from-aref stop comment-nested comment-style buffer table)
1170 (when (eq found :not-comment-end)
1171 (setf from char-pos
1172 from-aref aref-pos)))))
1173 ((:comment-fence :string-fence)
1174 (loop
1175 (when (= from stop) (lose last-good from))
1176 (dec-both from from-aref buffer)
1177 (when (and (not (&char-quoted from from-aref buffer table))
1178 (progn
1179 (setf ch (buffer-fetch-char from-aref buffer))
1180 (eq (&syntax ch table) code)))
1181 (return nil)))
1182 (when (and (eq code :string-fence)
1183 (zerop depth)
1184 sexpflag)
1185 (signal 'syntax-done)))
1186 (:string
1187 (let ((string-term (buffer-fetch-char from-aref buffer)))
1188 (loop
1189 (when (= from stop) (lose last-good from))
1190 (dec-both from from-aref buffer)
1191 (when (and (not (&char-quoted from from-aref buffer table))
1192 (progn
1193 (setf ch (buffer-fetch-char from-aref buffer))
1194 (char= string-term ch))
1195 (eq (&syntax ch table) :string))
1196 (return nil)))
1197 (when (and (zerop depth)
1198 sexpflag)
1199 (signal 'syntax-done))))
1201 ;; Ignore whitespace, punctuation, quote, endcomment.
1202 ))))
1203 (when (not (zerop depth)) (lose last-good from))
1204 (return-from &scan-lists nil))
1205 (syntax-done ()
1206 (incf count))))
1207 from)))
1209 (defun scan-lists (from count depth)
1210 "Scan from character number FROM by COUNT lists.
1211 Returns the character number of the position thus found.
1213 If DEPTH is nonzero, paren depth begins counting from that value,
1214 only places where the depth in parentheses becomes zero
1215 are candidates for stopping; COUNT such places are counted.
1216 Thus, a positive value for DEPTH means go out levels.
1218 Comments are ignored if `*parse-sexp-ignore-comments*' is non-nil.
1220 If the beginning or end of (the accessible part of) the buffer is reached
1221 and the depth is wrong, an error is signaled.
1222 If the depth is right but the count is not used up, nil is returned."
1223 (check-type from number)
1224 (check-type count number)
1225 (check-type depth number)
1226 (&scan-lists from count depth nil))
1228 (defun scan-sexps (from count)
1229 "Scan from character number FROM by COUNT balanced expressions.
1230 If COUNT is negative, scan backwards.
1231 Returns the character number of the position thus found.
1233 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1235 If the beginning or end of (the accessible part of) the buffer is reached
1236 in the middle of a parenthetical grouping, an error is signaled.
1237 If the beginning or end is reached between groupings
1238 but before count is used up, nil is returned."
1239 (check-type from number)
1240 (check-type count number)
1241 (&scan-lists from count 0 t))
1243 (defun backward-prefix-chars (&aux (buffer (current-buffer)) (table (syntax-table)))
1244 "Move point backward over any number of chars with prefix syntax.
1245 This includes chars with \"quote\" or \"prefix\" syntax (' or p)."
1246 (let* ((beg (begv buffer))
1247 (pos (point buffer))
1248 (pos-aref (buffer-char-to-aref buffer pos))
1249 (opoint (point buffer))
1250 (opoint-aref (buffer-char-to-aref buffer pos))
1252 (when (<= pos beg)
1253 ;; SET_PT_BOTH (opoint, opoint_byte);
1254 (return-from backward-prefix-chars nil))
1256 (dec-both pos pos-aref buffer)
1257 (while (and (not (&char-quoted pos pos-aref buffer table))
1258 (progn
1259 (setf c (buffer-fetch-char pos-aref buffer))
1260 (or (eq (&syntax c table) :quote)
1261 (&syntax-prefix c table))))
1262 (setf opoint pos
1263 opoint-aref pos-aref)
1264 (when (> (1+ pos) beg)
1265 (dec-both pos pos-aref buffer)))
1266 (set-point-both buffer opoint opoint-aref)
1267 nil))
1269 (defstruct parse-state
1270 depth min-depth
1271 this-level-start
1272 prev-level-start
1273 location
1274 level-starts
1275 quoted
1276 in-comment
1277 comment-style
1278 comment-string-start
1279 in-string
1280 start-value
1281 start-value-aref)
1283 (defstruct syntax-level
1284 last prev)
1286 ;; this function cries out for continuations. you almost have to look
1287 ;; at the C code to understand what's going on here, i bet. Hell, I
1288 ;; don't even understand it.
1289 (defun scan-sexps-forward (from from-aref end target-depth stop-before old-state comment-stop buffer table)
1290 "Parse forward from FROM / FROM_BYTE to END,
1291 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1292 and return a description of the state of the parse at END.
1293 If STOPBEFORE is nonzero, stop at the start of an atom.
1294 If COMMENTSTOP is 1, stop at the start of a comment.
1295 If COMMENTSTOP is -1, stop at the start or end of a comment,
1296 after the beginning of a string, or after the end of a string."
1297 ;;(message "scan-sexps-forward ~@{~a ~}" from from-aref end target-depth stop-before old-state comment-stop buffer table)
1298 (let ((state (make-parse-state))
1299 (prev-from from)
1300 (prev-from-aref from-aref)
1301 prev-from-syntax
1302 (boundary-stop (null comment-stop))
1303 no-fence
1305 code
1306 comment-nested
1307 depth
1308 min-depth
1309 temp
1310 start-quoted
1311 levels)
1312 (labels ((inc-from ()
1313 (setf prev-from from
1314 prev-from-aref from-aref
1315 temp (buffer-fetch-char prev-from-aref buffer)
1316 prev-from-syntax (&syntax-with-flags temp table))
1317 (inc-both from from-aref buffer))
1318 (cur-level ()
1319 "Return the current level struct"
1320 (car levels))
1321 (do-start-in-comment ()
1322 ;; The (from == BEGV) test was to enter the loop in the middle so
1323 ;; that we find a 2-char comment ender even if we start in the
1324 ;; middle of it. We don't want to do that if we're just at the
1325 ;; beginning of the comment (think of (*) ... (*)).
1326 (multiple-value-bind (found out-char out-aref in-comment)
1327 (&forward-comment from from-aref end
1328 (parse-state-in-comment state)
1329 (parse-state-comment-style state)
1330 (if (or (eq from (begv buffer))
1331 (< from (+ (parse-state-comment-string-start state) 3)))
1332 nil prev-from-syntax)
1333 buffer table)
1334 (setf from out-char
1335 from-aref out-aref
1336 (parse-state-in-comment state) in-comment)
1337 ;; Beware! prev_from and friends are invalid now.
1338 ;; Luckily, the `done' doesn't use them and the INC_FROM
1339 ;; sets them to a sane value without looking at them.
1340 (unless found (throw :end :done))
1341 (inc-from)
1342 (setf (parse-state-in-comment state) nil
1343 (parse-state-comment-style state) nil) ; reset the comment style
1344 (when boundary-stop (throw :end :done))))
1345 (do-sym-done ()
1346 ;;(message "do-sym-done ~s" (parse-state-level-starts state))
1347 (setf (syntax-level-prev (cur-level)) (syntax-level-last (cur-level))))
1348 (do-sym-started ()
1349 ;; (message "do-sym-started")
1350 (while (< from end)
1351 (case (&syntax (buffer-fetch-char from-aref buffer) table)
1352 ((:escape :character-quote)
1353 (inc-from)
1354 (when (= from end)
1355 (throw :end :end-quoted)))
1356 ((:word-constituent :symbol-constituent :quote))
1358 (do-sym-done)
1359 (return nil)))
1360 (inc-from)))
1361 (do-start-quoted ()
1362 (when (= from end) (throw :end :end-quoted))
1363 (inc-from)
1364 (do-sym-started))
1365 (do-in-string-loop ()
1366 (loop
1367 (let (c)
1368 (when (>= from end) (throw :end :done))
1369 (setf c (buffer-fetch-char from-aref buffer)
1370 temp (&syntax c table))
1371 ;; Check TEMP here so that if the char has
1372 ;; a syntax-table property which says it is NOT
1373 ;; a string character, it does not end the string.
1374 (when (and no-fence
1375 (equal c (parse-state-in-string state))
1376 (eq temp :string))
1377 (return nil))
1378 (case temp
1379 (:string-fence
1380 (unless no-fence (return nil)))
1381 ((:character-quote :escape)
1382 (inc-from)
1383 (when (>= from end) (throw :end :end-quoted))))
1384 (inc-from))))
1385 (do-string-end ()
1386 ;;(message "do-string-end ~s" (parse-state-level-starts state))
1387 (setf (parse-state-in-string state) nil
1388 (syntax-level-prev (cur-level)) (syntax-level-last (cur-level)))
1389 (inc-from)
1390 (when boundary-stop (throw :end :done)))
1391 (do-start-in-string ()
1392 (setf no-fence (not (eq (parse-state-in-string state) :st-string-style)))
1393 (do-in-string-loop)
1394 (do-string-end))
1395 (do-start-quoted-in-string ()
1396 (when (>= from end) (throw :end :end-quoted))
1397 (inc-from)
1398 (do-in-string-loop)))
1400 (when (/= from (begv buffer))
1401 (dec-both prev-from prev-from-aref buffer))
1403 (if old-state
1404 (progn
1405 (setf state old-state
1406 start-quoted (parse-state-quoted state)
1407 depth (or (parse-state-depth state) 0)
1408 start-quoted (parse-state-quoted state))
1409 (dolist (i (parse-state-level-starts state))
1410 (push (make-syntax-level :last i) levels))
1411 ;; make sure we have at least one in the list
1412 (unless levels
1413 (push (make-syntax-level) levels)))
1414 (setf depth 0
1415 state (make-parse-state)
1416 levels (list (make-syntax-level))))
1418 ;;(message "top ~s" (parse-state-level-starts state))
1420 (setf (parse-state-quoted state) nil
1421 min-depth depth)
1423 (setf temp (buffer-fetch-char prev-from-aref buffer)
1424 prev-from-syntax (&syntax-with-flags temp table))
1426 ;; "Enter" the loop at a place appropriate for initial state. In
1427 ;; the C code this is a bunch of goto's. Here we call the
1428 ;; appropriate function that sync's us so we're ready to enter
1429 ;; the loop.
1430 (cond ((parse-state-in-comment state)
1431 (do-start-quoted))
1432 ((parse-state-in-string state)
1433 (setf no-fence (not (eq (parse-state-in-string state) :st-string-style)))
1434 (if start-quoted
1435 (do-start-quoted-in-string)
1436 (do-start-in-string)))
1437 (start-quoted
1438 (do-start-quoted)))
1439 ;; (message "sane here")
1440 (case
1441 (catch :end
1442 (while (< from end)
1443 (catch :continue
1444 (inc-from)
1445 (setf code (&syntax-flags-syntax prev-from-syntax))
1446 ;; (message "here the code is ~s" code)
1447 (cond ((and (< from end)
1448 (&syntax-flags-comment-start-first prev-from-syntax)
1449 (progn
1450 (setf c1 (buffer-fetch-char from-aref buffer))
1451 (&syntax-comment-start-second c1 table)))
1452 ;; (message "here 1")
1453 ;; Record the comment style we have entered so that only
1454 ;; the comment-end sequence of the same style actually
1455 ;; terminates the comment section.
1456 (setf (parse-state-comment-style state) (&syntax-comment-style c1 table)
1457 comment-nested (&syntax-flags-comment-nested prev-from-syntax)
1458 comment-nested (or comment-nested
1459 (&syntax-comment-nested c1 table))
1460 (parse-state-in-comment state) comment-nested
1461 (parse-state-comment-string-start state) prev-from)
1462 (inc-from)
1463 (setf code :comment))
1464 ((eq code :comment-fence)
1465 ;; (message "here 2")
1466 ;; Record the comment style we have entered so that only
1467 ;; the comment-end sequence of the same style actually
1468 ;; terminates the comment section.
1469 (setf (parse-state-comment-style state) :st-comment-style
1470 (parse-state-in-comment state) -1 ; XXX
1471 (parse-state-comment-string-start state) prev-from
1472 code :comment))
1473 ((eq code :comment)
1474 ;; (message "here 3")
1475 (setf (parse-state-comment-style state) (&syntax-flags-comment-style prev-from-syntax)
1476 (parse-state-in-comment state) (&syntax-flags-comment-nested prev-from-syntax)
1477 (parse-state-comment-string-start state) prev-from)))
1479 (when (&syntax-flags-prefix prev-from-syntax)
1480 (throw :continue nil))
1482 ;;(message "code: ~s" code)
1483 (case code
1484 ((:escape :character-quote)
1485 ;; this arg means stop at sexp start
1486 (when stop-before (throw :end :stop))
1487 ;;(message ":escae ~s" (parse-state-level-starts state))
1488 (setf (syntax-level-last (cur-level)) prev-from)
1489 (do-start-quoted))
1491 ((:word-constituent :symbol-constituent)
1492 (when stop-before (throw :end :stop))
1493 ;;(message ":word-con ~s" (parse-state-level-starts state))
1494 (setf (syntax-level-last (cur-level)) prev-from)
1495 (do-sym-started))
1497 ((:comment-fence :comment)
1498 (when (or comment-stop
1499 boundary-stop)
1500 (throw :end :done))
1501 (do-start-in-comment))
1503 (:open
1504 (when stop-before (throw :end :stop))
1505 (incf depth)
1506 ;;(message ":open ~s" (parse-state-level-starts state))
1507 (setf (syntax-level-last (cur-level)) prev-from)
1508 ;; (message ":open ~a" (parse-state-level-starts state))
1509 (push (make-syntax-level) levels)
1510 ;; (when (> (length level-list) 100) ; XXX hardcoded
1511 ;; (error "nesting too deep for parser"))
1512 (when (= target-depth depth) (throw :end :done)))
1514 (:close
1515 (decf depth)
1516 (when (< depth min-depth)
1517 (setf min-depth depth))
1518 (unless (= (length levels) 1)
1519 (message "XXX: popping when levels is 1!")
1520 (pop levels))
1521 (setf (syntax-level-prev (cur-level)) (syntax-level-last (cur-level)))
1522 (when (= target-depth depth)
1523 (throw :end :done)))
1525 ((:string :string-fence)
1526 (setf (parse-state-comment-string-start state) (1- from))
1527 (when stop-before
1528 (throw :end :stop))
1529 (setf (syntax-level-last (cur-level)) prev-from)
1530 (setf (parse-state-in-string state) (if (eq code :string)
1531 (buffer-fetch-char prev-from-aref buffer)
1532 :st-string-style))
1533 (when boundary-stop
1534 (throw :end :done))
1535 (do-start-in-string))
1537 (:math
1538 ;; FIXME: We should do something with it.
1541 ;; Ignore whitespace, punctuation, quote, endcomment.
1542 ))))
1543 :done)
1544 (:stop
1545 ;; Here if stopping before start of sexp.
1546 ;; We have just fetched the char that starts it
1547 ;; but return the position before it.
1548 (setf from prev-from))
1549 (:end-quoted
1550 (setf (parse-state-quoted state) t)))
1552 ;;(message ":end ~s" (parse-state-level-starts state))
1553 ;; done
1554 (setf (parse-state-depth state) depth
1555 (parse-state-min-depth state) min-depth
1556 (parse-state-this-level-start state) (syntax-level-prev (cur-level))
1557 (parse-state-prev-level-start state) (if (<= (length levels) 1)
1558 nil (syntax-level-last (second levels)))
1559 (parse-state-location state) from
1560 (parse-state-level-starts state) (mapcar 'syntax-level-last (cdr levels)))
1561 state)))
1563 (defun parse-partial-sexp (from to &key (target-depth -100000) stop-before old-state comment-stop &aux (buffer (current-buffer)) (table (syntax-table)))
1564 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1565 Parsing stops at TO or when certain criteria are met;
1566 point is set to where parsing stops.
1567 If fifth arg OLDSTATE is omitted or nil,
1568 parsing assumes that FROM is the beginning of a function.
1569 Value is a list of elements describing final state of parsing:
1570 0. depth in parens. parse-state-depth
1571 1. character address of start of innermost containing list; nil if none. parse-state-prev-level-start
1572 2. character address of start of last complete sexp terminated. parse-state-this-level-start
1573 3. non-nil if inside a string.
1574 (it is the character that will terminate the string,
1575 or t if the string should be terminated by a generic string delimiter.) parse-state-in-string
1576 4. nil if outside a comment, t if inside a non-nestable comment,
1577 else an integer (the current comment nesting). parse-state-in-comment
1578 5. t if following a quote character. parse-state-quoted
1579 6. the minimum paren-depth encountered during this scan. parse-state-min-depth
1580 7. t if in a comment of style b; symbol `syntax-table' if the comment
1581 should be terminated by a generic comment delimiter. parse-state-comment-style
1582 8. character address of start of comment or string; nil if not in one. parse-state-in-comment
1583 9. Intermediate data for continuation of parsing (subject to change). parse-state-level-starts
1584 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1585 in parentheses becomes equal to TARGETDEPTH.
1586 Fourth arg STOPBEFORE non-nil means stop when come to
1587 any character that starts a sexp.
1588 Fifth arg OLDSTATE is a list like what this function returns.
1589 It is used to initialize the state of the parse. Elements number 1, 2, 6
1590 and 8 are ignored.
1591 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1592 If it is symbol `syntax-table', stop after the start of a comment or a
1593 string, or after end of a comment or a string."
1594 (check-type target-depth number)
1595 (multiple-value-setq (from to) (validate-region from to buffer))
1596 (let ((state (scan-sexps-forward from (buffer-char-to-aref buffer from) to
1597 target-depth (not (null stop-before)) old-state
1598 ;; XXX
1599 (if comment-stop
1600 (if (eq comment-stop 'syntax-table) -1 1)
1602 buffer table)))
1603 (goto-char (parse-state-location state) buffer)
1604 state))