[lice @ massive rearrangement to get rid of compiler warnings and mimic the file...
[lice.git] / syntax.lisp
blob83b9a7b9c87bb35aedc79108bbc1de003a60dd2a
1 ;;; Cheap syntax functions
3 (in-package "LICE")
5 (defparameter +syntax-classes+
6 '(:whitespace :punctuation :word-constituent :symbol-constituent :open :close :quote :string :math :escape
7 :character-quote :comment :end-comment :inherit :comment-fence :string-fence))
9 (deftype syntax-class ()
10 '(member :whitespace ; for a whitespace character
11 :punctuation ; for random punctuation characters
12 :word-constituent ; for a word constituent
13 :symbol-constituent ; symbol constituent but not word constituent
14 :open ; for a beginning delimiter
15 :close ; for an ending delimiter
16 :quote ; for a prefix character like Lisp '
17 :string ; for a string-grouping character like Lisp "
18 :math ; for delimiters like $ in Tex.
19 :escape ; for a character that begins a C-style escape
20 :character-quote ; for a character that quotes the
21 ; following character
22 :comment ; for a comment-starting character
23 :end-comment ; for a comment-ending character
24 :inherit ; use the standard syntax table for
25 ; this character
26 :comment-fence ; Starts/ends comment which is
27 ; delimited on the other side by any
28 ; char with the same syntaxcode.
29 :string-fence ; Starts/ends string which is delimited
30 ; on the other side by any char with
31 ; the same syntaxcode.
34 (defclass syntax-table ()
35 ((hash :initform (make-hash-table :test 'equal) :initarg :hash :accessor syntax-table-hash)
36 (parent :initform nil :initarg :parent :accessor syntax-table-parent))
37 (:documentation "A syntax table class."))
39 (defstruct syntax-descriptor
40 (class nil :type syntax-class)
41 ;; FIXME: this will be a bitvector at some point but for now just
42 ;; make it a list.
43 (flags nil :type list)
44 ;; this is where the terminator paren char etc is stored
45 extra)
47 (defun set-raw-syntax-entry (table char descriptor)
48 (setf (gethash char (syntax-table-hash table)) descriptor))
50 (defvar *syntax-code-object* (loop for i in +syntax-classes+
51 collect i
52 collect (make-syntax-descriptor :class i :flags nil))
53 "A pool of syntax descriptors to be shared in the standard
54 syntax table in an attempt to save memory. FIXME: premature
55 optimization?")
57 (defvar *standard-syntax-table*
58 (let ((table (make-instance 'syntax-table :parent nil))
59 tmp)
60 ;; Control characters should not be whitespace.
61 (setf tmp (getf *syntax-code-object* :punctuation))
62 (loop for i below (char-code #\Space) do
63 (set-raw-syntax-entry table (code-char i) tmp))
64 ;; Except that a few really are whitespace.
65 (setf tmp (getf *syntax-code-object* :whitespace))
66 (set-raw-syntax-entry table #\Space tmp)
67 (set-raw-syntax-entry table #\Tab tmp)
68 (set-raw-syntax-entry table #\Newline tmp)
69 (set-raw-syntax-entry table #\Return tmp)
70 (set-raw-syntax-entry table #\Page tmp)
72 (setf tmp (getf *syntax-code-object* :word-constituent))
73 (loop
74 for i from (char-code #\a) to (char-code #\z)
75 for j from (char-code #\A) to (char-code #\Z) do
76 (set-raw-syntax-entry table (code-char i) tmp)
77 (set-raw-syntax-entry table (code-char j) tmp))
78 (loop for i from (char-code #\0) to (char-code #\9) do
79 (set-raw-syntax-entry table (code-char i) tmp))
81 (set-raw-syntax-entry table #\$ tmp)
82 (set-raw-syntax-entry table #\% tmp)
84 (set-raw-syntax-entry table #\( (make-syntax-descriptor :class :open :extra ")"))
85 (set-raw-syntax-entry table #\) (make-syntax-descriptor :class :close :extra "("))
86 (set-raw-syntax-entry table #\[ (make-syntax-descriptor :class :open :extra "]"))
87 (set-raw-syntax-entry table #\] (make-syntax-descriptor :class :close :extra "["))
88 (set-raw-syntax-entry table #\{ (make-syntax-descriptor :class :open :extra "}"))
89 (set-raw-syntax-entry table #\} (make-syntax-descriptor :class :close :extra "{"))
91 (set-raw-syntax-entry table #\" (make-syntax-descriptor :class :string))
92 (set-raw-syntax-entry table #\\ (make-syntax-descriptor :class :escape))
94 (setf tmp (getf *syntax-code-object* :symbol-constituent))
95 (loop for i across "_-+*/&|<>=" do
96 (set-raw-syntax-entry table i tmp))
98 (setf tmp (getf *syntax-code-object* :punctuation))
99 (loop for i across ".,;:?!#@~^'`" do
100 (set-raw-syntax-entry table i tmp))
101 ;; TODO: i18n characters
102 table)
103 "The standard syntax table")
105 (defun make-syntax-table (&optional (parent *standard-syntax-table*))
106 "Return a new syntax table.
107 Create a syntax table which inherits from parent (if non-nil) or
108 from `standard-syntax-table' otherwise."
109 (make-instance 'syntax-table
110 :parent parent))
112 (defun copy-syntax-table (&optional (table *standard-syntax-table*))
113 "Construct a new syntax table and return it.
114 It is a copy of the TABLE, which defaults to the standard syntax table."
115 (let* ((hash (make-hash-table)))
116 (maphash (lambda (k v)
117 (setf (gethash k hash) v))
118 (syntax-table-hash table))
119 (make-instance 'syntax-table
120 :hash hash :parent (syntax-table-parent table))))
122 (defun syntax-table (&aux (buffer (current-buffer)))
123 (buffer-syntax-table buffer))
125 (defun modify-syntax-entry (char class &key flags extra (table (syntax-table)))
126 "Set syntax for character CHAR according to CLASS, FLAGS, and EXTRA."
127 (check-type char character)
128 (check-type class syntax-class)
129 (check-type flags list)
130 (check-type table syntax-table)
131 (set-raw-syntax-entry table char
132 (make-syntax-descriptor :class class :flags flags :extra extra)))
134 (defun char-syntax (character &optional (table (syntax-table)))
135 "Return the syntax code of CHARACTER, described by a character.
136 For example, if CHARACTER is a word constituent,
137 the symbol `:WORD-CONSTITUENT' is returned."
138 (let ((descr (gethash character (syntax-table-hash table))))
139 (when descr
140 (syntax-descriptor-class descr))))
142 (defparameter +word-constituents+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
144 (defvar *words-include-escapes* nil
145 "Non-nil means `forward-word', etc., should treat escape chars part of words.")
147 (defun syntax-after (pos &aux (buffer (current-buffer)))
148 "Return the raw syntax of the char after POS.
149 If POS is outside the buffer's accessible portion, return nil."
150 (unless (or (< pos (begv buffer)) (>= pos (zv buffer)))
151 (let* ((ch (buffer-char-after buffer pos))
152 (descr (and ch (gethash ch (syntax-table buffer)))))
153 descr)))
155 ;; FIXME: having the flags as a list is memory intensive. How about a
156 ;; bit vector or number and a function that converts between the two?
158 (defun (setf syntax-table) (value &aux (buffer (current-buffer)))
159 "Select a new syntax table for the current buffer. One argument, a syntax table."
160 (check-type value syntax-table)
161 (setf (buffer-syntax-table buffer) value))
163 (depricate set-syntax-table (setf syntax-table))
164 (defun set-syntax-table (value)
165 "Select a new syntax table for the current buffer. One argument, a syntax table."
166 (setf (syntax-table) value))
168 (defun &syntax-with-flags (ch table &optional (default :whitespace))
169 (or (gethash ch (syntax-table-hash table))
170 ;; try the parent
171 (and (syntax-table-parent table)
172 (&syntax-with-flags ch (syntax-table-parent table) default))
173 ;; return the default
174 (make-syntax-descriptor :class default)))
176 (defun &syntax (ch table &optional (default :whitespace))
177 (let ((descr (&syntax-with-flags ch table default)))
178 (if descr
179 (syntax-descriptor-class descr)
180 default)))
182 (defun &syntax-flags-syntax (syntax)
183 (syntax-descriptor-class syntax))
185 (defun &syntax-comment-start-first (ch table)
186 (let ((descr (&syntax-with-flags ch table)))
187 (when descr
188 (and (find :comment-start-first (syntax-descriptor-flags descr)) t))))
190 (defun &syntax-flags-comment-start-first (syntax)
191 (and (find :comment-start-first (syntax-descriptor-flags syntax)) t))
193 (defun &syntax-comment-start-second (ch table)
194 (let ((descr (&syntax-with-flags ch table)))
195 (when descr
196 (and (find :comment-start-second (syntax-descriptor-flags descr)) t))))
198 (defun &syntax-flags-comment-start-second (syntax)
199 (and (find :comment-start-second (syntax-descriptor-flags syntax)) t))
201 (defun &syntax-comment-end-first (ch table)
202 (let ((descr (&syntax-with-flags ch table)))
203 (when descr
204 (and (find :comment-end-first (syntax-descriptor-flags descr)) t))))
206 (defun &syntax-flags-comment-end-first (syntax)
207 (and (find :comment-end-first (syntax-descriptor-flags syntax)) t))
209 (defun &syntax-comment-end-second (ch table)
210 (let ((descr (&syntax-with-flags ch table)))
211 (when descr
212 (and (find :comment-end-second (syntax-descriptor-flags descr)) t))))
214 (defun &syntax-flags-comment-end-second (syntax)
215 (and (find :comment-end-second (syntax-descriptor-flags syntax)) t))
217 (defun &syntax-prefix (ch table)
218 (let ((descr (&syntax-with-flags ch table)))
219 (when descr
220 (and (find :prefix (syntax-descriptor-flags descr)) t))))
222 (defun &syntax-flags-prefix (syntax)
223 (and (find :prefix (syntax-descriptor-flags syntax)) t))
225 (defun &syntax-comment-style (ch table)
226 (let ((descr (&syntax-with-flags ch table)))
227 (when descr
228 (and (find :comment-style (syntax-descriptor-flags descr)) t))))
230 (defun &syntax-flags-comment-style (syntax)
231 (and (find :comment-style (syntax-descriptor-flags syntax)) t))
233 (defun &syntax-comment-nested (ch table)
234 (let ((descr (&syntax-with-flags ch table)))
235 (when descr
236 (and (find :comment-nested (syntax-descriptor-flags descr)) t))))
238 (defun &syntax-flags-comment-nested (syntax)
239 (and (find :comment-nested (syntax-descriptor-flags syntax)) t))
241 (defmacro with-syntax-table (table &body body)
242 "Evaluate BODY with syntax table of current buffer set to TABLE.
243 The syntax table of the current buffer is saved, BODY is evaluated, and the
244 saved table is restored, even in case of an abnormal exit.
245 Value is what BODY returns."
246 (let ((old-table (gensym "TABLE"))
247 (old-buffer (gensym "BUFFER")))
248 `(let ((,old-table (syntax-table))
249 (,old-buffer (current-buffer)))
250 (unwind-protect
251 (progn
252 (set-syntax-table ,table)
253 ,@body)
254 (save-current-buffer
255 (set-buffer ,old-buffer)
256 (set-syntax-table ,old-table))))))
258 (defun syntax-class (syntax)
259 "Return the syntax class part of the syntax descriptor SYNTAX.
260 If SYNTAX is nil, return nil."
261 (&syntax-flags-syntax syntax))
263 (defun scan-words (from count)
264 "Return the position across COUNT words from FROM.
265 If that many words cannot be found before the end of the buffer,
266 return NIL. COUNT negative means scan backward and stop at word
267 beginning."
268 (let* ((buffer (current-buffer))
269 (beg (begv buffer))
270 (end (zv buffer))
271 (from-aref (buffer-char-to-aref buffer from))
272 (table (syntax-table))
273 ch code)
274 (while (> count 0)
275 (loop
276 (when (= from end)
277 (return-from scan-words nil))
278 (setf ch (buffer-fetch-char from-aref buffer)
279 code (&syntax ch table))
280 (inc-both from from-aref buffer)
281 (when (or (and *words-include-escapes*
282 (or (eq code :escape)
283 (eq code :character-quote)))
284 (eq code :word-constituent))
285 (return nil)))
286 (loop
287 (when (= from end)
288 (return nil))
289 (setf ch (buffer-fetch-char from-aref buffer)
290 code (&syntax from-aref table))
291 (when (and (and (not *words-include-escapes*)
292 (or (eq code :escape)
293 (eq code :character-quote)))
294 (or (not (eq code :word-constituent))))
295 ;; (word-boundary-p ..)
296 (return nil))
297 (inc-both from from-aref buffer))
298 (decf count))
299 (while (< count 0)
300 (loop
301 (when (= from beg)
302 (return-from scan-words nil))
303 (dec-both from from-aref buffer)
304 (setf ch (buffer-fetch-char from-aref buffer)
305 code (&syntax ch table))
306 (when (or (and *words-include-escapes*
307 (or (eq code :escape)
308 (eq code :character-quote)))
309 (eq code :word-constituent))
310 (return nil)))
311 (loop
312 (when (= from beg)
313 (return nil))
314 (setf ch (buffer-fetch-char from-aref buffer)
315 code (&syntax ch table))
316 (when (and (and (not *words-include-escapes*)
317 (or (eq code :escape)
318 (eq code :character-quote)))
319 (not (eq code :word-constituent)))
320 (return nil))
321 (dec-both from from-aref buffer))
322 (incf count))
323 from))
325 (defvar *parse-sexp-ignore-comments* t
326 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.")
328 (defvar *open-paren-in-column-0-is-defun-start* t
329 "*Non-nil means an open paren in column 0 denotes the start of a defun.")
331 ;; Conditions used by the syntax code because the parsing is so bloody
332 ;; hairy.
333 (define-condition syntax-done () ())
334 (define-condition syntax-lossage () ())
336 (define-condition expression-ends-prematurely (lice-condition)
338 (define-condition unbalanced-parenthesis (lice-condition)
339 ((last-good :initarg :last-good :accessor unbalanced-parenthesis-last-good)
340 (from :initarg :from :accessor unbalanced-parenthesis-from)))
342 (defun &char-quoted (char-pos aref-pos buffer table)
343 "Returns TRUE if char at CHAR-POS is quoted.
344 Global syntax-table data should be set up already to be good at CHAR-POS
345 or after. On return global syntax data is good for lookup at CHAR-POS."
346 (let ((beg (begv buffer))
347 quoted)
348 (dec-both char-pos aref-pos buffer)
349 (while (>= char-pos beg)
350 (let* ((ch (buffer-fetch-char aref-pos buffer))
351 (code (&syntax ch table)))
352 (when (not (find code '(:character-quote :escape)))
353 (return nil))
354 (dec-both char-pos aref-pos buffer)
355 (setf quoted (not quoted))))
356 quoted))
358 (defstruct parse-state
359 depth min-depth
360 this-level-start
361 prev-level-start
362 location
363 level-starts
364 quoted
365 in-comment
366 comment-style
367 comment-string-start
368 in-string
369 start-value
370 start-value-aref)
372 (defun find-defun-start (pos pos-aref buffer table)
373 "Return a defun-start position before POS and not too far before.
374 It should be the last one before POS, or nearly the last.
376 When open_paren_in_column_0_is_defun_start is nonzero,
377 only the beginning of the buffer is treated as a defun-start.
379 We record the information about where the scan started
380 and what its result was, so that another call in the same area
381 can return the same value very quickly.
383 There is no promise at which position the global syntax data is
384 valid on return from the subroutine, so the caller should explicitly
385 update the global data."
386 (declare (ignore pos-aref))
387 (unless *open-paren-in-column-0-is-defun-start*
388 (return-from find-defun-start (make-parse-state :start-value (begv buffer)
389 :start-value-aref (begv-aref buffer))))
390 ;; Back up to start of line.
391 (let* ((begv (begv buffer))
392 (pt (buffer-scan-newline buffer pos (begv buffer) -1))
393 (pt-aref (buffer-char-to-aref buffer pt)))
394 (while (> pt begv)
395 (let ((ch (buffer-fetch-char pt-aref buffer)))
396 (when (eq (&syntax ch table) :open)
397 (return nil))
398 ;; Move to beg of previous line.
399 (setf pt (buffer-scan-newline buffer pt (begv buffer) -2)
400 pt-aref (buffer-char-to-aref buffer pt))))
401 ;; Return what we found
402 (make-parse-state :start-value pt
403 :start-value-aref pt-aref
404 :start-buffer buffer
405 ;; :modiff MODIFF
406 :start-begv (begv buffer)
407 :start-pos pos)))
409 ;; FIXME: doesn't handle ^. Maybe if :not is the first symbol in the list?
410 (defun skip-syntax-forward (syntax-list &optional (lim (zv)))
411 "Move point forward across chars in specified syntax classes.
412 SYNTAX-LIST is a string of syntax code characters.
413 Stop before a char whose syntax is not in SYNTAX-LIST, or at position LIM.
414 If SYNTAX-LIST starts with ^, skip characters whose syntax is NOT in SYNTAX-LIST.
415 This function returns the distance traveled, either zero or positive."
416 (check-type lim integer)
417 (let* ((buffer (current-buffer))
418 (table (syntax-table))
419 (pos (pt))
420 (start pos)
421 (pos-aref (buffer-char-to-aref buffer pos))
422 ch syntax)
423 (while (< pos lim)
424 (setf ch (buffer-fetch-char pos-aref buffer)
425 syntax (&syntax ch table))
426 (unless (find syntax syntax-list)
427 (return nil))
428 (inc-both pos pos-aref buffer))
429 (goto-char pos)
430 (- pos start)))
432 (defun skip-chars (forwardp syntaxp string lim)
433 (declare (ignore syntaxp))
434 (labels ((match-char (c negate ranges chars)
435 ;; check ranges
436 (catch :continue
437 (loop for r in ranges do
438 (if negate
439 (when (<= (car r) (char-code c) (cdr r))
440 (throw :done nil))
441 (when (<= (car r) (char-code c) (cdr r))
442 (throw :continue nil))))
443 ;; check chars
444 (if negate
445 (when (find c chars :test 'char=)
446 (throw :done nil))
447 (when (find c chars :test 'char=)
448 (throw :continue nil)))
449 (unless negate
450 ;; if the char fell through all that then we're done
451 (throw :done nil)))))
452 (check-type string string)
453 (check-number-coerce-marker lim)
454 (let ((idx 0)
455 negate
456 ranges
457 chars
458 (start-point (pt)))
459 ;; don't allow scan outside bounds of buffer.
460 (setf lim (min (max lim (begv)) (zv)))
462 (when (char= (char string 0) #\^)
463 (setf negate t)
464 (incf idx))
466 ;; compile the ranges and chars
467 (while (< idx (length string))
468 (let ((c (char string idx)))
470 ;; (if syntaxp
471 ;; ;; TODO: handle syntaxp
472 ;; nil
473 ;; (progn
474 ;; ;; TODO: handle iso classes
475 ;; ))
476 (when (char= c #\\)
477 (incf idx)
478 (when (= idx (length string))
479 (return nil))
480 (setf c (char string idx)))
481 (incf idx)
483 ;; Treat `-' as range character only if another character
484 ;; follows.
485 (if (and (< (1+ idx) (length string))
486 (char= (char string idx) #\-))
487 (progn
488 (incf idx)
489 (let* ((c2 (char string idx))
490 (code1 (char-code c))
491 (code2 (char-code c2)))
492 (when (<= code1 code2)
493 (push (cons code1 code2) ranges))
494 (incf idx)))
495 (progn
496 (push c chars)))))
497 ;; scan
498 (let* ((buffer (current-buffer))
499 (pos (pt buffer))
500 (pos-aref (buffer-char-to-aref buffer pos)))
501 (catch :done
502 (if forwardp
503 (while (< pos lim)
504 (match-char (buffer-fetch-char pos-aref buffer)
505 negate ranges chars)
506 (inc-both pos pos-aref buffer))
507 (progn
508 ;; do a little dance to end up in the right spot
509 (dec-both pos pos-aref buffer)
510 (unwind-protect
511 (while (> pos lim)
512 (match-char (buffer-fetch-char pos-aref buffer)
513 negate ranges chars)
514 (dec-both pos pos-aref buffer))
515 (inc-both pos pos-aref buffer)))))
516 (set-point pos buffer)
517 ;; return the number of chars we scanned
518 (- pos start-point)))))
520 (defun skip-chars-forward (string &optional (lim (zv)))
521 "Move point forward, stopping before a char not in string."
522 (skip-chars t nil string lim))
524 (defun skip-chars-backward (string &optional (lim (begv)))
525 "Move point backward, stopping after a char not in string."
526 (skip-chars nil nil string lim))
528 (defun skip-whitespace-forward (&optional (lim (zv)))
529 "Move point forward, stopping before a char that is not a space or tab."
530 (skip-chars-forward (coerce '(#\Space #\Tab) 'string) lim))
532 (defun &forward-comment (from from-aref stop nesting style prev-syntax buffer table)
533 "Jump over a comment, assuming we are at the beginning of one.
534 FROM is the current position.
535 FROM_BYTE is the bytepos corresponding to FROM.
536 Do not move past STOP (a charpos).
537 The comment over which we have to jump is of style STYLE
538 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
539 NESTING should be positive to indicate the nesting at the beginning
540 for nested comments and should be zero or negative else.
541 ST_COMMENT_STYLE cannot be nested.
542 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
543 (or nil If the search cannot start in the middle of a two-character).
545 If successful, return 1 and store the charpos of the comment's end
546 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
547 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
548 corresponding bytepos into *BYTEPOS_PTR and the current nesting
549 (as defined for state.incomment) in *INCOMMENT_PTR.
551 The comment end is the last character of the comment rather than the
552 character just after the comment.
554 Global syntax data is assumed to initially be valid for FROM and
555 remains valid for forward search starting at the returned position."
556 (let (c
558 code
559 (syntax prev-syntax))
560 (labels ((forward ()
561 (when (= from stop)
562 (return-from &forward-comment
563 (values nil from from-aref nesting)))
565 (setf c (buffer-fetch-char from-aref buffer)
566 code (&syntax c table)
567 syntax (&syntax-with-flags c table))
569 (when (and (eq code :end-comment)
570 (eq (&syntax-flags-comment-style syntax) style)
571 (if (&syntax-flags-comment-nested syntax)
572 (and (> nesting 0)
573 (progn (decf nesting)
574 (zerop nesting)))
575 (< nesting 0)))
576 ;; we have encountered a comment end of the same
577 ;; style as the comment sequence which began this
578 ;; comment section.
579 (throw :done nil))
580 (when (and (eq code :comment-fence)
581 (eq style :st-comment-style))
582 ;; we have encountered a comment end of the same style
583 ;; as the comment sequence which began this comment
584 ;; section.
585 (throw :done nil))
586 (when (and (> nesting 0)
587 (eq code :comment)
588 (&syntax-flags-comment-nested syntax)
589 (eq (&syntax-flags-comment-style syntax) style))
590 ;; we have encountered a nested comment of the same style
591 ;; as the comment sequence which began this comment section
592 (incf nesting))
593 (inc-both from from-aref buffer))
594 (do-comment ()
595 (when (and (< from stop)
596 (&syntax-flags-comment-end-first syntax)
597 (eq (&syntax-flags-comment-style syntax) style)
598 (progn
599 (setf c1 (buffer-fetch-char from-aref buffer))
600 (&syntax-comment-end-second c1 table))
601 (if (or (&syntax-flags-comment-nested syntax)
602 (&syntax-comment-nested c1 table))
603 (> nesting 0)
604 (< nesting 0)))
605 (decf nesting)
606 (if (<= nesting 0)
607 ;; we have encountered a comment end of the same style
608 ;; as the comment sequence which began this comment
609 ;; section
610 (throw :done nil)
611 (inc-both from from-aref buffer)))
612 (when (and (> nesting 0)
613 (< from stop)
614 (&syntax-flags-comment-start-first syntax)
615 (progn
616 (setf c1 (buffer-fetch-char from-aref buffer))
617 (eq (&syntax-comment-style c1 table) style))
618 (or (&syntax-flags-comment-nested syntax)
619 (&syntax-comment-nested c1 table)))
620 ;; we have encountered a nested comment of the same style
621 ;; as the comment sequence which began this comment
622 ;; section
623 (inc-both from from-aref buffer)
624 (incf nesting))))
625 ;; normalize nesting
626 (cond ((or (null nesting)
627 (<= nesting 0))
628 (setf nesting -1))
629 ((not (numberp nesting))
630 (setf nesting 1)))
631 ;; Enter the loop in the middle so that we find
632 ;; a 2-char comment ender if we start in the middle of it.
633 (catch :done
634 (if syntax
635 (progn
636 (do-comment)
637 (loop
638 (forward)
639 (do-comment)))
640 (loop
641 (forward)
642 (do-comment))))
643 (values t from from-aref))))
645 (defstruct syntax-level
646 last prev)
648 ;; this function cries out for continuations. you almost have to look
649 ;; at the C code to understand what's going on here, i bet. Hell, I
650 ;; don't even understand it.
651 (defun scan-sexps-forward (from from-aref end target-depth stop-before old-state comment-stop buffer table)
652 "Parse forward from FROM / FROM_BYTE to END,
653 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
654 and return a description of the state of the parse at END.
655 If STOPBEFORE is nonzero, stop at the start of an atom.
656 If COMMENTSTOP is 1, stop at the start of a comment.
657 If COMMENTSTOP is -1, stop at the start or end of a comment,
658 after the beginning of a string, or after the end of a string."
659 ;;(message "scan-sexps-forward ~@{~a ~}" from from-aref end target-depth stop-before old-state comment-stop buffer table)
660 (let ((state (make-parse-state))
661 (prev-from from)
662 (prev-from-aref from-aref)
663 prev-from-syntax
664 (boundary-stop (null comment-stop))
665 no-fence
667 code
668 comment-nested
669 depth
670 min-depth
671 temp
672 start-quoted
673 levels)
674 (labels ((inc-from ()
675 (setf prev-from from
676 prev-from-aref from-aref
677 temp (buffer-fetch-char prev-from-aref buffer)
678 prev-from-syntax (&syntax-with-flags temp table))
679 (inc-both from from-aref buffer))
680 (cur-level ()
681 "Return the current level struct"
682 (car levels))
683 (do-start-in-comment ()
684 ;; The (from == BEGV) test was to enter the loop in the middle so
685 ;; that we find a 2-char comment ender even if we start in the
686 ;; middle of it. We don't want to do that if we're just at the
687 ;; beginning of the comment (think of (*) ... (*)).
688 (multiple-value-bind (found out-char out-aref in-comment)
689 (&forward-comment from from-aref end
690 (parse-state-in-comment state)
691 (parse-state-comment-style state)
692 (if (or (eq from (begv buffer))
693 (< from (+ (parse-state-comment-string-start state) 3)))
694 nil prev-from-syntax)
695 buffer table)
696 (setf from out-char
697 from-aref out-aref
698 (parse-state-in-comment state) in-comment)
699 ;; Beware! prev_from and friends are invalid now.
700 ;; Luckily, the `done' doesn't use them and the INC_FROM
701 ;; sets them to a sane value without looking at them.
702 (unless found (throw :end :done))
703 (inc-from)
704 (setf (parse-state-in-comment state) nil
705 (parse-state-comment-style state) nil) ; reset the comment style
706 (when boundary-stop (throw :end :done))))
707 (do-sym-done ()
708 ;;(message "do-sym-done ~s" (parse-state-level-starts state))
709 (setf (syntax-level-prev (cur-level)) (syntax-level-last (cur-level))))
710 (do-sym-started ()
711 ;; (message "do-sym-started")
712 (while (< from end)
713 (case (&syntax (buffer-fetch-char from-aref buffer) table)
714 ((:escape :character-quote)
715 (inc-from)
716 (when (= from end)
717 (throw :end :end-quoted)))
718 ((:word-constituent :symbol-constituent :quote))
720 (do-sym-done)
721 (return nil)))
722 (inc-from)))
723 (do-start-quoted ()
724 (when (= from end) (throw :end :end-quoted))
725 (inc-from)
726 (do-sym-started))
727 (do-in-string-loop ()
728 (loop
729 (let (c)
730 (when (>= from end) (throw :end :done))
731 (setf c (buffer-fetch-char from-aref buffer)
732 temp (&syntax c table))
733 ;; Check TEMP here so that if the char has
734 ;; a syntax-table property which says it is NOT
735 ;; a string character, it does not end the string.
736 (when (and no-fence
737 (equal c (parse-state-in-string state))
738 (eq temp :string))
739 (return nil))
740 (case temp
741 (:string-fence
742 (unless no-fence (return nil)))
743 ((:character-quote :escape)
744 (inc-from)
745 (when (>= from end) (throw :end :end-quoted))))
746 (inc-from))))
747 (do-string-end ()
748 ;;(message "do-string-end ~s" (parse-state-level-starts state))
749 (setf (parse-state-in-string state) nil
750 (syntax-level-prev (cur-level)) (syntax-level-last (cur-level)))
751 (inc-from)
752 (when boundary-stop (throw :end :done)))
753 (do-start-in-string ()
754 (setf no-fence (not (eq (parse-state-in-string state) :st-string-style)))
755 (do-in-string-loop)
756 (do-string-end))
757 (do-start-quoted-in-string ()
758 (when (>= from end) (throw :end :end-quoted))
759 (inc-from)
760 (do-in-string-loop)))
762 (when (/= from (begv buffer))
763 (dec-both prev-from prev-from-aref buffer))
765 (if old-state
766 (progn
767 (setf state old-state
768 start-quoted (parse-state-quoted state)
769 depth (or (parse-state-depth state) 0)
770 start-quoted (parse-state-quoted state))
771 (dolist (i (parse-state-level-starts state))
772 (push (make-syntax-level :last i) levels))
773 ;; make sure we have at least one in the list
774 (unless levels
775 (push (make-syntax-level) levels)))
776 (setf depth 0
777 state (make-parse-state)
778 levels (list (make-syntax-level))))
780 ;;(message "top ~s" (parse-state-level-starts state))
782 (setf (parse-state-quoted state) nil
783 min-depth depth)
785 (setf temp (buffer-fetch-char prev-from-aref buffer)
786 prev-from-syntax (&syntax-with-flags temp table))
788 ;; "Enter" the loop at a place appropriate for initial state. In
789 ;; the C code this is a bunch of goto's. Here we call the
790 ;; appropriate function that sync's us so we're ready to enter
791 ;; the loop.
792 (cond ((parse-state-in-comment state)
793 (do-start-quoted))
794 ((parse-state-in-string state)
795 (setf no-fence (not (eq (parse-state-in-string state) :st-string-style)))
796 (if start-quoted
797 (do-start-quoted-in-string)
798 (do-start-in-string)))
799 (start-quoted
800 (do-start-quoted)))
801 ;; (message "sane here")
802 (case
803 (catch :end
804 (while (< from end)
805 (catch :continue
806 (inc-from)
807 (setf code (&syntax-flags-syntax prev-from-syntax))
808 ;; (message "here the code is ~s" code)
809 (cond ((and (< from end)
810 (&syntax-flags-comment-start-first prev-from-syntax)
811 (progn
812 (setf c1 (buffer-fetch-char from-aref buffer))
813 (&syntax-comment-start-second c1 table)))
814 ;; (message "here 1")
815 ;; Record the comment style we have entered so that only
816 ;; the comment-end sequence of the same style actually
817 ;; terminates the comment section.
818 (setf (parse-state-comment-style state) (&syntax-comment-style c1 table)
819 comment-nested (&syntax-flags-comment-nested prev-from-syntax)
820 comment-nested (or comment-nested
821 (&syntax-comment-nested c1 table))
822 (parse-state-in-comment state) comment-nested
823 (parse-state-comment-string-start state) prev-from)
824 (inc-from)
825 (setf code :comment))
826 ((eq code :comment-fence)
827 ;; (message "here 2")
828 ;; Record the comment style we have entered so that only
829 ;; the comment-end sequence of the same style actually
830 ;; terminates the comment section.
831 (setf (parse-state-comment-style state) :st-comment-style
832 (parse-state-in-comment state) -1 ; XXX
833 (parse-state-comment-string-start state) prev-from
834 code :comment))
835 ((eq code :comment)
836 ;; (message "here 3")
837 (setf (parse-state-comment-style state) (&syntax-flags-comment-style prev-from-syntax)
838 (parse-state-in-comment state) (&syntax-flags-comment-nested prev-from-syntax)
839 (parse-state-comment-string-start state) prev-from)))
841 (when (&syntax-flags-prefix prev-from-syntax)
842 (throw :continue nil))
844 ;;(message "code: ~s" code)
845 (case code
846 ((:escape :character-quote)
847 ;; this arg means stop at sexp start
848 (when stop-before (throw :end :stop))
849 ;;(message ":escae ~s" (parse-state-level-starts state))
850 (setf (syntax-level-last (cur-level)) prev-from)
851 (do-start-quoted))
853 ((:word-constituent :symbol-constituent)
854 (when stop-before (throw :end :stop))
855 ;;(message ":word-con ~s" (parse-state-level-starts state))
856 (setf (syntax-level-last (cur-level)) prev-from)
857 (do-sym-started))
859 ((:comment-fence :comment)
860 (when (or comment-stop
861 boundary-stop)
862 (throw :end :done))
863 (do-start-in-comment))
865 (:open
866 (when stop-before (throw :end :stop))
867 (incf depth)
868 ;;(message ":open ~s" (parse-state-level-starts state))
869 (setf (syntax-level-last (cur-level)) prev-from)
870 ;; (message ":open ~a" (parse-state-level-starts state))
871 (push (make-syntax-level) levels)
872 ;; (when (> (length level-list) 100) ; XXX hardcoded
873 ;; (error "nesting too deep for parser"))
874 (when (= target-depth depth) (throw :end :done)))
876 (:close
877 (decf depth)
878 (when (< depth min-depth)
879 (setf min-depth depth))
880 (unless (= (length levels) 1)
881 ;;(message "XXX: popping when levels is 1!")
882 (pop levels))
883 (setf (syntax-level-prev (cur-level)) (syntax-level-last (cur-level)))
884 (when (= target-depth depth)
885 (throw :end :done)))
887 ((:string :string-fence)
888 (setf (parse-state-comment-string-start state) (1- from))
889 (when stop-before
890 (throw :end :stop))
891 (setf (syntax-level-last (cur-level)) prev-from)
892 (setf (parse-state-in-string state) (if (eq code :string)
893 (buffer-fetch-char prev-from-aref buffer)
894 :st-string-style))
895 (when boundary-stop
896 (throw :end :done))
897 (do-start-in-string))
899 (:math
900 ;; FIXME: We should do something with it.
903 ;; Ignore whitespace, punctuation, quote, endcomment.
904 ))))
905 :done)
906 (:stop
907 ;; Here if stopping before start of sexp.
908 ;; We have just fetched the char that starts it
909 ;; but return the position before it.
910 (setf from prev-from))
911 (:end-quoted
912 (setf (parse-state-quoted state) t)))
914 ;;(message ":end ~s" (parse-state-level-starts state))
915 ;; done
916 (setf (parse-state-depth state) depth
917 (parse-state-min-depth state) min-depth
918 (parse-state-this-level-start state) (syntax-level-prev (cur-level))
919 (parse-state-prev-level-start state) (if (<= (length levels) 1)
920 nil (syntax-level-last (second levels)))
921 (parse-state-location state) from
922 (parse-state-level-starts state) (mapcar 'syntax-level-last (cdr levels)))
923 state)))
925 (defun &back-comment (from from-aref stop comment-nested comment-style buffer table)
926 "Checks whether charpos FROM is at the end of a comment.
927 FROM_BYTE is the bytepos corresponding to FROM.
928 Do not move back before STOP.
930 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
931 return -1 otherwise.
933 If successful, return the charpos of the comment's beginning, and the aref pos.
935 **Global syntax data remains valid for backward search starting at
936 **the returned value (or at FROM, if the search was not successful)."
937 ;; Look back, counting the parity of string-quotes,
938 ;; and recording the comment-starters seen.
939 ;; When we reach a safe place, assume that's not in a string;
940 ;; then step the main scan to the earliest comment-starter seen
941 ;; an even number of string quotes away from the safe place.
943 ;; OFROM[I] is position of the earliest comment-starter seen
944 ;; which is I+2X quotes from the comment-end.
945 ;; PARITY is current parity of quotes from the comment end.
946 (let ((string-style :none)
947 (string-lossage nil)
948 ;; Not a real lossage: indicates that we have passed a matching comment
949 ;; starter plus a non-matching comment-ender, meaning that any matching
950 ;; comment-starter we might see later could be a false positive (hidden
951 ;; inside another comment).
952 ;; Test case: { a (* b } c (* d *)
953 (comment-lossage nil)
954 (comment-end from)
955 (comment-end-aref from-aref)
956 (comment-start-pos 0)
957 comment-start-aref
958 ;; Place where the containing defun starts,
959 ;; or nil if we didn't come across it yet.
960 defun-start
961 defun-start-aref
962 code
963 (nesting 1) ; current comment nesting
965 (syntax nil))
966 (handler-case
967 (progn
968 ;; FIXME: A }} comment-ender style leads to incorrect behavior
969 ;; in the case of {{ c }}} because we ignore the last two chars which are
970 ;; assumed to be comment-enders although they aren't.
972 ;; At beginning of range to scan, we're outside of strings;
973 ;; that determines quote parity to the comment-end.
974 (while (/= from stop)
975 (catch :continue
976 (let (temp-aref prev-syntax com2start com2end)
977 (dec-both from from-aref buffer)
978 (setf prev-syntax syntax
979 ch (buffer-fetch-char from-aref buffer)
980 syntax (&syntax-with-flags ch table)
981 code (&syntax ch table)
982 ;; Check for 2-char comment markers.
983 com2start (and (&syntax-flags-comment-start-first syntax)
984 (&syntax-flags-comment-start-second prev-syntax)
985 (eq comment-style (&syntax-flags-comment-style prev-syntax))
986 (eq (or (&syntax-flags-comment-nested prev-syntax)
987 (&syntax-flags-comment-nested syntax))
988 comment-nested))
989 com2end (and (&syntax-flags-comment-end-first syntax)
990 (&syntax-flags-comment-end-second prev-syntax)))
991 ;; Nasty cases with overlapping 2-char comment markers:
992 ;; - snmp-mode: -- c -- foo -- c --
993 ;; --- c --
994 ;; ------ c --
995 ;; - c-mode: *||*
996 ;; |* *|* *|
997 ;; |*| |* |*|
998 ;; /// */
1000 ;; If a 2-char comment sequence partly overlaps with
1001 ;; another, we don't try to be clever.
1002 (when (and (> from stop)
1003 (or com2end com2start))
1004 (let ((next from)
1005 (next-aref from-aref)
1006 next-c
1007 next-syntax)
1008 (dec-both next next-aref buffer)
1009 (setf next-c (buffer-fetch-char next-aref buffer)
1010 next-syntax (&syntax-with-flags next-c table))
1011 (when (or (and (or com2start comment-nested)
1012 (&syntax-flags-comment-end-second syntax)
1013 (&syntax-flags-comment-end-first next-syntax))
1014 (and (or com2end comment-nested)
1015 (&syntax-flags-comment-start-second syntax)
1016 (eq comment-style (&syntax-flags-comment-style syntax))
1017 (&syntax-flags-comment-start-first next-syntax)))
1018 (signal 'syntax-lossage))))
1020 (when (and com2start
1021 (= comment-start-pos 0))
1022 ;; We're looking at a comment starter. But it might be a comment
1023 ;; ender as well (see snmp-mode). The first time we see one, we
1024 ;; need to consider it as a comment starter,
1025 ;; and the subsequent times as a comment ender.
1026 (setf com2end 0))
1028 ;; Turn a 2-char comment sequences into the appropriate syntax.
1029 (cond (com2end
1030 (setf code :end-comment))
1031 (com2start
1032 (setf code :comment))
1033 ;; Ignore comment starters of a different style.
1034 ((and (eq code :comment)
1035 (or (not (eq comment-style (&syntax-flags-comment-style syntax)))
1036 (not (eq comment-nested (&syntax-flags-comment-nested syntax)))))
1037 (throw :continue nil)))
1039 ;; Ignore escaped characters, except comment-enders.
1040 (when (and (not (eq code :end-comment))
1041 (&char-quoted from from-aref buffer table))
1042 (throw :continue nil))
1044 (case code
1045 ((:string-fence :comment-fence :string)
1046 (when (find code '(:string-fence :comment-fence))
1047 (setf ch (if (eq code :string-fence)
1048 :string-style
1049 :comment-style)))
1050 ;; Track parity of quotes.
1051 (cond ((eq string-style :none)
1052 ;; Entering a string.
1053 (setf string-style ch))
1054 ((eq string-style ch)
1055 ;; leaving the string
1056 (setf string-style :none))
1058 ;; If we have two kinds of string delimiters.
1059 ;; There's no way to grok this scanning backwards.
1060 (setf string-lossage t))))
1061 (:comment
1062 ;; We've already checked that it is the relevant comstyle.
1063 (when (or (eq string-style :none)
1064 comment-lossage
1065 string-lossage)
1066 ;; There are odd string quotes involved, so let's be careful.
1067 ;; Test case in Pascal: " { " a { " } */
1068 (signal 'syntax-lossage))
1069 (if (not comment-nested)
1070 ;; Record best comment-starter so far.
1071 (setf comment-start-pos from
1072 comment-start-aref from-aref)
1073 (progn
1074 (decf nesting)
1075 (when (<= nesting)
1076 ;; nested comments have to be balanced, so we don't need to
1077 ;; keep looking for earlier ones. We use here the same (slightly
1078 ;; incorrect) reasoning as below: since it is followed by uniform
1079 ;; paired string quotes, this comment-start has to be outside of
1080 ;; strings, else the comment-end itself would be inside a string.
1081 (signal 'syntax-done)))))
1082 (:end-comment
1083 (cond ((and (eq comment-style (&syntax-flags-comment-style syntax))
1084 (or (and com2end
1085 (&syntax-flags-comment-nested prev-syntax))
1086 (eq comment-nested (&syntax-flags-comment-nested syntax))))
1087 ;; This is the same style of comment ender as ours.
1088 (if comment-nested
1089 (incf nesting)
1090 ;; Anything before that can't count because it would match
1091 ;; this comment-ender rather than ours.
1092 (setf from stop)))
1093 ((or (/= comment-start-pos 0)
1094 (char/= ch #\Newline))
1095 ;; We're mixing comment styles here, so we'd better be careful.
1096 ;; The (comstart_pos != 0 || c != '\n') check is not quite correct
1097 ;; (we should just always set comment_lossage), but removing it
1098 ;; would imply that any multiline comment in C would go through
1099 ;; lossage, which seems overkill.
1100 ;; The failure should only happen in the rare cases such as
1101 ;; { (* } *)
1102 (setf comment-lossage t))))
1103 (:open
1104 ;; Assume a defun-start point is outside of strings.
1105 (when (and *open-paren-in-column-0-is-defun-start*
1106 (or (= from stop)
1107 (progn
1108 (setf temp-aref (aref-minus-1 from-aref buffer))
1109 (char= (buffer-fetch-char temp-aref buffer) #\Newline))))
1110 (setf defun-start from
1111 defun-start-aref from-aref
1112 ;; Break out of the loop.
1113 from stop)))))))
1115 (if (= comment-start-pos 0)
1116 (setf from comment-end
1117 from-aref comment-end-aref)
1118 ;; If comstart_pos is set and we get here (ie. didn't jump to `lossage'
1119 ;; or `done'), then we've found the beginning of the non-nested comment.
1120 (setf from comment-start-pos
1121 from-aref comment-start-aref)))
1122 (syntax-lossage ()
1123 ;; We had two kinds of string delimiters mixed up
1124 ;; together. Decode this going forwards.
1125 ;; Scan fwd from a known safe place (beginning-of-defun)
1126 ;; to the one in question; this records where we
1127 ;; last passed a comment starter.
1128 ;; If we did not already find the defun start, find it now.
1129 (when (= defun-start 0)
1130 (let ((ret (find-defun-start comment-end comment-end-aref buffer table)))
1131 (setf defun-start (parse-state-start-value ret)
1132 defun-start-aref (parse-state-start-value-aref ret))))
1133 (loop do
1134 (let ((state (scan-sexps-forward defun-start defun-start-aref
1135 comment-end -10000 0 nil 0 buffer table)))
1136 (setf defun-start comment-end)
1137 (if (and (eq (parse-state-in-comment state) comment-nested)
1138 (eq (parse-state-comment-style state) comment-style))
1139 (setf from (parse-state-comment-string-start state))
1140 (progn
1141 (setf from comment-end)
1142 (when (parse-state-in-comment state) ; XXX
1143 ;; If comment_end is inside some other comment, maybe ours
1144 ;; is nested, so we need to try again from within the
1145 ;; surrounding comment. Example: { a (* " *)
1146 (setf defun-start (+ (parse-state-comment-string-start state) 2)
1147 defun-start-aref (buffer-char-to-aref buffer defun-start))))))
1148 while (< defun-start comment-end))
1149 (setf from-aref (buffer-char-to-aref buffer from))))
1150 (values (if (= from comment-end) -1 from)
1151 from-aref)))
1153 (defun prev-char-comment-end-first (pos pos-aref buffer table)
1154 "Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE."
1155 (dec-both pos pos-aref buffer)
1156 (&syntax-comment-end-first (buffer-fetch-char pos-aref buffer)
1157 table))
1159 (defun &scan-lists (from count depth sexpflag &aux (buffer (current-buffer)))
1160 "This is from the emacs function"
1161 ;; the big TODO here is to use the CL readtable
1162 (labels ((lose (last-good from)
1163 (signal 'unbalanced-parenthesis :last-good last-good :from from)))
1164 (let ((stop (if (> count 0) (zv buffer) (begv buffer)))
1165 (from-aref (buffer-char-to-aref buffer from))
1166 (min-depth (min 0 depth))
1167 (table (syntax-table))
1168 (last-good from)
1169 quoted
1170 math-exit
1171 comment-start-first
1172 code
1173 ch ch1
1174 temp-code
1175 temp-pos
1176 comment-nested
1177 comment-style
1178 found
1179 prefix)
1180 ;; normalize FROM
1181 (setf from (max (min (zv buffer) from)
1182 (begv buffer)))
1183 (while (> count 0)
1184 ;; the code needs to be able to jump out of the mess it got
1185 ;; in.
1186 (handler-case
1187 (progn
1188 (while (< from stop)
1189 (catch :continue
1190 (setf ch (buffer-fetch-char from-aref buffer)
1191 code (&syntax ch table)
1192 comment-start-first (&syntax-comment-start-first ch table)
1193 comment-nested (&syntax-comment-nested ch table)
1194 comment-style (&syntax-comment-style ch table)
1195 prefix (&syntax-prefix ch table))
1196 (when (= depth min-depth)
1197 (setf last-good from))
1198 (inc-both from from-aref buffer)
1199 (when (and (< from stop) comment-start-first
1200 (progn (setf ch (buffer-fetch-char from-aref buffer))
1201 (&syntax-comment-start-second ch table))
1202 *parse-sexp-ignore-comments*)
1203 ;; we have encountered a comment start sequence and
1204 ;; we are ignoring all text inside comments. We
1205 ;; must record the comment style this sequence
1206 ;; begins so that later, only a comment end of the
1207 ;; same style actually ends the comment section
1208 (setf code :comment
1209 ch1 (buffer-fetch-char from-aref buffer)
1210 comment-style (&syntax-comment-style ch1 table)
1211 comment-nested (or comment-nested
1212 (&syntax-comment-nested ch1 table)))
1213 (inc-both from from-aref buffer))
1215 (when prefix
1216 (throw :continue nil))
1218 (when (or (eq code :escape)
1219 (eq code :character-quote))
1220 (when (= from stop) (lose last-good from))
1221 (inc-both from from-aref buffer)
1222 ;; treat following character as a word constituent
1223 (setf code :word-constituent))
1225 (case code
1226 ((:word-constituent :symbol-constituent)
1227 (unless (or (not (zerop depth))
1228 (not sexpflag))
1229 (let (temp)
1230 (while (< from stop)
1231 (setf ch (buffer-fetch-char from-aref buffer)
1232 temp (&syntax ch table))
1233 (case temp
1234 ((:escape :character-quote)
1235 (inc-both from from-aref buffer)
1236 (when (= from stop) (lose last-good from)))
1237 ((:word-constituent :symbol-constituent :quote))
1239 (signal 'syntax-done)))
1240 (inc-both from from-aref buffer)))
1241 (signal 'syntax-done)))
1242 ((:comment-fence :comment)
1243 (when (eq code :comment-fence)
1244 (setf comment-style :st-comment-style))
1245 (multiple-value-setq (found from from-aref) (&forward-comment from from-aref stop comment-nested comment-style nil buffer table))
1246 (unless found
1247 (when (zerop depth) (signal 'syntax-done))
1248 (lose last-good from))
1249 (inc-both from from-aref buffer))
1250 (:math
1251 (when sexpflag
1252 (when (and (/= from stop)
1253 (char= ch (buffer-fetch-char from-aref buffer)))
1254 (inc-both from from-aref buffer))
1255 (if math-exit
1256 (progn
1257 (setf math-exit nil)
1258 (decf depth)
1259 (when (zerop depth) (signal 'syntax-done))
1260 (when (< depth min-depth)
1261 (signal 'expression-ends-prematurely))) ; XXX
1262 (progn
1263 (setf math-exit t)
1264 (incf depth)
1265 (when (zerop depth) (signal 'syntax-done))))))
1266 (:open
1267 (incf depth)
1268 (when (zerop depth) (signal 'syntax-done)))
1269 (:close
1270 (decf depth)
1271 (when (zerop depth) (signal 'syntax-done))
1272 (when (< depth min-depth)
1273 (signal 'expression-ends-prematurely)))
1274 ((:string :string-fence)
1275 (let* ((tmp-pos (aref-minus-1 from-aref buffer))
1276 (string-term (buffer-fetch-char tmp-pos buffer))
1277 temp)
1278 (loop
1279 (when (>= from stop) (lose last-good from))
1280 (setf ch (buffer-fetch-char from-aref buffer))
1281 (when (if (eq code :string)
1282 (and (char= ch string-term)
1283 (eq (&syntax ch table) :string))
1284 (eq (&syntax ch table) :string-fence))
1285 (return nil))
1286 (setf temp (&syntax ch table))
1287 (case temp
1288 ((:character-quote :escape)
1289 (inc-both from from-aref buffer)))
1290 (inc-both from from-aref buffer))
1291 (inc-both from from-aref buffer)
1292 (when (and (zerop depth)
1293 sexpflag)
1294 (signal 'syntax-done))))
1296 ;; Ignore whitespace, punctuation, quote, endcomment.
1297 ))))
1298 (unless (zerop depth) (lose last-good from))
1299 (return-from &scan-lists nil))
1300 (syntax-done ()
1301 (decf count))))
1303 (while (< count 0)
1304 (handler-case
1305 (progn
1306 (while (> from stop)
1307 (catch :continue
1308 (dec-both from from-aref buffer)
1309 (setf ch (buffer-fetch-char from-aref buffer)
1310 code (&syntax ch table))
1311 (when (= depth min-depth)
1312 (setf last-good from))
1313 (setf comment-style nil
1314 comment-nested (&syntax-comment-nested ch table))
1315 (when (eq code :end-comment)
1316 (setf comment-style (&syntax-comment-style ch table)))
1317 (when (and (> from stop)
1318 (&syntax-comment-end-second ch table)
1319 (prev-char-comment-end-first from from-aref buffer table)
1320 *parse-sexp-ignore-comments*)
1321 ;; We must record the comment style
1322 ;; encountered so that later, we can match
1323 ;; only the proper comment begin sequence of
1324 ;; the same style.
1325 (dec-both from from-aref buffer)
1326 (setf code :end-comment
1327 ch1 (buffer-fetch-char from-aref buffer)
1328 comment-nested (or comment-nested
1329 (&syntax-comment-nested ch1 table))))
1330 ;; Quoting turns anything except a comment-ender
1331 ;; into a word character. Note that this cannot
1332 ;; be true if we decremented FROM in the
1333 ;; if-statement above.
1334 (cond
1335 ((and (not (eq code :end-comment))
1336 (&char-quoted from from-aref buffer table))
1337 (dec-both from from-aref buffer)
1338 (setf code :word))
1339 ((&syntax-prefix ch table)
1340 ;; loop around again. I think this is nasty but fuckit.
1341 (throw :continue nil)))
1342 (case code
1343 ((:word-constituent :symbol-constituent :escape :character-quote)
1344 (unless (or (not (zerop depth))
1345 (not sexpflag))
1346 ;; This word counts as a sexp; count
1347 ;; object finished after passing it.
1348 (while (> from stop)
1349 (setf temp-pos from-aref)
1350 (decf temp-pos)
1351 (setf ch1 (buffer-fetch-char temp-pos buffer)
1352 temp-code (&syntax ch1 table))
1353 ;; Don't allow comment-end to be quoted.
1354 (when (eq temp-code :end-comment)
1355 (signal 'syntax-done))
1356 (setf quoted (&char-quoted (1- from) temp-pos buffer table))
1357 (when quoted
1358 (dec-both from from-aref buffer)
1359 (setf temp-pos (aref-minus-1 temp-pos buffer)))
1360 (setf ch1 (buffer-fetch-char temp-pos buffer)
1361 temp-code (&syntax ch1 table))
1362 (when (not (or quoted
1363 (eq temp-code :word-constituent)
1364 (eq temp-code :symbol-constituent)
1365 (eq temp-code :quote)))
1366 (signal 'syntax-done))
1367 (dec-both from from-aref buffer))
1368 (signal 'syntax-done)))
1369 (:math
1370 (when sexpflag
1371 (setf temp-pos (aref-minus-1 from-aref buffer))
1372 (when (and (/= from stop)
1373 (char= ch (buffer-fetch-char temp-pos buffer)))
1374 (dec-both from from-aref buffer))
1375 (if math-exit
1376 (progn
1377 (setf math-exit nil)
1378 (decf depth)
1379 (when (zerop depth) (signal 'syntax-done))
1380 (when (< depth min-depth)
1381 (signal 'expression-ends-prematurely)))
1382 (progn
1383 (setf math-exit t)
1384 (incf depth)
1385 (when (zerop depth) (signal 'syntax-done))))))
1386 (:close
1387 (incf depth)
1388 (when (zerop depth) (signal 'syntax-done)))
1389 (:open
1390 (decf depth)
1391 (when (zerop depth) (signal 'syntax-done))
1392 (when (< depth min-depth)
1393 (signal 'expression-ends-prematurely)))
1394 (:end-comment
1395 (when *parse-sexp-ignore-comments*
1396 (multiple-value-bind (found char-pos aref-pos)
1397 (&back-comment from from-aref stop comment-nested comment-style buffer table)
1398 (when (eq found :not-comment-end)
1399 (setf from char-pos
1400 from-aref aref-pos)))))
1401 ((:comment-fence :string-fence)
1402 (loop
1403 (when (= from stop) (lose last-good from))
1404 (dec-both from from-aref buffer)
1405 (when (and (not (&char-quoted from from-aref buffer table))
1406 (progn
1407 (setf ch (buffer-fetch-char from-aref buffer))
1408 (eq (&syntax ch table) code)))
1409 (return nil)))
1410 (when (and (eq code :string-fence)
1411 (zerop depth)
1412 sexpflag)
1413 (signal 'syntax-done)))
1414 (:string
1415 (let ((string-term (buffer-fetch-char from-aref buffer)))
1416 (loop
1417 (when (= from stop) (lose last-good from))
1418 (dec-both from from-aref buffer)
1419 (when (and (not (&char-quoted from from-aref buffer table))
1420 (progn
1421 (setf ch (buffer-fetch-char from-aref buffer))
1422 (char= string-term ch))
1423 (eq (&syntax ch table) :string))
1424 (return nil)))
1425 (when (and (zerop depth)
1426 sexpflag)
1427 (signal 'syntax-done))))
1429 ;; Ignore whitespace, punctuation, quote, endcomment.
1430 ))))
1431 (when (not (zerop depth)) (lose last-good from))
1432 (return-from &scan-lists nil))
1433 (syntax-done ()
1434 (incf count))))
1435 from)))
1437 (defun scan-lists (from count depth)
1438 "Scan from character number FROM by COUNT lists.
1439 Returns the character number of the position thus found.
1441 If DEPTH is nonzero, paren depth begins counting from that value,
1442 only places where the depth in parentheses becomes zero
1443 are candidates for stopping; COUNT such places are counted.
1444 Thus, a positive value for DEPTH means go out levels.
1446 Comments are ignored if `*parse-sexp-ignore-comments*' is non-nil.
1448 If the beginning or end of (the accessible part of) the buffer is reached
1449 and the depth is wrong, an error is signaled.
1450 If the depth is right but the count is not used up, nil is returned."
1451 (check-type from number)
1452 (check-type count number)
1453 (check-type depth number)
1454 (&scan-lists from count depth nil))
1456 (defun scan-sexps (from count)
1457 "Scan from character number FROM by COUNT balanced expressions.
1458 If COUNT is negative, scan backwards.
1459 Returns the character number of the position thus found.
1461 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1463 If the beginning or end of (the accessible part of) the buffer is reached
1464 in the middle of a parenthetical grouping, an error is signaled.
1465 If the beginning or end is reached between groupings
1466 but before count is used up, nil is returned."
1467 (check-type from number)
1468 (check-type count number)
1469 (&scan-lists from count 0 t))
1471 (defun backward-prefix-chars (&aux (buffer (current-buffer)) (table (syntax-table)))
1472 "Move point backward over any number of chars with prefix syntax.
1473 This includes chars with \"quote\" or \"prefix\" syntax (' or p)."
1474 (let* ((beg (begv buffer))
1475 (pos (pt buffer))
1476 (pos-aref (buffer-char-to-aref buffer pos))
1477 (opoint (pt buffer))
1478 (opoint-aref (buffer-char-to-aref buffer pos))
1480 (when (<= pos beg)
1481 ;; SET_PT_BOTH (opoint, opoint_byte);
1482 (return-from backward-prefix-chars nil))
1484 (dec-both pos pos-aref buffer)
1485 (while (and (not (&char-quoted pos pos-aref buffer table))
1486 (progn
1487 (setf c (buffer-fetch-char pos-aref buffer))
1488 (or (eq (&syntax c table) :quote)
1489 (&syntax-prefix c table))))
1490 (setf opoint pos
1491 opoint-aref pos-aref)
1492 (when (> (1+ pos) beg)
1493 (dec-both pos pos-aref buffer)))
1494 (set-point-both buffer opoint opoint-aref)
1495 nil))
1497 (defun parse-partial-sexp (from to &key (target-depth -100000) stop-before old-state comment-stop &aux (buffer (current-buffer)) (table (syntax-table)))
1498 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1499 Parsing stops at TO or when certain criteria are met;
1500 point is set to where parsing stops.
1501 If fifth arg OLDSTATE is omitted or nil,
1502 parsing assumes that FROM is the beginning of a function.
1503 Value is a list of elements describing final state of parsing:
1504 0. depth in parens. parse-state-depth
1505 1. character address of start of innermost containing list; nil if none. parse-state-prev-level-start
1506 2. character address of start of last complete sexp terminated. parse-state-this-level-start
1507 3. non-nil if inside a string.
1508 (it is the character that will terminate the string,
1509 or t if the string should be terminated by a generic string delimiter.) parse-state-in-string
1510 4. nil if outside a comment, t if inside a non-nestable comment,
1511 else an integer (the current comment nesting). parse-state-in-comment
1512 5. t if following a quote character. parse-state-quoted
1513 6. the minimum paren-depth encountered during this scan. parse-state-min-depth
1514 7. t if in a comment of style b; symbol `syntax-table' if the comment
1515 should be terminated by a generic comment delimiter. parse-state-comment-style
1516 8. character address of start of comment or string; nil if not in one. parse-state-in-comment
1517 9. Intermediate data for continuation of parsing (subject to change). parse-state-level-starts
1518 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1519 in parentheses becomes equal to TARGETDEPTH.
1520 Fourth arg STOPBEFORE non-nil means stop when come to
1521 any character that starts a sexp.
1522 Fifth arg OLDSTATE is a list like what this function returns.
1523 It is used to initialize the state of the parse. Elements number 1, 2, 6
1524 and 8 are ignored.
1525 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1526 If it is symbol `syntax-table', stop after the start of a comment or a
1527 string, or after end of a comment or a string."
1528 (check-type target-depth number)
1529 (multiple-value-setq (from to) (validate-region from to buffer))
1530 (let ((state (scan-sexps-forward from (buffer-char-to-aref buffer from) to
1531 target-depth (not (null stop-before)) old-state
1532 ;; XXX
1533 (if comment-stop
1534 (if (eq comment-stop 'syntax-table) -1 1)
1536 buffer table)))
1537 (set-point (parse-state-location state) buffer)
1538 state))