[lice @ some bug fixes, a makefile, autoconf support]
[lice.git] / syntax.lisp
blobc4ed99468b0ee9dee102c0fd871714a0d95cc4d6
1 ;;; Cheap syntax functions
3 (in-package "LICE")
5 (defparameter +syntax-classes+
6 '(:whitespace :punctuation :word-constituent :symbol-constituent :open :close :quote :string :math :escape
7 :character-quote :comment :end-comment :inherit :comment-fence :string-fence))
9 (deftype syntax-class ()
10 '(member :whitespace ; for a whitespace character
11 :punctuation ; for random punctuation characters
12 :word-constituent ; for a word constituent
13 :symbol-constituent ; symbol constituent but not word constituent
14 :open ; for a beginning delimiter
15 :close ; for an ending delimiter
16 :quote ; for a prefix character like Lisp '
17 :string ; for a string-grouping character like Lisp "
18 :math ; for delimiters like $ in Tex.
19 :escape ; for a character that begins a C-style escape
20 :character-quote ; for a character that quotes the
21 ; following character
22 :comment ; for a comment-starting character
23 :end-comment ; for a comment-ending character
24 :inherit ; use the standard syntax table for
25 ; this character
26 :comment-fence ; Starts/ends comment which is
27 ; delimited on the other side by any
28 ; char with the same syntaxcode.
29 :string-fence ; Starts/ends string which is delimited
30 ; on the other side by any char with
31 ; the same syntaxcode.
34 (defclass syntax-table ()
35 ((hash :initform (make-hash-table :test 'equal) :initarg :hash :accessor syntax-table-hash)
36 (parent :initform nil :initarg :parent :accessor syntax-table-parent))
37 (:documentation "A syntax table class."))
39 (defstruct syntax-descriptor
40 (class nil :type syntax-class)
41 ;; FIXME: this will be a bitvector at some point but for now just
42 ;; make it a list.
43 (flags nil :type list)
44 ;; this is where the terminator paren char etc is stored
45 extra)
47 (defun set-raw-syntax-entry (table char descriptor)
48 (setf (gethash char (syntax-table-hash table)) descriptor))
50 (defvar *syntax-code-object* (loop for i in +syntax-classes+
51 collect i
52 collect (make-syntax-descriptor :class i :flags nil))
53 "A pool of syntax descriptors to be shared in the standard
54 syntax table in an attempt to save memory. FIXME: premature
55 optimization?")
57 (defvar *standard-syntax-table*
58 (let ((table (make-instance 'syntax-table :parent nil))
59 tmp)
60 ;; Control characters should not be whitespace.
61 (setf tmp (getf *syntax-code-object* :punctuation))
62 (loop for i below (char-code #\Space) do
63 (set-raw-syntax-entry table (code-char i) tmp))
64 ;; Except that a few really are whitespace.
65 (setf tmp (getf *syntax-code-object* :whitespace))
66 (set-raw-syntax-entry table #\Space tmp)
67 (set-raw-syntax-entry table #\Tab tmp)
68 (set-raw-syntax-entry table #\Newline tmp)
69 (set-raw-syntax-entry table #\Return tmp)
70 (set-raw-syntax-entry table #\Page tmp)
72 (setf tmp (getf *syntax-code-object* :word-constituent))
73 (loop
74 for i from (char-code #\a) to (char-code #\z)
75 for j from (char-code #\A) to (char-code #\Z) do
76 (set-raw-syntax-entry table (code-char i) tmp)
77 (set-raw-syntax-entry table (code-char j) tmp))
78 (loop for i from (char-code #\0) to (char-code #\9) do
79 (set-raw-syntax-entry table (code-char i) tmp))
81 (set-raw-syntax-entry table #\$ tmp)
82 (set-raw-syntax-entry table #\% tmp)
84 (set-raw-syntax-entry table #\( (make-syntax-descriptor :class :open :extra ")"))
85 (set-raw-syntax-entry table #\) (make-syntax-descriptor :class :close :extra "("))
86 (set-raw-syntax-entry table #\[ (make-syntax-descriptor :class :open :extra "]"))
87 (set-raw-syntax-entry table #\] (make-syntax-descriptor :class :close :extra "["))
88 (set-raw-syntax-entry table #\{ (make-syntax-descriptor :class :open :extra "}"))
89 (set-raw-syntax-entry table #\} (make-syntax-descriptor :class :close :extra "{"))
91 (set-raw-syntax-entry table #\" (make-syntax-descriptor :class :string))
92 (set-raw-syntax-entry table #\\ (make-syntax-descriptor :class :escape))
94 (setf tmp (getf *syntax-code-object* :symbol-constituent))
95 (loop for i across "_-+*/&|<>=" do
96 (set-raw-syntax-entry table i tmp))
98 (setf tmp (getf *syntax-code-object* :punctuation))
99 (loop for i across ".,;:?!#@~^'`" do
100 (set-raw-syntax-entry table i tmp))
101 ;; TODO: i18n characters
102 table)
103 "The standard syntax table")
105 (defun make-syntax-table (&optional (parent *standard-syntax-table*))
106 "Return a new syntax table.
107 Create a syntax table which inherits from parent (if non-nil) or
108 from `standard-syntax-table' otherwise."
109 (make-instance 'syntax-table
110 :parent parent))
112 (defun copy-syntax-table (&optional (table *standard-syntax-table*))
113 "Construct a new syntax table and return it.
114 It is a copy of the TABLE, which defaults to the standard syntax table."
115 (let* ((hash (make-hash-table)))
116 (maphash (lambda (k v)
117 (setf (gethash k hash) v))
118 (syntax-table-hash table))
119 (make-instance 'syntax-table
120 :hash hash :parent (syntax-table-parent table))))
122 (defun syntax-table (&aux (buffer (current-buffer)))
123 (buffer-syntax-table buffer))
125 (defun modify-syntax-entry (char class &key flags extra (table (syntax-table)))
126 "Set syntax for character CHAR according to CLASS, FLAGS, and EXTRA."
127 (check-type char character)
128 (check-type class syntax-class)
129 (check-type flags list)
130 (check-type table syntax-table)
131 (set-raw-syntax-entry table char
132 (make-syntax-descriptor :class class :flags flags :extra extra)))
134 (defun char-syntax (character &optional (table (syntax-table)))
135 "Return the syntax code of CHARACTER, described by a character.
136 For example, if CHARACTER is a word constituent,
137 the symbol `:WORD-CONSTITUENT' is returned."
138 (let ((descr (gethash character (syntax-table-hash table))))
139 (when descr
140 (syntax-descriptor-class descr))))
142 (defparameter +word-constituents+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
144 (defvar *words-include-escapes* nil
145 "Non-nil means `forward-word', etc., should treat escape chars part of words.")
147 (defun syntax-after (pos &aux (buffer (current-buffer)))
148 "Return the raw syntax of the char after POS.
149 If POS is outside the buffer's accessible portion, return nil."
150 (unless (or (< pos (begv buffer)) (>= pos (zv buffer)))
151 (let* ((ch (buffer-char-after buffer pos))
152 (descr (and ch (gethash ch (syntax-table buffer)))))
153 descr)))
155 ;; FIXME: having the flags as a list is memory intensive. How about a
156 ;; bit vector or number and a function that converts between the two?
158 (defun (setf syntax-table) (value &aux (buffer (current-buffer)))
159 "Select a new syntax table for the current buffer. One argument, a syntax table."
160 (check-type value syntax-table)
161 (setf (buffer-syntax-table buffer) value))
163 (depricate set-syntax-table (setf syntax-table))
164 (defun set-syntax-table (value)
165 "Select a new syntax table for the current buffer. One argument, a syntax table."
166 (setf (syntax-table) value))
168 (defun &syntax-with-flags (ch table &optional (default :whitespace))
169 (or (gethash ch (syntax-table-hash table))
170 ;; try the parent
171 (and (syntax-table-parent table)
172 (&syntax-with-flags ch (syntax-table-parent table) default))
173 ;; return the default
174 (make-syntax-descriptor :class default)))
176 (defun &syntax (ch table &optional (default :whitespace))
177 (let ((descr (&syntax-with-flags ch table default)))
178 (if descr
179 (syntax-descriptor-class descr)
180 default)))
182 (defun &syntax-flags-syntax (syntax)
183 (syntax-descriptor-class syntax))
185 (defun &syntax-comment-start-first (ch table)
186 (let ((descr (&syntax-with-flags ch table)))
187 (when descr
188 (and (find :comment-start-first (syntax-descriptor-flags descr)) t))))
190 (defun &syntax-flags-comment-start-first (syntax)
191 (and (find :comment-start-first (syntax-descriptor-flags syntax)) t))
193 (defun &syntax-comment-start-second (ch table)
194 (let ((descr (&syntax-with-flags ch table)))
195 (when descr
196 (and (find :comment-start-second (syntax-descriptor-flags descr)) t))))
198 (defun &syntax-flags-comment-start-second (syntax)
199 (and (find :comment-start-second (syntax-descriptor-flags syntax)) t))
201 (defun &syntax-comment-end-first (ch table)
202 (let ((descr (&syntax-with-flags ch table)))
203 (when descr
204 (and (find :comment-end-first (syntax-descriptor-flags descr)) t))))
206 (defun &syntax-flags-comment-end-first (syntax)
207 (and (find :comment-end-first (syntax-descriptor-flags syntax)) t))
209 (defun &syntax-comment-end-second (ch table)
210 (let ((descr (&syntax-with-flags ch table)))
211 (when descr
212 (and (find :comment-end-second (syntax-descriptor-flags descr)) t))))
214 (defun &syntax-flags-comment-end-second (syntax)
215 (and (find :comment-end-second (syntax-descriptor-flags syntax)) t))
217 (defun &syntax-prefix (ch table)
218 (let ((descr (&syntax-with-flags ch table)))
219 (when descr
220 (and (find :prefix (syntax-descriptor-flags descr)) t))))
222 (defun &syntax-flags-prefix (syntax)
223 (and (find :prefix (syntax-descriptor-flags syntax)) t))
225 (defun &syntax-comment-style (ch table)
226 (let ((descr (&syntax-with-flags ch table)))
227 (when descr
228 (and (find :comment-style (syntax-descriptor-flags descr)) t))))
230 (defun &syntax-flags-comment-style (syntax)
231 (and (find :comment-style (syntax-descriptor-flags syntax)) t))
233 (defun &syntax-comment-nested (ch table)
234 (let ((descr (&syntax-with-flags ch table)))
235 (when descr
236 (and (find :comment-nested (syntax-descriptor-flags descr)) t))))
238 (defun &syntax-flags-comment-nested (syntax)
239 (and (find :comment-nested (syntax-descriptor-flags syntax)) t))
241 (defmacro with-syntax-table (table &body body)
242 "Evaluate BODY with syntax table of current buffer set to TABLE.
243 The syntax table of the current buffer is saved, BODY is evaluated, and the
244 saved table is restored, even in case of an abnormal exit.
245 Value is what BODY returns."
246 (let ((old-table (gensym "TABLE"))
247 (old-buffer (gensym "BUFFER")))
248 `(let ((,old-table (syntax-table))
249 (,old-buffer (current-buffer)))
250 (unwind-protect
251 (progn
252 (set-syntax-table ,table)
253 ,@body)
254 (save-current-buffer
255 (set-buffer ,old-buffer)
256 (set-syntax-table ,old-table))))))
258 (defun syntax-class (syntax)
259 "Return the syntax class part of the syntax descriptor SYNTAX.
260 If SYNTAX is nil, return nil."
261 (&syntax-flags-syntax syntax))
263 (defun scan-words (from count)
264 "Return the position across COUNT words from FROM.
265 If that many words cannot be found before the end of the buffer,
266 return NIL. COUNT negative means scan backward and stop at word
267 beginning."
268 (let* ((buffer (current-buffer))
269 (beg (begv buffer))
270 (end (zv buffer))
271 (from-aref (buffer-char-to-aref buffer from))
272 (table (syntax-table))
273 ch code)
274 (while (> count 0)
275 (loop
276 (when (= from end)
277 (return-from scan-words nil))
278 (setf ch (buffer-fetch-char from-aref buffer)
279 code (&syntax ch table))
280 (inc-both from from-aref buffer)
281 (when (or (and *words-include-escapes*
282 (or (eq code :escape)
283 (eq code :character-quote)))
284 (eq code :word-constituent))
285 (return nil)))
286 (loop
287 (when (= from end)
288 (return nil))
289 (setf ch (buffer-fetch-char from-aref buffer)
290 code (&syntax from-aref table))
291 (when (and (and (not *words-include-escapes*)
292 (or (eq code :escape)
293 (eq code :character-quote)))
294 (or (not (eq code :word-constituent))))
295 ;; (word-boundary-p ..)
296 (return nil))
297 (inc-both from from-aref buffer))
298 (decf count))
299 (while (< count 0)
300 (loop
301 (when (= from beg)
302 (return-from scan-words nil))
303 (dec-both from from-aref buffer)
304 (setf ch (buffer-fetch-char from-aref buffer)
305 code (&syntax ch table))
306 (when (or (and *words-include-escapes*
307 (or (eq code :escape)
308 (eq code :character-quote)))
309 (eq code :word-constituent))
310 (return nil)))
311 (loop
312 (when (= from beg)
313 (return nil))
314 (setf ch (buffer-fetch-char from-aref buffer)
315 code (&syntax ch table))
316 (when (and (and (not *words-include-escapes*)
317 (or (eq code :escape)
318 (eq code :character-quote)))
319 (not (eq code :word-constituent)))
320 (return nil))
321 (dec-both from from-aref buffer))
322 (incf count))
323 from))
325 (defvar *parse-sexp-ignore-comments* t
326 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.")
328 (defvar *open-paren-in-column-0-is-defun-start* t
329 "*Non-nil means an open paren in column 0 denotes the start of a defun.")
331 ;; Conditions used by the syntax code because the parsing is so bloody
332 ;; hairy.
333 (define-condition syntax-done () ())
334 (define-condition syntax-lossage () ())
336 (define-condition expression-ends-prematurely (lice-condition)
338 (define-condition unbalanced-parenthesis (lice-condition)
339 ((last-good :initarg :last-good :accessor unbalanced-parenthesis-last-good)
340 (from :initarg :from :accessor unbalanced-parenthesis-from)))
342 (defun &char-quoted (char-pos aref-pos buffer table)
343 "Returns TRUE if char at CHAR-POS is quoted.
344 Global syntax-table data should be set up already to be good at CHAR-POS
345 or after. On return global syntax data is good for lookup at CHAR-POS."
346 (let ((beg (begv buffer))
347 quoted)
348 (dec-both char-pos aref-pos buffer)
349 (while (>= char-pos beg)
350 (let* ((ch (buffer-fetch-char aref-pos buffer))
351 (code (&syntax ch table)))
352 (when (not (find code '(:character-quote :escape)))
353 (return nil))
354 (dec-both char-pos aref-pos buffer)
355 (setf quoted (not quoted))))
356 quoted))
358 (defstruct parse-state
359 buffer
360 depth min-depth
361 this-level-start
362 prev-level-start
363 location
364 level-starts
365 quoted
366 in-comment
367 comment-style
368 comment-string-start
369 in-string
370 start-value
371 start-value-aref)
373 (defun find-defun-start (pos pos-aref buffer table)
374 "Return a defun-start position before POS and not too far before.
375 It should be the last one before POS, or nearly the last.
377 When open_paren_in_column_0_is_defun_start is nonzero,
378 only the beginning of the buffer is treated as a defun-start.
380 We record the information about where the scan started
381 and what its result was, so that another call in the same area
382 can return the same value very quickly.
384 There is no promise at which position the global syntax data is
385 valid on return from the subroutine, so the caller should explicitly
386 update the global data."
387 (declare (ignore pos-aref))
388 (unless *open-paren-in-column-0-is-defun-start*
389 (return-from find-defun-start (make-parse-state :start-value (begv buffer)
390 :start-value-aref (begv-aref buffer))))
391 ;; Back up to start of line.
392 (let* ((begv (begv buffer))
393 (pt (buffer-scan-newline buffer pos (begv buffer) -1))
394 (pt-aref (buffer-char-to-aref buffer pt)))
395 (while (> pt begv)
396 (let ((ch (buffer-fetch-char pt-aref buffer)))
397 (when (eq (&syntax ch table) :open)
398 (return nil))
399 ;; Move to beg of previous line.
400 (setf pt (buffer-scan-newline buffer pt (begv buffer) -2)
401 pt-aref (buffer-char-to-aref buffer pt))))
402 ;; Return what we found
403 (make-parse-state :start-value pt
404 :start-value-aref pt-aref
405 :buffer buffer
406 ;; :modiff MODIFF
407 :start-begv (begv buffer)
408 :start-pos pos)))
410 ;; FIXME: doesn't handle ^. Maybe if :not is the first symbol in the list?
411 (defun skip-syntax-forward (syntax-list &optional (lim (zv)))
412 "Move point forward across chars in specified syntax classes.
413 SYNTAX-LIST is a string of syntax code characters.
414 Stop before a char whose syntax is not in SYNTAX-LIST, or at position LIM.
415 If SYNTAX-LIST starts with ^, skip characters whose syntax is NOT in SYNTAX-LIST.
416 This function returns the distance traveled, either zero or positive."
417 (check-type lim integer)
418 (let* ((buffer (current-buffer))
419 (table (syntax-table))
420 (pos (pt))
421 (start pos)
422 (pos-aref (buffer-char-to-aref buffer pos))
423 ch syntax)
424 (while (< pos lim)
425 (setf ch (buffer-fetch-char pos-aref buffer)
426 syntax (&syntax ch table))
427 (unless (find syntax syntax-list)
428 (return nil))
429 (inc-both pos pos-aref buffer))
430 (goto-char pos)
431 (- pos start)))
433 (defun skip-chars (forwardp syntaxp string lim)
434 (declare (ignore syntaxp))
435 (labels ((match-char (c negate ranges chars)
436 ;; check ranges
437 (catch :continue
438 (loop for r in ranges do
439 (if negate
440 (when (<= (car r) (char-code c) (cdr r))
441 (throw :done nil))
442 (when (<= (car r) (char-code c) (cdr r))
443 (throw :continue nil))))
444 ;; check chars
445 (if negate
446 (when (find c chars :test 'char=)
447 (throw :done nil))
448 (when (find c chars :test 'char=)
449 (throw :continue nil)))
450 (unless negate
451 ;; if the char fell through all that then we're done
452 (throw :done nil)))))
453 (check-type string string)
454 (check-number-coerce-marker lim)
455 (let ((idx 0)
456 negate
457 ranges
458 chars
459 (start-point (pt)))
460 ;; don't allow scan outside bounds of buffer.
461 (setf lim (min (max lim (begv)) (zv)))
463 (when (char= (char string 0) #\^)
464 (setf negate t)
465 (incf idx))
467 ;; compile the ranges and chars
468 (while (< idx (length string))
469 (let ((c (char string idx)))
471 ;; (if syntaxp
472 ;; ;; TODO: handle syntaxp
473 ;; nil
474 ;; (progn
475 ;; ;; TODO: handle iso classes
476 ;; ))
477 (when (char= c #\\)
478 (incf idx)
479 (when (= idx (length string))
480 (return nil))
481 (setf c (char string idx)))
482 (incf idx)
484 ;; Treat `-' as range character only if another character
485 ;; follows.
486 (if (and (< (1+ idx) (length string))
487 (char= (char string idx) #\-))
488 (progn
489 (incf idx)
490 (let* ((c2 (char string idx))
491 (code1 (char-code c))
492 (code2 (char-code c2)))
493 (when (<= code1 code2)
494 (push (cons code1 code2) ranges))
495 (incf idx)))
496 (progn
497 (push c chars)))))
498 ;; scan
499 (let* ((buffer (current-buffer))
500 (pos (pt buffer))
501 (pos-aref (buffer-char-to-aref buffer pos)))
502 (catch :done
503 (if forwardp
504 (while (< pos lim)
505 (match-char (buffer-fetch-char pos-aref buffer)
506 negate ranges chars)
507 (inc-both pos pos-aref buffer))
508 (progn
509 ;; do a little dance to end up in the right spot
510 (dec-both pos pos-aref buffer)
511 (unwind-protect
512 (while (> pos lim)
513 (match-char (buffer-fetch-char pos-aref buffer)
514 negate ranges chars)
515 (dec-both pos pos-aref buffer))
516 (inc-both pos pos-aref buffer)))))
517 (set-point pos buffer)
518 ;; return the number of chars we scanned
519 (- pos start-point)))))
521 (defun skip-chars-forward (string &optional (lim (zv)))
522 "Move point forward, stopping before a char not in string."
523 (skip-chars t nil string lim))
525 (defun skip-chars-backward (string &optional (lim (begv)))
526 "Move point backward, stopping after a char not in string."
527 (skip-chars nil nil string lim))
529 (defun skip-whitespace-forward (&optional (lim (zv)))
530 "Move point forward, stopping before a char that is not a space or tab."
531 (skip-chars-forward (coerce '(#\Space #\Tab) 'string) lim))
533 (defun &forward-comment (from from-aref stop nesting style prev-syntax buffer table)
534 "Jump over a comment, assuming we are at the beginning of one.
535 FROM is the current position.
536 FROM_BYTE is the bytepos corresponding to FROM.
537 Do not move past STOP (a charpos).
538 The comment over which we have to jump is of style STYLE
539 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
540 NESTING should be positive to indicate the nesting at the beginning
541 for nested comments and should be zero or negative else.
542 ST_COMMENT_STYLE cannot be nested.
543 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
544 (or nil If the search cannot start in the middle of a two-character).
546 If successful, return 1 and store the charpos of the comment's end
547 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
548 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
549 corresponding bytepos into *BYTEPOS_PTR and the current nesting
550 (as defined for state.incomment) in *INCOMMENT_PTR.
552 The comment end is the last character of the comment rather than the
553 character just after the comment.
555 Global syntax data is assumed to initially be valid for FROM and
556 remains valid for forward search starting at the returned position."
557 (let (c
559 code
560 (syntax prev-syntax))
561 (labels ((forward ()
562 (when (= from stop)
563 (return-from &forward-comment
564 (values nil from from-aref nesting)))
566 (setf c (buffer-fetch-char from-aref buffer)
567 code (&syntax c table)
568 syntax (&syntax-with-flags c table))
570 (when (and (eq code :end-comment)
571 (eq (&syntax-flags-comment-style syntax) style)
572 (if (&syntax-flags-comment-nested syntax)
573 (and (> nesting 0)
574 (progn (decf nesting)
575 (zerop nesting)))
576 (< nesting 0)))
577 ;; we have encountered a comment end of the same
578 ;; style as the comment sequence which began this
579 ;; comment section.
580 (throw :done nil))
581 (when (and (eq code :comment-fence)
582 (eq style :st-comment-style))
583 ;; we have encountered a comment end of the same style
584 ;; as the comment sequence which began this comment
585 ;; section.
586 (throw :done nil))
587 (when (and (> nesting 0)
588 (eq code :comment)
589 (&syntax-flags-comment-nested syntax)
590 (eq (&syntax-flags-comment-style syntax) style))
591 ;; we have encountered a nested comment of the same style
592 ;; as the comment sequence which began this comment section
593 (incf nesting))
594 (inc-both from from-aref buffer))
595 (do-comment ()
596 (when (and (< from stop)
597 (&syntax-flags-comment-end-first syntax)
598 (eq (&syntax-flags-comment-style syntax) style)
599 (progn
600 (setf c1 (buffer-fetch-char from-aref buffer))
601 (&syntax-comment-end-second c1 table))
602 (if (or (&syntax-flags-comment-nested syntax)
603 (&syntax-comment-nested c1 table))
604 (> nesting 0)
605 (< nesting 0)))
606 (decf nesting)
607 (if (<= nesting 0)
608 ;; we have encountered a comment end of the same style
609 ;; as the comment sequence which began this comment
610 ;; section
611 (throw :done nil)
612 (inc-both from from-aref buffer)))
613 (when (and (> nesting 0)
614 (< from stop)
615 (&syntax-flags-comment-start-first syntax)
616 (progn
617 (setf c1 (buffer-fetch-char from-aref buffer))
618 (eq (&syntax-comment-style c1 table) style))
619 (or (&syntax-flags-comment-nested syntax)
620 (&syntax-comment-nested c1 table)))
621 ;; we have encountered a nested comment of the same style
622 ;; as the comment sequence which began this comment
623 ;; section
624 (inc-both from from-aref buffer)
625 (incf nesting))))
626 ;; normalize nesting
627 (cond ((or (null nesting)
628 (<= nesting 0))
629 (setf nesting -1))
630 ((not (numberp nesting))
631 (setf nesting 1)))
632 ;; Enter the loop in the middle so that we find
633 ;; a 2-char comment ender if we start in the middle of it.
634 (catch :done
635 (if syntax
636 (progn
637 (do-comment)
638 (loop
639 (forward)
640 (do-comment)))
641 (loop
642 (forward)
643 (do-comment))))
644 (values t from from-aref))))
646 (defstruct syntax-level
647 last prev)
649 ;; this function cries out for continuations. you almost have to look
650 ;; at the C code to understand what's going on here, i bet. Hell, I
651 ;; don't even understand it.
652 (defun scan-sexps-forward (from from-aref end target-depth stop-before old-state comment-stop buffer table)
653 "Parse forward from FROM / FROM_BYTE to END,
654 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
655 and return a description of the state of the parse at END.
656 If STOPBEFORE is nonzero, stop at the start of an atom.
657 If COMMENTSTOP is 1, stop at the start of a comment.
658 If COMMENTSTOP is -1, stop at the start or end of a comment,
659 after the beginning of a string, or after the end of a string."
660 ;;(message "scan-sexps-forward ~@{~a ~}" from from-aref end target-depth stop-before old-state comment-stop buffer table)
661 (let ((state (make-parse-state))
662 (prev-from from)
663 (prev-from-aref from-aref)
664 prev-from-syntax
665 (boundary-stop (null comment-stop))
666 no-fence
668 code
669 comment-nested
670 depth
671 min-depth
672 temp
673 start-quoted
674 levels)
675 (labels ((inc-from ()
676 (setf prev-from from
677 prev-from-aref from-aref
678 temp (buffer-fetch-char prev-from-aref buffer)
679 prev-from-syntax (&syntax-with-flags temp table))
680 (inc-both from from-aref buffer))
681 (cur-level ()
682 "Return the current level struct"
683 (car levels))
684 (do-start-in-comment ()
685 ;; The (from == BEGV) test was to enter the loop in the middle so
686 ;; that we find a 2-char comment ender even if we start in the
687 ;; middle of it. We don't want to do that if we're just at the
688 ;; beginning of the comment (think of (*) ... (*)).
689 (multiple-value-bind (found out-char out-aref in-comment)
690 (&forward-comment from from-aref end
691 (parse-state-in-comment state)
692 (parse-state-comment-style state)
693 (if (or (eq from (begv buffer))
694 (< from (+ (parse-state-comment-string-start state) 3)))
695 nil prev-from-syntax)
696 buffer table)
697 (setf from out-char
698 from-aref out-aref
699 (parse-state-in-comment state) in-comment)
700 ;; Beware! prev_from and friends are invalid now.
701 ;; Luckily, the `done' doesn't use them and the INC_FROM
702 ;; sets them to a sane value without looking at them.
703 (unless found (throw :end :done))
704 (inc-from)
705 (setf (parse-state-in-comment state) nil
706 (parse-state-comment-style state) nil) ; reset the comment style
707 (when boundary-stop (throw :end :done))))
708 (do-sym-done ()
709 ;;(message "do-sym-done ~s" (parse-state-level-starts state))
710 (setf (syntax-level-prev (cur-level)) (syntax-level-last (cur-level))))
711 (do-sym-started ()
712 ;; (message "do-sym-started")
713 (while (< from end)
714 (case (&syntax (buffer-fetch-char from-aref buffer) table)
715 ((:escape :character-quote)
716 (inc-from)
717 (when (= from end)
718 (throw :end :end-quoted)))
719 ((:word-constituent :symbol-constituent :quote))
721 (do-sym-done)
722 (return nil)))
723 (inc-from)))
724 (do-start-quoted ()
725 (when (= from end) (throw :end :end-quoted))
726 (inc-from)
727 (do-sym-started))
728 (do-in-string-loop ()
729 (loop
730 (let (c)
731 (when (>= from end) (throw :end :done))
732 (setf c (buffer-fetch-char from-aref buffer)
733 temp (&syntax c table))
734 ;; Check TEMP here so that if the char has
735 ;; a syntax-table property which says it is NOT
736 ;; a string character, it does not end the string.
737 (when (and no-fence
738 (equal c (parse-state-in-string state))
739 (eq temp :string))
740 (return nil))
741 (case temp
742 (:string-fence
743 (unless no-fence (return nil)))
744 ((:character-quote :escape)
745 (inc-from)
746 (when (>= from end) (throw :end :end-quoted))))
747 (inc-from))))
748 (do-string-end ()
749 ;;(message "do-string-end ~s" (parse-state-level-starts state))
750 (setf (parse-state-in-string state) nil
751 (syntax-level-prev (cur-level)) (syntax-level-last (cur-level)))
752 (inc-from)
753 (when boundary-stop (throw :end :done)))
754 (do-start-in-string ()
755 (setf no-fence (not (eq (parse-state-in-string state) :st-string-style)))
756 (do-in-string-loop)
757 (do-string-end))
758 (do-start-quoted-in-string ()
759 (when (>= from end) (throw :end :end-quoted))
760 (inc-from)
761 (do-in-string-loop)))
763 (when (/= from (begv buffer))
764 (dec-both prev-from prev-from-aref buffer))
766 (if old-state
767 (progn
768 (setf state old-state
769 start-quoted (parse-state-quoted state)
770 depth (or (parse-state-depth state) 0)
771 start-quoted (parse-state-quoted state))
772 (dolist (i (parse-state-level-starts state))
773 (push (make-syntax-level :last i) levels))
774 ;; make sure we have at least one in the list
775 (unless levels
776 (push (make-syntax-level) levels)))
777 (setf depth 0
778 state (make-parse-state)
779 levels (list (make-syntax-level))))
781 ;;(message "top ~s" (parse-state-level-starts state))
783 (setf (parse-state-quoted state) nil
784 min-depth depth)
786 (setf temp (buffer-fetch-char prev-from-aref buffer)
787 prev-from-syntax (&syntax-with-flags temp table))
789 ;; "Enter" the loop at a place appropriate for initial state. In
790 ;; the C code this is a bunch of goto's. Here we call the
791 ;; appropriate function that sync's us so we're ready to enter
792 ;; the loop.
793 (cond ((parse-state-in-comment state)
794 (do-start-quoted))
795 ((parse-state-in-string state)
796 (setf no-fence (not (eq (parse-state-in-string state) :st-string-style)))
797 (if start-quoted
798 (do-start-quoted-in-string)
799 (do-start-in-string)))
800 (start-quoted
801 (do-start-quoted)))
802 ;; (message "sane here")
803 (case
804 (catch :end
805 (while (< from end)
806 (catch :continue
807 (inc-from)
808 (setf code (&syntax-flags-syntax prev-from-syntax))
809 ;; (message "here the code is ~s" code)
810 (cond ((and (< from end)
811 (&syntax-flags-comment-start-first prev-from-syntax)
812 (progn
813 (setf c1 (buffer-fetch-char from-aref buffer))
814 (&syntax-comment-start-second c1 table)))
815 ;; (message "here 1")
816 ;; Record the comment style we have entered so that only
817 ;; the comment-end sequence of the same style actually
818 ;; terminates the comment section.
819 (setf (parse-state-comment-style state) (&syntax-comment-style c1 table)
820 comment-nested (&syntax-flags-comment-nested prev-from-syntax)
821 comment-nested (or comment-nested
822 (&syntax-comment-nested c1 table))
823 (parse-state-in-comment state) comment-nested
824 (parse-state-comment-string-start state) prev-from)
825 (inc-from)
826 (setf code :comment))
827 ((eq code :comment-fence)
828 ;; (message "here 2")
829 ;; Record the comment style we have entered so that only
830 ;; the comment-end sequence of the same style actually
831 ;; terminates the comment section.
832 (setf (parse-state-comment-style state) :st-comment-style
833 (parse-state-in-comment state) -1 ; XXX
834 (parse-state-comment-string-start state) prev-from
835 code :comment))
836 ((eq code :comment)
837 ;; (message "here 3")
838 (setf (parse-state-comment-style state) (&syntax-flags-comment-style prev-from-syntax)
839 (parse-state-in-comment state) (&syntax-flags-comment-nested prev-from-syntax)
840 (parse-state-comment-string-start state) prev-from)))
842 (when (&syntax-flags-prefix prev-from-syntax)
843 (throw :continue nil))
845 ;;(message "code: ~s" code)
846 (case code
847 ((:escape :character-quote)
848 ;; this arg means stop at sexp start
849 (when stop-before (throw :end :stop))
850 ;;(message ":escae ~s" (parse-state-level-starts state))
851 (setf (syntax-level-last (cur-level)) prev-from)
852 (do-start-quoted))
854 ((:word-constituent :symbol-constituent)
855 (when stop-before (throw :end :stop))
856 ;;(message ":word-con ~s" (parse-state-level-starts state))
857 (setf (syntax-level-last (cur-level)) prev-from)
858 (do-sym-started))
860 ((:comment-fence :comment)
861 (when (or comment-stop
862 boundary-stop)
863 (throw :end :done))
864 (do-start-in-comment))
866 (:open
867 (when stop-before (throw :end :stop))
868 (incf depth)
869 ;;(message ":open ~s" (parse-state-level-starts state))
870 (setf (syntax-level-last (cur-level)) prev-from)
871 ;; (message ":open ~a" (parse-state-level-starts state))
872 (push (make-syntax-level) levels)
873 ;; (when (> (length level-list) 100) ; XXX hardcoded
874 ;; (error "nesting too deep for parser"))
875 (when (= target-depth depth) (throw :end :done)))
877 (:close
878 (decf depth)
879 (when (< depth min-depth)
880 (setf min-depth depth))
881 (unless (= (length levels) 1)
882 ;;(message "XXX: popping when levels is 1!")
883 (pop levels))
884 (setf (syntax-level-prev (cur-level)) (syntax-level-last (cur-level)))
885 (when (= target-depth depth)
886 (throw :end :done)))
888 ((:string :string-fence)
889 (setf (parse-state-comment-string-start state) (1- from))
890 (when stop-before
891 (throw :end :stop))
892 (setf (syntax-level-last (cur-level)) prev-from)
893 (setf (parse-state-in-string state) (if (eq code :string)
894 (buffer-fetch-char prev-from-aref buffer)
895 :st-string-style))
896 (when boundary-stop
897 (throw :end :done))
898 (do-start-in-string))
900 (:math
901 ;; FIXME: We should do something with it.
904 ;; Ignore whitespace, punctuation, quote, endcomment.
905 ))))
906 :done)
907 (:stop
908 ;; Here if stopping before start of sexp.
909 ;; We have just fetched the char that starts it
910 ;; but return the position before it.
911 (setf from prev-from))
912 (:end-quoted
913 (setf (parse-state-quoted state) t)))
915 ;;(message ":end ~s" (parse-state-level-starts state))
916 ;; done
917 (setf (parse-state-depth state) depth
918 (parse-state-min-depth state) min-depth
919 (parse-state-this-level-start state) (syntax-level-prev (cur-level))
920 (parse-state-prev-level-start state) (if (<= (length levels) 1)
921 nil (syntax-level-last (second levels)))
922 (parse-state-location state) from
923 (parse-state-level-starts state) (mapcar 'syntax-level-last (cdr levels)))
924 state)))
926 (defun &back-comment (from from-aref stop comment-nested comment-style buffer table)
927 "Checks whether charpos FROM is at the end of a comment.
928 FROM_BYTE is the bytepos corresponding to FROM.
929 Do not move back before STOP.
931 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
932 return -1 otherwise.
934 If successful, return the charpos of the comment's beginning, and the aref pos.
936 **Global syntax data remains valid for backward search starting at
937 **the returned value (or at FROM, if the search was not successful)."
938 ;; Look back, counting the parity of string-quotes,
939 ;; and recording the comment-starters seen.
940 ;; When we reach a safe place, assume that's not in a string;
941 ;; then step the main scan to the earliest comment-starter seen
942 ;; an even number of string quotes away from the safe place.
944 ;; OFROM[I] is position of the earliest comment-starter seen
945 ;; which is I+2X quotes from the comment-end.
946 ;; PARITY is current parity of quotes from the comment end.
947 (let ((string-style :none)
948 (string-lossage nil)
949 ;; Not a real lossage: indicates that we have passed a matching comment
950 ;; starter plus a non-matching comment-ender, meaning that any matching
951 ;; comment-starter we might see later could be a false positive (hidden
952 ;; inside another comment).
953 ;; Test case: { a (* b } c (* d *)
954 (comment-lossage nil)
955 (comment-end from)
956 (comment-end-aref from-aref)
957 (comment-start-pos 0)
958 comment-start-aref
959 ;; Place where the containing defun starts,
960 ;; or nil if we didn't come across it yet.
961 (defun-start 0)
962 (defun-start-aref 0)
963 code
964 (nesting 1) ; current comment nesting
966 (syntax nil))
967 (handler-case
968 (progn
969 ;; FIXME: A }} comment-ender style leads to incorrect behavior
970 ;; in the case of {{ c }}} because we ignore the last two chars which are
971 ;; assumed to be comment-enders although they aren't.
973 ;; At beginning of range to scan, we're outside of strings;
974 ;; that determines quote parity to the comment-end.
975 (while (/= from stop)
976 (catch :continue
977 (let (temp-aref prev-syntax com2start com2end)
978 (dec-both from from-aref buffer)
979 (setf prev-syntax syntax
980 ch (buffer-fetch-char from-aref buffer)
981 syntax (&syntax-with-flags ch table)
982 code (&syntax ch table)
983 ;; Check for 2-char comment markers.
984 com2start (and (&syntax-flags-comment-start-first syntax)
985 (&syntax-flags-comment-start-second prev-syntax)
986 (eq comment-style (&syntax-flags-comment-style prev-syntax))
987 (eq (or (&syntax-flags-comment-nested prev-syntax)
988 (&syntax-flags-comment-nested syntax))
989 comment-nested))
990 com2end (and (&syntax-flags-comment-end-first syntax)
991 (&syntax-flags-comment-end-second prev-syntax)))
992 ;; Nasty cases with overlapping 2-char comment markers:
993 ;; - snmp-mode: -- c -- foo -- c --
994 ;; --- c --
995 ;; ------ c --
996 ;; - c-mode: *||*
997 ;; |* *|* *|
998 ;; |*| |* |*|
999 ;; /// */
1001 ;; If a 2-char comment sequence partly overlaps with
1002 ;; another, we don't try to be clever.
1003 (when (and (> from stop)
1004 (or com2end com2start))
1005 (let ((next from)
1006 (next-aref from-aref)
1007 next-c
1008 next-syntax)
1009 (dec-both next next-aref buffer)
1010 (setf next-c (buffer-fetch-char next-aref buffer)
1011 next-syntax (&syntax-with-flags next-c table))
1012 (when (or (and (or com2start comment-nested)
1013 (&syntax-flags-comment-end-second syntax)
1014 (&syntax-flags-comment-end-first next-syntax))
1015 (and (or com2end comment-nested)
1016 (&syntax-flags-comment-start-second syntax)
1017 (eq comment-style (&syntax-flags-comment-style syntax))
1018 (&syntax-flags-comment-start-first next-syntax)))
1019 (signal 'syntax-lossage))))
1021 (when (and com2start
1022 (= comment-start-pos 0))
1023 ;; We're looking at a comment starter. But it might be a comment
1024 ;; ender as well (see snmp-mode). The first time we see one, we
1025 ;; need to consider it as a comment starter,
1026 ;; and the subsequent times as a comment ender.
1027 (setf com2end 0))
1029 ;; Turn a 2-char comment sequences into the appropriate syntax.
1030 (cond (com2end
1031 (setf code :end-comment))
1032 (com2start
1033 (setf code :comment))
1034 ;; Ignore comment starters of a different style.
1035 ((and (eq code :comment)
1036 (or (not (eq comment-style (&syntax-flags-comment-style syntax)))
1037 (not (eq comment-nested (&syntax-flags-comment-nested syntax)))))
1038 (throw :continue nil)))
1040 ;; Ignore escaped characters, except comment-enders.
1041 (when (and (not (eq code :end-comment))
1042 (&char-quoted from from-aref buffer table))
1043 (throw :continue nil))
1045 (case code
1046 ((:string-fence :comment-fence :string)
1047 (when (find code '(:string-fence :comment-fence))
1048 (setf ch (if (eq code :string-fence)
1049 :string-style
1050 :comment-style)))
1051 ;; Track parity of quotes.
1052 (cond ((eq string-style :none)
1053 ;; Entering a string.
1054 (setf string-style ch))
1055 ((eq string-style ch)
1056 ;; leaving the string
1057 (setf string-style :none))
1059 ;; If we have two kinds of string delimiters.
1060 ;; There's no way to grok this scanning backwards.
1061 (setf string-lossage t))))
1062 (:comment
1063 ;; We've already checked that it is the relevant comstyle.
1064 (when (or (eq string-style :none)
1065 comment-lossage
1066 string-lossage)
1067 ;; There are odd string quotes involved, so let's be careful.
1068 ;; Test case in Pascal: " { " a { " } */
1069 (signal 'syntax-lossage))
1070 (if (not comment-nested)
1071 ;; Record best comment-starter so far.
1072 (setf comment-start-pos from
1073 comment-start-aref from-aref)
1074 (progn
1075 (decf nesting)
1076 (when (<= nesting 0)
1077 ;; nested comments have to be balanced, so we don't need to
1078 ;; keep looking for earlier ones. We use here the same (slightly
1079 ;; incorrect) reasoning as below: since it is followed by uniform
1080 ;; paired string quotes, this comment-start has to be outside of
1081 ;; strings, else the comment-end itself would be inside a string.
1082 (signal 'syntax-done)))))
1083 (:end-comment
1084 (cond ((and (eq comment-style (&syntax-flags-comment-style syntax))
1085 (or (and com2end
1086 (&syntax-flags-comment-nested prev-syntax))
1087 (eq comment-nested (&syntax-flags-comment-nested syntax))))
1088 ;; This is the same style of comment ender as ours.
1089 (if comment-nested
1090 (incf nesting)
1091 ;; Anything before that can't count because it would match
1092 ;; this comment-ender rather than ours.
1093 (setf from stop)))
1094 ((or (/= comment-start-pos 0)
1095 (char/= ch #\Newline))
1096 ;; We're mixing comment styles here, so we'd better be careful.
1097 ;; The (comstart_pos != 0 || c != '\n') check is not quite correct
1098 ;; (we should just always set comment_lossage), but removing it
1099 ;; would imply that any multiline comment in C would go through
1100 ;; lossage, which seems overkill.
1101 ;; The failure should only happen in the rare cases such as
1102 ;; { (* } *)
1103 (setf comment-lossage t))))
1104 (:open
1105 ;; Assume a defun-start point is outside of strings.
1106 (when (and *open-paren-in-column-0-is-defun-start*
1107 (or (= from stop)
1108 (progn
1109 (setf temp-aref (aref-minus-1 from-aref buffer))
1110 (char= (buffer-fetch-char temp-aref buffer) #\Newline))))
1111 (setf defun-start from
1112 defun-start-aref from-aref
1113 ;; Break out of the loop.
1114 from stop)))))))
1116 (if (= comment-start-pos 0)
1117 (setf from comment-end
1118 from-aref comment-end-aref)
1119 ;; If comstart_pos is set and we get here (ie. didn't jump to `lossage'
1120 ;; or `done'), then we've found the beginning of the non-nested comment.
1121 (setf from comment-start-pos
1122 from-aref comment-start-aref)))
1123 (syntax-lossage ()
1124 ;; We had two kinds of string delimiters mixed up
1125 ;; together. Decode this going forwards.
1126 ;; Scan fwd from a known safe place (beginning-of-defun)
1127 ;; to the one in question; this records where we
1128 ;; last passed a comment starter.
1129 ;; If we did not already find the defun start, find it now.
1130 (when (= defun-start 0)
1131 (let ((ret (find-defun-start comment-end comment-end-aref buffer table)))
1132 (setf defun-start (parse-state-start-value ret)
1133 defun-start-aref (parse-state-start-value-aref ret))))
1134 (loop do
1135 (let ((state (scan-sexps-forward defun-start defun-start-aref
1136 comment-end -10000 0 nil 0 buffer table)))
1137 (setf defun-start comment-end)
1138 (if (and (eq (parse-state-in-comment state) comment-nested)
1139 (eq (parse-state-comment-style state) comment-style))
1140 (setf from (parse-state-comment-string-start state))
1141 (progn
1142 (setf from comment-end)
1143 (when (parse-state-in-comment state) ; XXX
1144 ;; If comment_end is inside some other comment, maybe ours
1145 ;; is nested, so we need to try again from within the
1146 ;; surrounding comment. Example: { a (* " *)
1147 (setf defun-start (+ (parse-state-comment-string-start state) 2)
1148 defun-start-aref (buffer-char-to-aref buffer defun-start))))))
1149 while (< defun-start comment-end))
1150 (setf from-aref (buffer-char-to-aref buffer from))))
1151 (values (if (= from comment-end) -1 from)
1152 from-aref)))
1154 (defun prev-char-comment-end-first (pos pos-aref buffer table)
1155 "Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE."
1156 (dec-both pos pos-aref buffer)
1157 (&syntax-comment-end-first (buffer-fetch-char pos-aref buffer)
1158 table))
1160 (defun &scan-lists (from count depth sexpflag &aux (buffer (current-buffer)))
1161 "This is from the emacs function"
1162 ;; the big TODO here is to use the CL readtable
1163 (labels ((lose (last-good from)
1164 (signal 'unbalanced-parenthesis :last-good last-good :from from)))
1165 (let ((stop (if (> count 0) (zv buffer) (begv buffer)))
1166 (from-aref (buffer-char-to-aref buffer from))
1167 (min-depth (min 0 depth))
1168 (table (syntax-table))
1169 (last-good from)
1170 quoted
1171 math-exit
1172 comment-start-first
1173 code
1174 ch ch1
1175 temp-code
1176 temp-pos
1177 comment-nested
1178 comment-style
1179 found
1180 prefix)
1181 ;; normalize FROM
1182 (setf from (max (min (zv buffer) from)
1183 (begv buffer)))
1184 (while (> count 0)
1185 ;; the code needs to be able to jump out of the mess it got
1186 ;; in.
1187 (handler-case
1188 (progn
1189 (while (< from stop)
1190 (catch :continue
1191 (setf ch (buffer-fetch-char from-aref buffer)
1192 code (&syntax ch table)
1193 comment-start-first (&syntax-comment-start-first ch table)
1194 comment-nested (&syntax-comment-nested ch table)
1195 comment-style (&syntax-comment-style ch table)
1196 prefix (&syntax-prefix ch table))
1197 (when (= depth min-depth)
1198 (setf last-good from))
1199 (inc-both from from-aref buffer)
1200 (when (and (< from stop) comment-start-first
1201 (progn (setf ch (buffer-fetch-char from-aref buffer))
1202 (&syntax-comment-start-second ch table))
1203 *parse-sexp-ignore-comments*)
1204 ;; we have encountered a comment start sequence and
1205 ;; we are ignoring all text inside comments. We
1206 ;; must record the comment style this sequence
1207 ;; begins so that later, only a comment end of the
1208 ;; same style actually ends the comment section
1209 (setf code :comment
1210 ch1 (buffer-fetch-char from-aref buffer)
1211 comment-style (&syntax-comment-style ch1 table)
1212 comment-nested (or comment-nested
1213 (&syntax-comment-nested ch1 table)))
1214 (inc-both from from-aref buffer))
1216 (when prefix
1217 (throw :continue nil))
1219 (when (or (eq code :escape)
1220 (eq code :character-quote))
1221 (when (= from stop) (lose last-good from))
1222 (inc-both from from-aref buffer)
1223 ;; treat following character as a word constituent
1224 (setf code :word-constituent))
1226 (case code
1227 ((:word-constituent :symbol-constituent)
1228 (unless (or (not (zerop depth))
1229 (not sexpflag))
1230 (let (temp)
1231 (while (< from stop)
1232 (setf ch (buffer-fetch-char from-aref buffer)
1233 temp (&syntax ch table))
1234 (case temp
1235 ((:escape :character-quote)
1236 (inc-both from from-aref buffer)
1237 (when (= from stop) (lose last-good from)))
1238 ((:word-constituent :symbol-constituent :quote))
1240 (signal 'syntax-done)))
1241 (inc-both from from-aref buffer)))
1242 (signal 'syntax-done)))
1243 ((:comment-fence :comment)
1244 (when (eq code :comment-fence)
1245 (setf comment-style :st-comment-style))
1246 (multiple-value-setq (found from from-aref) (&forward-comment from from-aref stop comment-nested comment-style nil buffer table))
1247 (unless found
1248 (when (zerop depth) (signal 'syntax-done))
1249 (lose last-good from))
1250 (inc-both from from-aref buffer))
1251 (:math
1252 (when sexpflag
1253 (when (and (/= from stop)
1254 (char= ch (buffer-fetch-char from-aref buffer)))
1255 (inc-both from from-aref buffer))
1256 (if math-exit
1257 (progn
1258 (setf math-exit nil)
1259 (decf depth)
1260 (when (zerop depth) (signal 'syntax-done))
1261 (when (< depth min-depth)
1262 (signal 'expression-ends-prematurely))) ; XXX
1263 (progn
1264 (setf math-exit t)
1265 (incf depth)
1266 (when (zerop depth) (signal 'syntax-done))))))
1267 (:open
1268 (incf depth)
1269 (when (zerop depth) (signal 'syntax-done)))
1270 (:close
1271 (decf depth)
1272 (when (zerop depth) (signal 'syntax-done))
1273 (when (< depth min-depth)
1274 (signal 'expression-ends-prematurely)))
1275 ((:string :string-fence)
1276 (let* ((tmp-pos (aref-minus-1 from-aref buffer))
1277 (string-term (buffer-fetch-char tmp-pos buffer))
1278 temp)
1279 (loop
1280 (when (>= from stop) (lose last-good from))
1281 (setf ch (buffer-fetch-char from-aref buffer))
1282 (when (if (eq code :string)
1283 (and (char= ch string-term)
1284 (eq (&syntax ch table) :string))
1285 (eq (&syntax ch table) :string-fence))
1286 (return nil))
1287 (setf temp (&syntax ch table))
1288 (case temp
1289 ((:character-quote :escape)
1290 (inc-both from from-aref buffer)))
1291 (inc-both from from-aref buffer))
1292 (inc-both from from-aref buffer)
1293 (when (and (zerop depth)
1294 sexpflag)
1295 (signal 'syntax-done))))
1297 ;; Ignore whitespace, punctuation, quote, endcomment.
1298 ))))
1299 (unless (zerop depth) (lose last-good from))
1300 (return-from &scan-lists nil))
1301 (syntax-done ()
1302 (decf count))))
1304 (while (< count 0)
1305 (handler-case
1306 (progn
1307 (while (> from stop)
1308 (catch :continue
1309 (dec-both from from-aref buffer)
1310 (setf ch (buffer-fetch-char from-aref buffer)
1311 code (&syntax ch table))
1312 (when (= depth min-depth)
1313 (setf last-good from))
1314 (setf comment-style nil
1315 comment-nested (&syntax-comment-nested ch table))
1316 (when (eq code :end-comment)
1317 (setf comment-style (&syntax-comment-style ch table)))
1318 (when (and (> from stop)
1319 (&syntax-comment-end-second ch table)
1320 (prev-char-comment-end-first from from-aref buffer table)
1321 *parse-sexp-ignore-comments*)
1322 ;; We must record the comment style
1323 ;; encountered so that later, we can match
1324 ;; only the proper comment begin sequence of
1325 ;; the same style.
1326 (dec-both from from-aref buffer)
1327 (setf code :end-comment
1328 ch1 (buffer-fetch-char from-aref buffer)
1329 comment-nested (or comment-nested
1330 (&syntax-comment-nested ch1 table))))
1331 ;; Quoting turns anything except a comment-ender
1332 ;; into a word character. Note that this cannot
1333 ;; be true if we decremented FROM in the
1334 ;; if-statement above.
1335 (cond
1336 ((and (not (eq code :end-comment))
1337 (&char-quoted from from-aref buffer table))
1338 (dec-both from from-aref buffer)
1339 (setf code :word))
1340 ((&syntax-prefix ch table)
1341 ;; loop around again. I think this is nasty but fuckit.
1342 (throw :continue nil)))
1343 (case code
1344 ((:word-constituent :symbol-constituent :escape :character-quote)
1345 (unless (or (not (zerop depth))
1346 (not sexpflag))
1347 ;; This word counts as a sexp; count
1348 ;; object finished after passing it.
1349 (while (> from stop)
1350 (setf temp-pos from-aref)
1351 (decf temp-pos)
1352 (setf ch1 (buffer-fetch-char temp-pos buffer)
1353 temp-code (&syntax ch1 table))
1354 ;; Don't allow comment-end to be quoted.
1355 (when (eq temp-code :end-comment)
1356 (signal 'syntax-done))
1357 (setf quoted (&char-quoted (1- from) temp-pos buffer table))
1358 (when quoted
1359 (dec-both from from-aref buffer)
1360 (setf temp-pos (aref-minus-1 temp-pos buffer)))
1361 (setf ch1 (buffer-fetch-char temp-pos buffer)
1362 temp-code (&syntax ch1 table))
1363 (when (not (or quoted
1364 (eq temp-code :word-constituent)
1365 (eq temp-code :symbol-constituent)
1366 (eq temp-code :quote)))
1367 (signal 'syntax-done))
1368 (dec-both from from-aref buffer))
1369 (signal 'syntax-done)))
1370 (:math
1371 (when sexpflag
1372 (setf temp-pos (aref-minus-1 from-aref buffer))
1373 (when (and (/= from stop)
1374 (char= ch (buffer-fetch-char temp-pos buffer)))
1375 (dec-both from from-aref buffer))
1376 (if math-exit
1377 (progn
1378 (setf math-exit nil)
1379 (decf depth)
1380 (when (zerop depth) (signal 'syntax-done))
1381 (when (< depth min-depth)
1382 (signal 'expression-ends-prematurely)))
1383 (progn
1384 (setf math-exit t)
1385 (incf depth)
1386 (when (zerop depth) (signal 'syntax-done))))))
1387 (:close
1388 (incf depth)
1389 (when (zerop depth) (signal 'syntax-done)))
1390 (:open
1391 (decf depth)
1392 (when (zerop depth) (signal 'syntax-done))
1393 (when (< depth min-depth)
1394 (signal 'expression-ends-prematurely)))
1395 (:end-comment
1396 (when *parse-sexp-ignore-comments*
1397 (multiple-value-bind (found char-pos aref-pos)
1398 (&back-comment from from-aref stop comment-nested comment-style buffer table)
1399 (when (eq found :not-comment-end)
1400 (setf from char-pos
1401 from-aref aref-pos)))))
1402 ((:comment-fence :string-fence)
1403 (loop
1404 (when (= from stop) (lose last-good from))
1405 (dec-both from from-aref buffer)
1406 (when (and (not (&char-quoted from from-aref buffer table))
1407 (progn
1408 (setf ch (buffer-fetch-char from-aref buffer))
1409 (eq (&syntax ch table) code)))
1410 (return nil)))
1411 (when (and (eq code :string-fence)
1412 (zerop depth)
1413 sexpflag)
1414 (signal 'syntax-done)))
1415 (:string
1416 (let ((string-term (buffer-fetch-char from-aref buffer)))
1417 (loop
1418 (when (= from stop) (lose last-good from))
1419 (dec-both from from-aref buffer)
1420 (when (and (not (&char-quoted from from-aref buffer table))
1421 (progn
1422 (setf ch (buffer-fetch-char from-aref buffer))
1423 (char= string-term ch))
1424 (eq (&syntax ch table) :string))
1425 (return nil)))
1426 (when (and (zerop depth)
1427 sexpflag)
1428 (signal 'syntax-done))))
1430 ;; Ignore whitespace, punctuation, quote, endcomment.
1431 ))))
1432 (when (not (zerop depth)) (lose last-good from))
1433 (return-from &scan-lists nil))
1434 (syntax-done ()
1435 (incf count))))
1436 from)))
1438 (defun scan-lists (from count depth)
1439 "Scan from character number FROM by COUNT lists.
1440 Returns the character number of the position thus found.
1442 If DEPTH is nonzero, paren depth begins counting from that value,
1443 only places where the depth in parentheses becomes zero
1444 are candidates for stopping; COUNT such places are counted.
1445 Thus, a positive value for DEPTH means go out levels.
1447 Comments are ignored if `*parse-sexp-ignore-comments*' is non-nil.
1449 If the beginning or end of (the accessible part of) the buffer is reached
1450 and the depth is wrong, an error is signaled.
1451 If the depth is right but the count is not used up, nil is returned."
1452 (check-type from number)
1453 (check-type count number)
1454 (check-type depth number)
1455 (&scan-lists from count depth nil))
1457 (defun scan-sexps (from count)
1458 "Scan from character number FROM by COUNT balanced expressions.
1459 If COUNT is negative, scan backwards.
1460 Returns the character number of the position thus found.
1462 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1464 If the beginning or end of (the accessible part of) the buffer is reached
1465 in the middle of a parenthetical grouping, an error is signaled.
1466 If the beginning or end is reached between groupings
1467 but before count is used up, nil is returned."
1468 (check-type from number)
1469 (check-type count number)
1470 (&scan-lists from count 0 t))
1472 (defun backward-prefix-chars (&aux (buffer (current-buffer)) (table (syntax-table)))
1473 "Move point backward over any number of chars with prefix syntax.
1474 This includes chars with \"quote\" or \"prefix\" syntax (' or p)."
1475 (let* ((beg (begv buffer))
1476 (pos (pt buffer))
1477 (pos-aref (buffer-char-to-aref buffer pos))
1478 (opoint (pt buffer))
1479 (opoint-aref (buffer-char-to-aref buffer pos))
1481 (when (<= pos beg)
1482 ;; SET_PT_BOTH (opoint, opoint_byte);
1483 (return-from backward-prefix-chars nil))
1485 (dec-both pos pos-aref buffer)
1486 (while (and (not (&char-quoted pos pos-aref buffer table))
1487 (progn
1488 (setf c (buffer-fetch-char pos-aref buffer))
1489 (or (eq (&syntax c table) :quote)
1490 (&syntax-prefix c table))))
1491 (setf opoint pos
1492 opoint-aref pos-aref)
1493 (when (> (1+ pos) beg)
1494 (dec-both pos pos-aref buffer)))
1495 (set-point-both buffer opoint opoint-aref)
1496 nil))
1498 (defun parse-partial-sexp (from to &key (target-depth -100000) stop-before old-state comment-stop &aux (buffer (current-buffer)) (table (syntax-table)))
1499 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1500 Parsing stops at TO or when certain criteria are met;
1501 point is set to where parsing stops.
1502 If fifth arg OLDSTATE is omitted or nil,
1503 parsing assumes that FROM is the beginning of a function.
1504 Value is a list of elements describing final state of parsing:
1505 0. depth in parens. parse-state-depth
1506 1. character address of start of innermost containing list; nil if none. parse-state-prev-level-start
1507 2. character address of start of last complete sexp terminated. parse-state-this-level-start
1508 3. non-nil if inside a string.
1509 (it is the character that will terminate the string,
1510 or t if the string should be terminated by a generic string delimiter.) parse-state-in-string
1511 4. nil if outside a comment, t if inside a non-nestable comment,
1512 else an integer (the current comment nesting). parse-state-in-comment
1513 5. t if following a quote character. parse-state-quoted
1514 6. the minimum paren-depth encountered during this scan. parse-state-min-depth
1515 7. t if in a comment of style b; symbol `syntax-table' if the comment
1516 should be terminated by a generic comment delimiter. parse-state-comment-style
1517 8. character address of start of comment or string; nil if not in one. parse-state-in-comment
1518 9. Intermediate data for continuation of parsing (subject to change). parse-state-level-starts
1519 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1520 in parentheses becomes equal to TARGETDEPTH.
1521 Fourth arg STOPBEFORE non-nil means stop when come to
1522 any character that starts a sexp.
1523 Fifth arg OLDSTATE is a list like what this function returns.
1524 It is used to initialize the state of the parse. Elements number 1, 2, 6
1525 and 8 are ignored.
1526 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1527 If it is symbol `syntax-table', stop after the start of a comment or a
1528 string, or after end of a comment or a string."
1529 (check-type target-depth number)
1530 (multiple-value-setq (from to) (validate-region from to buffer))
1531 (let ((state (scan-sexps-forward from (buffer-char-to-aref buffer from) to
1532 target-depth (not (null stop-before)) old-state
1533 ;; XXX
1534 (if comment-stop
1535 (if (eq comment-stop 'syntax-table) -1 1)
1537 buffer table)))
1538 (set-point (parse-state-location state) buffer)
1539 state))