1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2015 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
23 #include <sys/types.h>
26 #include "character.h"
30 #include "intervals.h"
33 /* Make syntax table lookup grant data in gl_state. */
34 #define SYNTAX(c) syntax_property (c, 1)
35 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
36 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
38 /* Eight single-bit flags have the following meanings:
39 1. This character is the first of a two-character comment-start sequence.
40 2. This character is the second of a two-character comment-start sequence.
41 3. This character is the first of a two-character comment-end sequence.
42 4. This character is the second of a two-character comment-end sequence.
43 5. This character is a prefix, for backward-prefix-chars.
44 6. The char is part of a delimiter for comments of style "b".
45 7. This character is part of a nestable comment sequence.
46 8. The char is part of a delimiter for comments of style "c".
47 Note that any two-character sequence whose first character has flag 1
48 and whose second character has flag 2 will be interpreted as a comment start.
50 Bits 6 and 8 discriminate among different comment styles.
51 Languages such as C++ allow two orthogonal syntax start/end pairs
52 and bit 6 determines whether a comment-end or Scommentend
53 ends style a or b. Comment markers can start style a, b, c, or bc.
54 Style a is always the default.
55 For 2-char comment markers, the style b flag is looked up only on the second
56 char of the comment marker and on the first char of the comment ender.
57 For style c (like the nested flag), the flag can be placed on any of
60 /* These functions extract specific flags from an integer
61 that holds the syntax code and the flags. */
64 SYNTAX_FLAGS_COMSTART_FIRST (int flags
)
66 return (flags
>> 16) & 1;
69 SYNTAX_FLAGS_COMSTART_SECOND (int flags
)
71 return (flags
>> 17) & 1;
74 SYNTAX_FLAGS_COMEND_FIRST (int flags
)
76 return (flags
>> 18) & 1;
79 SYNTAX_FLAGS_COMEND_SECOND (int flags
)
81 return (flags
>> 19) & 1;
84 SYNTAX_FLAGS_PREFIX (int flags
)
86 return (flags
>> 20) & 1;
89 SYNTAX_FLAGS_COMMENT_STYLEB (int flags
)
91 return (flags
>> 21) & 1;
94 SYNTAX_FLAGS_COMMENT_STYLEC (int flags
)
96 return (flags
>> 23) & 1;
99 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags
)
101 return (flags
>> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
104 SYNTAX_FLAGS_COMMENT_NESTED (int flags
)
106 return (flags
>> 22) & 1;
109 /* FLAGS should be the flags of the main char of the comment marker, e.g.
110 the second for comstart and the first for comend. */
112 SYNTAX_FLAGS_COMMENT_STYLE (int flags
, int other_flags
)
114 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags
)
115 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags
)
116 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags
));
119 /* Extract a particular flag for a given character. */
122 SYNTAX_COMEND_FIRST (int c
)
124 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c
));
127 /* We use these constants in place for comment-style and
128 string-ender-char to distinguish comments/strings started by
129 comment_fence and string_fence codes. */
133 ST_COMMENT_STYLE
= 256 + 1,
134 ST_STRING_STYLE
= 256 + 2
137 /* This is the internal form of the parse state used in parse-partial-sexp. */
139 struct lisp_parse_state
141 EMACS_INT depth
; /* Depth at end of parsing. */
142 int instring
; /* -1 if not within string, else desired terminator. */
143 EMACS_INT incomment
; /* -1 if in unnestable comment else comment nesting */
144 int comstyle
; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
145 bool quoted
; /* True if just after an escape char at end of parsing. */
146 EMACS_INT mindepth
; /* Minimum depth seen while scanning. */
147 /* Char number of most recent start-of-expression at current level */
148 ptrdiff_t thislevelstart
;
149 /* Char number of start of containing expression */
150 ptrdiff_t prevlevelstart
;
151 ptrdiff_t location
; /* Char number at which parsing stopped. */
152 ptrdiff_t location_byte
; /* Corresponding byte position. */
153 ptrdiff_t comstr_start
; /* Position of last comment/string starter. */
154 Lisp_Object levelstarts
; /* Char numbers of starts-of-expression
155 of levels (starting from outermost). */
158 /* These variables are a cache for finding the start of a defun.
159 find_start_pos is the place for which the defun start was found.
160 find_start_value is the defun start position found for it.
161 find_start_value_byte is the corresponding byte position.
162 find_start_buffer is the buffer it was found in.
163 find_start_begv is the BEGV value when it was found.
164 find_start_modiff is the value of MODIFF when it was found. */
166 static ptrdiff_t find_start_pos
;
167 static ptrdiff_t find_start_value
;
168 static ptrdiff_t find_start_value_byte
;
169 static struct buffer
*find_start_buffer
;
170 static ptrdiff_t find_start_begv
;
171 static EMACS_INT find_start_modiff
;
174 static Lisp_Object
skip_chars (bool, Lisp_Object
, Lisp_Object
, bool);
175 static Lisp_Object
skip_syntaxes (bool, Lisp_Object
, Lisp_Object
);
176 static Lisp_Object
scan_lists (EMACS_INT
, EMACS_INT
, EMACS_INT
, bool);
177 static void scan_sexps_forward (struct lisp_parse_state
*,
178 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT
,
179 bool, Lisp_Object
, int);
180 static bool in_classes (int, Lisp_Object
);
181 static void parse_sexp_propertize (ptrdiff_t charpos
);
183 /* This setter is used only in this file, so it can be private. */
185 bset_syntax_table (struct buffer
*b
, Lisp_Object val
)
187 b
->syntax_table_
= val
;
190 /* Whether the syntax of the character C has the prefix flag set. */
192 syntax_prefix_flag_p (int c
)
194 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c
));
197 struct gl_state_s gl_state
; /* Global state of syntax parser. */
199 enum { INTERVALS_AT_ONCE
= 10 }; /* 1 + max-number of intervals
200 to scan to property-change. */
202 /* Set the syntax entry VAL for char C in table TABLE. */
205 SET_RAW_SYNTAX_ENTRY (Lisp_Object table
, int c
, Lisp_Object val
)
207 CHAR_TABLE_SET (table
, c
, val
);
210 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
211 RANGE is a cons (FROM . TO) specifying the range of characters. */
214 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table
, Lisp_Object range
,
217 Fset_char_table_range (table
, range
, val
);
220 /* Extract the information from the entry for character C
221 in the current syntax table. */
226 Lisp_Object ent
= SYNTAX_ENTRY (c
);
227 return CONSP (ent
) ? XCDR (ent
) : Qnil
;
230 /* This should be called with FROM at the start of forward
231 search, or after the last position of the backward search. It
232 makes sure that the first char is picked up with correct table, so
233 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
235 Sign of COUNT gives the direction of the search.
239 SETUP_SYNTAX_TABLE (ptrdiff_t from
, ptrdiff_t count
)
241 SETUP_BUFFER_SYNTAX_TABLE ();
242 gl_state
.b_property
= BEGV
;
243 gl_state
.e_property
= ZV
+ 1;
244 gl_state
.object
= Qnil
;
246 if (parse_sexp_lookup_properties
)
249 update_syntax_table_forward (from
, true, Qnil
);
250 else if (from
> BEGV
)
252 update_syntax_table (from
- 1, count
, true, Qnil
);
253 parse_sexp_propertize (from
- 1);
258 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
259 If it is t (which is only used in fast_c_string_match_ignore_case),
260 ignore properties altogether.
262 This is meant for regex.c to use. For buffers, regex.c passes arguments
263 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
264 So if it is a buffer, we set the offset field to BEGV. */
267 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object
,
268 ptrdiff_t from
, ptrdiff_t count
)
270 SETUP_BUFFER_SYNTAX_TABLE ();
271 gl_state
.object
= object
;
272 if (BUFFERP (gl_state
.object
))
274 struct buffer
*buf
= XBUFFER (gl_state
.object
);
275 gl_state
.b_property
= 1;
276 gl_state
.e_property
= BUF_ZV (buf
) - BUF_BEGV (buf
) + 1;
277 gl_state
.offset
= BUF_BEGV (buf
) - 1;
279 else if (NILP (gl_state
.object
))
281 gl_state
.b_property
= 1;
282 gl_state
.e_property
= ZV
- BEGV
+ 1;
283 gl_state
.offset
= BEGV
- 1;
285 else if (EQ (gl_state
.object
, Qt
))
287 gl_state
.b_property
= 0;
288 gl_state
.e_property
= PTRDIFF_MAX
;
293 gl_state
.b_property
= 0;
294 gl_state
.e_property
= 1 + SCHARS (gl_state
.object
);
297 if (parse_sexp_lookup_properties
)
298 update_syntax_table (from
+ gl_state
.offset
- (count
<= 0),
299 count
, 1, gl_state
.object
);
302 /* Update gl_state to an appropriate interval which contains CHARPOS. The
303 sign of COUNT give the relative position of CHARPOS wrt the previously
304 valid interval. If INIT, only [be]_property fields of gl_state are
305 valid at start, the rest is filled basing on OBJECT.
307 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
308 direction than the intervals - or in an interval. We update the
309 current syntax-table basing on the property of this interval, and
310 update the interval to start further than CHARPOS - or be
311 NULL. We also update lim_property to be the next value of
312 charpos to call this subroutine again - or be before/after the
313 start/end of OBJECT. */
316 update_syntax_table (ptrdiff_t charpos
, EMACS_INT count
, bool init
,
319 Lisp_Object tmp_table
;
321 bool invalidate
= true;
326 gl_state
.old_prop
= Qnil
;
327 gl_state
.start
= gl_state
.b_property
;
328 gl_state
.stop
= gl_state
.e_property
;
329 i
= interval_of (charpos
, object
);
330 gl_state
.backward_i
= gl_state
.forward_i
= i
;
334 /* interval_of updates only ->position of the return value, so
335 update the parents manually to speed up update_interval. */
336 while (!NULL_PARENT (i
))
338 if (AM_RIGHT_CHILD (i
))
339 INTERVAL_PARENT (i
)->position
= i
->position
340 - LEFT_TOTAL_LENGTH (i
) + TOTAL_LENGTH (i
) /* right end */
341 - TOTAL_LENGTH (INTERVAL_PARENT (i
))
342 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i
));
344 INTERVAL_PARENT (i
)->position
= i
->position
- LEFT_TOTAL_LENGTH (i
)
346 i
= INTERVAL_PARENT (i
);
348 i
= gl_state
.forward_i
;
349 gl_state
.b_property
= i
->position
- gl_state
.offset
;
350 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
353 i
= count
> 0 ? gl_state
.forward_i
: gl_state
.backward_i
;
355 /* We are guaranteed to be called with CHARPOS either in i,
358 error ("Error in syntax_table logic for to-the-end intervals");
359 else if (charpos
< i
->position
) /* Move left. */
362 error ("Error in syntax_table logic for intervals <-");
363 /* Update the interval. */
364 i
= update_interval (i
, charpos
);
365 if (INTERVAL_LAST_POS (i
) != gl_state
.b_property
)
368 gl_state
.forward_i
= i
;
369 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
372 else if (charpos
>= INTERVAL_LAST_POS (i
)) /* Move right. */
375 error ("Error in syntax_table logic for intervals ->");
376 /* Update the interval. */
377 i
= update_interval (i
, charpos
);
378 if (i
->position
!= gl_state
.e_property
)
381 gl_state
.backward_i
= i
;
382 gl_state
.b_property
= i
->position
- gl_state
.offset
;
387 tmp_table
= textget (i
->plist
, Qsyntax_table
);
390 invalidate
= !EQ (tmp_table
, gl_state
.old_prop
); /* Need to invalidate? */
392 if (invalidate
) /* Did not get to adjacent interval. */
393 { /* with the same table => */
394 /* invalidate the old range. */
397 gl_state
.backward_i
= i
;
398 gl_state
.b_property
= i
->position
- gl_state
.offset
;
402 gl_state
.forward_i
= i
;
403 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
407 if (!EQ (tmp_table
, gl_state
.old_prop
))
409 gl_state
.current_syntax_table
= tmp_table
;
410 gl_state
.old_prop
= tmp_table
;
411 if (EQ (Fsyntax_table_p (tmp_table
), Qt
))
413 gl_state
.use_global
= 0;
415 else if (CONSP (tmp_table
))
417 gl_state
.use_global
= 1;
418 gl_state
.global_code
= tmp_table
;
422 gl_state
.use_global
= 0;
423 gl_state
.current_syntax_table
= BVAR (current_buffer
, syntax_table
);
429 if (cnt
&& !EQ (tmp_table
, textget (i
->plist
, Qsyntax_table
)))
433 gl_state
.e_property
= i
->position
- gl_state
.offset
;
434 gl_state
.forward_i
= i
;
439 = i
->position
+ LENGTH (i
) - gl_state
.offset
;
440 gl_state
.backward_i
= i
;
444 else if (cnt
== INTERVALS_AT_ONCE
)
449 = i
->position
+ LENGTH (i
) - gl_state
.offset
450 /* e_property at EOB is not set to ZV but to ZV+1, so that
451 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
452 having to check eob between the two. */
453 + (next_interval (i
) ? 0 : 1);
454 gl_state
.forward_i
= i
;
458 gl_state
.b_property
= i
->position
- gl_state
.offset
;
459 gl_state
.backward_i
= i
;
464 i
= count
> 0 ? next_interval (i
) : previous_interval (i
);
466 eassert (i
== NULL
); /* This property goes to the end. */
469 gl_state
.e_property
= gl_state
.stop
;
470 gl_state
.forward_i
= i
;
473 gl_state
.b_property
= gl_state
.start
;
477 parse_sexp_propertize (ptrdiff_t charpos
)
480 if (syntax_propertize__done
<= charpos
481 && syntax_propertize__done
< zv
)
483 EMACS_INT modiffs
= CHARS_MODIFF
;
484 safe_call1 (Qinternal__syntax_propertize
,
485 make_number (min (zv
, 1 + charpos
)));
486 if (modiffs
!= CHARS_MODIFF
)
487 error ("parse-sexp-propertize-function modified the buffer!");
488 if (syntax_propertize__done
<= charpos
489 && syntax_propertize__done
< zv
)
490 error ("parse-sexp-propertize-function did not move"
491 " syntax-propertize--done");
492 SETUP_SYNTAX_TABLE (charpos
, 1);
494 else if (gl_state
.e_property
> syntax_propertize__done
)
496 gl_state
.e_property
= syntax_propertize__done
;
497 gl_state
.e_property_truncated
= true;
499 else if (gl_state
.e_property_truncated
500 && gl_state
.e_property
< syntax_propertize__done
)
501 { /* When moving backward, e_property might be set without resetting
502 e_property_truncated, so the e_property_truncated flag may
503 occasionally be left raised spuriously. This should be rare. */
504 gl_state
.e_property_truncated
= false;
505 update_syntax_table_forward (charpos
, false, Qnil
);
510 update_syntax_table_forward (ptrdiff_t charpos
, bool init
,
513 if (gl_state
.e_property_truncated
)
515 eassert (NILP (object
));
516 eassert (charpos
>= gl_state
.e_property
);
520 update_syntax_table (charpos
, 1, init
, object
);
521 if (NILP (object
) && gl_state
.e_property
> syntax_propertize__done
)
522 parse_sexp_propertize (charpos
);
526 /* Returns true if char at CHARPOS is quoted.
527 Global syntax-table data should be set up already to be good at CHARPOS
528 or after. On return global syntax data is good for lookup at CHARPOS. */
531 char_quoted (ptrdiff_t charpos
, ptrdiff_t bytepos
)
533 enum syntaxcode code
;
534 ptrdiff_t beg
= BEGV
;
536 ptrdiff_t orig
= charpos
;
538 while (charpos
> beg
)
541 DEC_BOTH (charpos
, bytepos
);
543 UPDATE_SYNTAX_TABLE_BACKWARD (charpos
);
544 c
= FETCH_CHAR_AS_MULTIBYTE (bytepos
);
546 if (! (code
== Scharquote
|| code
== Sescape
))
552 UPDATE_SYNTAX_TABLE (orig
);
556 /* Return the bytepos one character before BYTEPOS.
557 We assume that BYTEPOS is not at the start of the buffer. */
560 dec_bytepos (ptrdiff_t bytepos
)
562 if (NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
569 /* Return a defun-start position before POS and not too far before.
570 It should be the last one before POS, or nearly the last.
572 When open_paren_in_column_0_is_defun_start is nonzero,
573 only the beginning of the buffer is treated as a defun-start.
575 We record the information about where the scan started
576 and what its result was, so that another call in the same area
577 can return the same value very quickly.
579 There is no promise at which position the global syntax data is
580 valid on return from the subroutine, so the caller should explicitly
581 update the global data. */
584 find_defun_start (ptrdiff_t pos
, ptrdiff_t pos_byte
)
586 ptrdiff_t opoint
= PT
, opoint_byte
= PT_BYTE
;
588 /* Use previous finding, if it's valid and applies to this inquiry. */
589 if (current_buffer
== find_start_buffer
590 /* Reuse the defun-start even if POS is a little farther on.
591 POS might be in the next defun, but that's ok.
592 Our value may not be the best possible, but will still be usable. */
593 && pos
<= find_start_pos
+ 1000
594 && pos
>= find_start_value
595 && BEGV
== find_start_begv
596 && MODIFF
== find_start_modiff
)
597 return find_start_value
;
599 if (!open_paren_in_column_0_is_defun_start
)
601 find_start_value
= BEGV
;
602 find_start_value_byte
= BEGV_BYTE
;
606 /* Back up to start of line. */
607 scan_newline (pos
, pos_byte
, BEGV
, BEGV_BYTE
, -1, 1);
609 /* We optimize syntax-table lookup for rare updates. Thus we accept
610 only those `^\s(' which are good in global _and_ text-property
612 SETUP_BUFFER_SYNTAX_TABLE ();
617 /* Open-paren at start of line means we may have found our
619 c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
620 if (SYNTAX (c
) == Sopen
)
622 SETUP_SYNTAX_TABLE (PT
+ 1, -1); /* Try again... */
623 c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
624 if (SYNTAX (c
) == Sopen
)
626 /* Now fallback to the default value. */
627 SETUP_BUFFER_SYNTAX_TABLE ();
629 /* Move to beg of previous line. */
630 scan_newline (PT
, PT_BYTE
, BEGV
, BEGV_BYTE
, -2, 1);
633 /* Record what we found, for the next try. */
634 find_start_value
= PT
;
635 find_start_value_byte
= PT_BYTE
;
636 TEMP_SET_PT_BOTH (opoint
, opoint_byte
);
639 find_start_buffer
= current_buffer
;
640 find_start_modiff
= MODIFF
;
641 find_start_begv
= BEGV
;
642 find_start_pos
= pos
;
644 return find_start_value
;
647 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
650 prev_char_comend_first (ptrdiff_t pos
, ptrdiff_t pos_byte
)
655 DEC_BOTH (pos
, pos_byte
);
656 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
657 c
= FETCH_CHAR (pos_byte
);
658 val
= SYNTAX_COMEND_FIRST (c
);
659 UPDATE_SYNTAX_TABLE_FORWARD (pos
+ 1);
663 /* Check whether charpos FROM is at the end of a comment.
664 FROM_BYTE is the bytepos corresponding to FROM.
665 Do not move back before STOP.
667 Return true if we find a comment ending at FROM/FROM_BYTE.
669 If successful, store the charpos of the comment's beginning
670 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
672 Global syntax data remains valid for backward search starting at
673 the returned value (or at FROM, if the search was not successful). */
676 back_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
677 bool comnested
, int comstyle
, ptrdiff_t *charpos_ptr
,
678 ptrdiff_t *bytepos_ptr
)
680 /* Look back, counting the parity of string-quotes,
681 and recording the comment-starters seen.
682 When we reach a safe place, assume that's not in a string;
683 then step the main scan to the earliest comment-starter seen
684 an even number of string quotes away from the safe place.
686 OFROM[I] is position of the earliest comment-starter seen
687 which is I+2X quotes from the comment-end.
688 PARITY is current parity of quotes from the comment end. */
689 int string_style
= -1; /* Presumed outside of any string. */
690 bool string_lossage
= 0;
691 /* Not a real lossage: indicates that we have passed a matching comment
692 starter plus a non-matching comment-ender, meaning that any matching
693 comment-starter we might see later could be a false positive (hidden
694 inside another comment).
695 Test case: { a (* b } c (* d *) */
696 bool comment_lossage
= 0;
697 ptrdiff_t comment_end
= from
;
698 ptrdiff_t comment_end_byte
= from_byte
;
699 ptrdiff_t comstart_pos
= 0;
700 ptrdiff_t comstart_byte
IF_LINT (= 0);
701 /* Place where the containing defun starts,
702 or 0 if we didn't come across it yet. */
703 ptrdiff_t defun_start
= 0;
704 ptrdiff_t defun_start_byte
= 0;
705 enum syntaxcode code
;
706 ptrdiff_t nesting
= 1; /* Current comment nesting. */
710 /* FIXME: A }} comment-ender style leads to incorrect behavior
711 in the case of {{ c }}} because we ignore the last two chars which are
712 assumed to be comment-enders although they aren't. */
714 /* At beginning of range to scan, we're outside of strings;
715 that determines quote parity to the comment-end. */
720 bool com2start
, com2end
, comstart
;
722 /* Move back and examine a character. */
723 DEC_BOTH (from
, from_byte
);
724 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
726 prev_syntax
= syntax
;
727 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
728 syntax
= SYNTAX_WITH_FLAGS (c
);
731 /* Check for 2-char comment markers. */
732 com2start
= (SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
733 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax
)
735 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax
, syntax
))
736 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
)
737 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
);
738 com2end
= (SYNTAX_FLAGS_COMEND_FIRST (syntax
)
739 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax
));
740 comstart
= (com2start
|| code
== Scomment
);
742 /* Nasty cases with overlapping 2-char comment markers:
743 - snmp-mode: -- c -- foo -- c --
751 /* If a 2-char comment sequence partly overlaps with another,
752 we don't try to be clever. E.g. |*| in C, or }% in modes that
753 have %..\n and %{..}%. */
754 if (from
> stop
&& (com2end
|| comstart
))
756 ptrdiff_t next
= from
, next_byte
= from_byte
;
757 int next_c
, next_syntax
;
758 DEC_BOTH (next
, next_byte
);
759 UPDATE_SYNTAX_TABLE_BACKWARD (next
);
760 next_c
= FETCH_CHAR_AS_MULTIBYTE (next_byte
);
761 next_syntax
= SYNTAX_WITH_FLAGS (next_c
);
762 if (((comstart
|| comnested
)
763 && SYNTAX_FLAGS_COMEND_SECOND (syntax
)
764 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax
))
765 || ((com2end
|| comnested
)
766 && SYNTAX_FLAGS_COMSTART_SECOND (syntax
)
768 == SYNTAX_FLAGS_COMMENT_STYLE (syntax
, prev_syntax
))
769 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax
)))
771 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
774 if (com2start
&& comstart_pos
== 0)
775 /* We're looking at a comment starter. But it might be a comment
776 ender as well (see snmp-mode). The first time we see one, we
777 need to consider it as a comment starter,
778 and the subsequent times as a comment ender. */
781 /* Turn a 2-char comment sequences into the appropriate syntax. */
786 /* Ignore comment starters of a different style. */
787 else if (code
== Scomment
788 && (comstyle
!= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0)
789 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
) != comnested
))
792 /* Ignore escaped characters, except comment-enders. */
793 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
800 c
= (code
== Sstring_fence
? ST_STRING_STYLE
: ST_COMMENT_STYLE
);
802 /* Track parity of quotes. */
803 if (string_style
== -1)
804 /* Entering a string. */
806 else if (string_style
== c
)
807 /* Leaving the string. */
810 /* If we have two kinds of string delimiters.
811 There's no way to grok this scanning backwards. */
816 /* We've already checked that it is the relevant comstyle. */
817 if (string_style
!= -1 || comment_lossage
|| string_lossage
)
818 /* There are odd string quotes involved, so let's be careful.
819 Test case in Pascal: " { " a { " } */
824 /* Record best comment-starter so far. */
826 comstart_byte
= from_byte
;
828 else if (--nesting
<= 0)
829 /* nested comments have to be balanced, so we don't need to
830 keep looking for earlier ones. We use here the same (slightly
831 incorrect) reasoning as below: since it is followed by uniform
832 paired string quotes, this comment-start has to be outside of
833 strings, else the comment-end itself would be inside a string. */
838 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == comstyle
839 && ((com2end
&& SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
))
840 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
)
841 /* This is the same style of comment ender as ours. */
846 /* Anything before that can't count because it would match
847 this comment-ender rather than ours. */
848 from
= stop
; /* Break out of the loop. */
850 else if (comstart_pos
!= 0 || c
!= '\n')
851 /* We're mixing comment styles here, so we'd better be careful.
852 The (comstart_pos != 0 || c != '\n') check is not quite correct
853 (we should just always set comment_lossage), but removing it
854 would imply that any multiline comment in C would go through
855 lossage, which seems overkill.
856 The failure should only happen in the rare cases such as
862 /* Assume a defun-start point is outside of strings. */
863 if (open_paren_in_column_0_is_defun_start
865 || (temp_byte
= dec_bytepos (from_byte
),
866 FETCH_CHAR (temp_byte
) == '\n')))
869 defun_start_byte
= from_byte
;
870 from
= stop
; /* Break out of the loop. */
879 if (comstart_pos
== 0)
882 from_byte
= comment_end_byte
;
883 UPDATE_SYNTAX_TABLE_FORWARD (comment_end
);
885 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
886 or `done'), then we've found the beginning of the non-nested comment. */
887 else if (1) /* !comnested */
890 from_byte
= comstart_byte
;
891 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
895 struct lisp_parse_state state
;
896 bool adjusted
= true;
897 /* We had two kinds of string delimiters mixed up
898 together. Decode this going forwards.
899 Scan fwd from a known safe place (beginning-of-defun)
900 to the one in question; this records where we
901 last passed a comment starter. */
902 /* If we did not already find the defun start, find it now. */
903 if (defun_start
== 0)
905 defun_start
= find_defun_start (comment_end
, comment_end_byte
);
906 defun_start_byte
= find_start_value_byte
;
907 adjusted
= (defun_start
> BEGV
);
911 scan_sexps_forward (&state
,
912 defun_start
, defun_start_byte
,
913 comment_end
, TYPE_MINIMUM (EMACS_INT
),
915 defun_start
= comment_end
;
920 = CONSP (state
.levelstarts
) ? XINT (XCAR (state
.levelstarts
))
921 : state
.thislevelstart
>= 0 ? state
.thislevelstart
923 find_start_value_byte
= CHAR_TO_BYTE (find_start_value
);
926 if (state
.incomment
== (comnested
? 1 : -1)
927 && state
.comstyle
== comstyle
)
928 from
= state
.comstr_start
;
933 /* If comment_end is inside some other comment, maybe ours
934 is nested, so we need to try again from within the
935 surrounding comment. Example: { a (* " *) */
937 /* FIXME: We should advance by one or two chars. */
938 defun_start
= state
.comstr_start
+ 2;
939 defun_start_byte
= CHAR_TO_BYTE (defun_start
);
942 } while (defun_start
< comment_end
);
944 from_byte
= CHAR_TO_BYTE (from
);
945 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
950 *bytepos_ptr
= from_byte
;
952 return from
!= comment_end
;
955 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
956 doc
: /* Return t if OBJECT is a syntax table.
957 Currently, any char-table counts as a syntax table. */)
960 if (CHAR_TABLE_P (object
)
961 && EQ (XCHAR_TABLE (object
)->purpose
, Qsyntax_table
))
967 check_syntax_table (Lisp_Object obj
)
969 CHECK_TYPE (CHAR_TABLE_P (obj
) && EQ (XCHAR_TABLE (obj
)->purpose
, Qsyntax_table
),
970 Qsyntax_table_p
, obj
);
973 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
974 doc
: /* Return the current syntax table.
975 This is the one specified by the current buffer. */)
978 return BVAR (current_buffer
, syntax_table
);
981 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
982 Sstandard_syntax_table
, 0, 0, 0,
983 doc
: /* Return the standard syntax table.
984 This is the one used for new buffers. */)
987 return Vstandard_syntax_table
;
990 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
991 doc
: /* Construct a new syntax table and return it.
992 It is a copy of the TABLE, which defaults to the standard syntax table. */)
998 check_syntax_table (table
);
1000 table
= Vstandard_syntax_table
;
1002 copy
= Fcopy_sequence (table
);
1004 /* Only the standard syntax table should have a default element.
1005 Other syntax tables should inherit from parents instead. */
1006 set_char_table_defalt (copy
, Qnil
);
1008 /* Copied syntax tables should all have parents.
1009 If we copied one with no parent, such as the standard syntax table,
1010 use the standard syntax table as the copy's parent. */
1011 if (NILP (XCHAR_TABLE (copy
)->parent
))
1012 Fset_char_table_parent (copy
, Vstandard_syntax_table
);
1016 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
1017 doc
: /* Select a new syntax table for the current buffer.
1018 One argument, a syntax table. */)
1022 check_syntax_table (table
);
1023 bset_syntax_table (current_buffer
, table
);
1024 /* Indicate that this buffer now has a specified syntax table. */
1025 idx
= PER_BUFFER_VAR_IDX (syntax_table
);
1026 SET_PER_BUFFER_VALUE_P (current_buffer
, idx
, 1);
1030 /* Convert a letter which signifies a syntax code
1031 into the code it signifies.
1032 This is used by modify-syntax-entry, and other things. */
1034 unsigned char const syntax_spec_code
[0400] =
1035 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1036 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1037 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1038 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1039 Swhitespace
, Scomment_fence
, Sstring
, 0377, Smath
, 0377, 0377, Squote
,
1040 Sopen
, Sclose
, 0377, 0377, 0377, Swhitespace
, Spunct
, Scharquote
,
1041 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1042 0377, 0377, 0377, 0377, Scomment
, 0377, Sendcomment
, 0377,
1043 Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1044 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1045 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
1046 0377, 0377, 0377, 0377, Sescape
, 0377, 0377, Ssymbol
,
1047 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1048 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1049 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
1050 0377, 0377, 0377, 0377, Sstring_fence
, 0377, 0377, 0377
1053 /* Indexed by syntax code, give the letter that describes it. */
1055 char const syntax_code_spec
[16] =
1057 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1061 /* Indexed by syntax code, give the object (cons of syntax code and
1062 nil) to be stored in syntax table. Since these objects can be
1063 shared among syntax tables, we generate them in advance. By
1064 sharing objects, the function `describe-syntax' can give a more
1066 static Lisp_Object Vsyntax_code_object
;
1069 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
1070 doc
: /* Return the syntax code of CHARACTER, described by a character.
1071 For example, if CHARACTER is a word constituent, the
1072 character `w' (119) is returned.
1073 The characters that correspond to various syntax codes
1074 are listed in the documentation of `modify-syntax-entry'. */)
1075 (Lisp_Object character
)
1078 CHECK_CHARACTER (character
);
1079 char_int
= XINT (character
);
1080 SETUP_BUFFER_SYNTAX_TABLE ();
1081 return make_number (syntax_code_spec
[SYNTAX (char_int
)]);
1084 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
1085 doc
: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1086 (Lisp_Object character
)
1089 enum syntaxcode code
;
1090 CHECK_CHARACTER (character
);
1091 char_int
= XINT (character
);
1092 SETUP_BUFFER_SYNTAX_TABLE ();
1093 code
= SYNTAX (char_int
);
1094 if (code
== Sopen
|| code
== Sclose
)
1095 return SYNTAX_MATCH (char_int
);
1099 DEFUN ("string-to-syntax", Fstring_to_syntax
, Sstring_to_syntax
, 1, 1, 0,
1100 doc
: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1101 STRING should be a string of the form allowed as argument of
1102 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1103 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1104 the value of a `syntax-table' text property. */)
1105 (Lisp_Object string
)
1107 const unsigned char *p
;
1111 CHECK_STRING (string
);
1114 val
= syntax_spec_code
[*p
++];
1116 error ("Invalid syntax description letter: %c", p
[-1]);
1118 if (val
== Sinherit
)
1124 int character
= STRING_CHAR_AND_LENGTH (p
, len
);
1125 XSETINT (match
, character
);
1126 if (XFASTINT (match
) == ' ')
1169 if (val
< ASIZE (Vsyntax_code_object
) && NILP (match
))
1170 return AREF (Vsyntax_code_object
, val
);
1172 /* Since we can't use a shared object, let's make a new one. */
1173 return Fcons (make_number (val
), match
);
1176 /* I really don't know why this is interactive
1177 help-form should at least be made useful whilst reading the second arg. */
1178 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
1179 "cSet syntax for character: \nsSet syntax for %s to: ",
1180 doc
: /* Set syntax for character CHAR according to string NEWENTRY.
1181 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1182 the current buffer's syntax table.
1183 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1184 in the range MIN to MAX are changed.
1185 The first character of NEWENTRY should be one of the following:
1186 Space or - whitespace syntax. w word constituent.
1187 _ symbol constituent. . punctuation.
1188 ( open-parenthesis. ) close-parenthesis.
1189 " string quote. \\ escape.
1190 $ paired delimiter. \\=' expression quote or prefix operator.
1191 < comment starter. > comment ender.
1192 / character-quote. @ inherit from parent table.
1193 | generic string fence. ! generic comment fence.
1195 Only single-character comment start and end sequences are represented thus.
1196 Two-character sequences are represented as described below.
1197 The second character of NEWENTRY is the matching parenthesis,
1198 used only if the first character is `(' or `)'.
1199 Any additional characters are flags.
1200 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1201 1 means CHAR is the start of a two-char comment start sequence.
1202 2 means CHAR is the second character of such a sequence.
1203 3 means CHAR is the start of a two-char comment end sequence.
1204 4 means CHAR is the second character of such a sequence.
1206 There can be several orthogonal comment sequences. This is to support
1207 language modes such as C++. By default, all comment sequences are of style
1208 a, but you can set the comment sequence style to b (on the second character
1209 of a comment-start, and the first character of a comment-end sequence) and/or
1210 c (on any of its chars) using this flag:
1211 b means CHAR is part of comment sequence b.
1212 c means CHAR is part of comment sequence c.
1213 n means CHAR is part of a nestable comment sequence.
1215 p means CHAR is a prefix character for `backward-prefix-chars';
1216 such characters are treated as whitespace when they occur
1217 between expressions.
1218 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1219 (Lisp_Object c
, Lisp_Object newentry
, Lisp_Object syntax_table
)
1223 CHECK_CHARACTER_CAR (c
);
1224 CHECK_CHARACTER_CDR (c
);
1227 CHECK_CHARACTER (c
);
1229 if (NILP (syntax_table
))
1230 syntax_table
= BVAR (current_buffer
, syntax_table
);
1232 check_syntax_table (syntax_table
);
1234 newentry
= Fstring_to_syntax (newentry
);
1236 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table
, c
, newentry
);
1238 SET_RAW_SYNTAX_ENTRY (syntax_table
, XINT (c
), newentry
);
1240 /* We clear the regexp cache, since character classes can now have
1241 different values from those in the compiled regexps.*/
1242 clear_regexp_cache ();
1247 /* Dump syntax table to buffer in human-readable format */
1249 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value
,
1250 Sinternal_describe_syntax_value
, 1, 1, 0,
1251 doc
: /* Insert a description of the internal syntax description SYNTAX at point. */)
1252 (Lisp_Object syntax
)
1254 int code
, syntax_code
;
1255 bool start1
, start2
, end1
, end2
, prefix
, comstyleb
, comstylec
, comnested
;
1257 Lisp_Object first
, match_lisp
, value
= syntax
;
1261 insert_string ("default");
1265 if (CHAR_TABLE_P (value
))
1267 insert_string ("deeper char-table ...");
1273 insert_string ("invalid");
1277 first
= XCAR (value
);
1278 match_lisp
= XCDR (value
);
1280 if (!INTEGERP (first
) || !(NILP (match_lisp
) || CHARACTERP (match_lisp
)))
1282 insert_string ("invalid");
1286 syntax_code
= XINT (first
) & INT_MAX
;
1287 code
= syntax_code
& 0377;
1288 start1
= SYNTAX_FLAGS_COMSTART_FIRST (syntax_code
);
1289 start2
= SYNTAX_FLAGS_COMSTART_SECOND (syntax_code
);
1290 end1
= SYNTAX_FLAGS_COMEND_FIRST (syntax_code
);
1291 end2
= SYNTAX_FLAGS_COMEND_SECOND (syntax_code
);
1292 prefix
= SYNTAX_FLAGS_PREFIX (syntax_code
);
1293 comstyleb
= SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code
);
1294 comstylec
= SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code
);
1295 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax_code
);
1299 insert_string ("invalid");
1303 str
[0] = syntax_code_spec
[code
], str
[1] = 0;
1306 if (NILP (match_lisp
))
1309 insert_char (XINT (match_lisp
));
1330 insert_string ("\twhich means: ");
1335 insert_string ("whitespace"); break;
1337 insert_string ("punctuation"); break;
1339 insert_string ("word"); break;
1341 insert_string ("symbol"); break;
1343 insert_string ("open"); break;
1345 insert_string ("close"); break;
1347 insert_string ("prefix"); break;
1349 insert_string ("string"); break;
1351 insert_string ("math"); break;
1353 insert_string ("escape"); break;
1355 insert_string ("charquote"); break;
1357 insert_string ("comment"); break;
1359 insert_string ("endcomment"); break;
1361 insert_string ("inherit"); break;
1362 case Scomment_fence
:
1363 insert_string ("comment fence"); break;
1365 insert_string ("string fence"); break;
1367 insert_string ("invalid");
1371 if (!NILP (match_lisp
))
1373 insert_string (", matches ");
1374 insert_char (XINT (match_lisp
));
1378 insert_string (",\n\t is the first character of a comment-start sequence");
1380 insert_string (",\n\t is the second character of a comment-start sequence");
1383 insert_string (",\n\t is the first character of a comment-end sequence");
1385 insert_string (",\n\t is the second character of a comment-end sequence");
1387 insert_string (" (comment style b)");
1389 insert_string (" (comment style c)");
1391 insert_string (" (nestable)");
1395 AUTO_STRING (prefixdoc
,
1396 ",\n\t is a prefix character for `backward-prefix-chars'");
1397 insert1 (Fsubstitute_command_keys (prefixdoc
));
1403 /* Return the position across COUNT words from FROM.
1404 If that many words cannot be found before the end of the buffer, return 0.
1405 COUNT negative means scan backward and stop at word beginning. */
1408 scan_words (register ptrdiff_t from
, register EMACS_INT count
)
1410 register ptrdiff_t beg
= BEGV
;
1411 register ptrdiff_t end
= ZV
;
1412 register ptrdiff_t from_byte
= CHAR_TO_BYTE (from
);
1413 register enum syntaxcode code
;
1415 Lisp_Object func
, pos
;
1420 SETUP_SYNTAX_TABLE (from
, count
);
1431 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1432 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1433 code
= SYNTAX (ch0
);
1434 INC_BOTH (from
, from_byte
);
1435 if (words_include_escapes
1436 && (code
== Sescape
|| code
== Scharquote
))
1441 /* Now CH0 is a character which begins a word and FROM is the
1442 position of the next character. */
1443 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch0
);
1444 if (! NILP (Ffboundp (func
)))
1446 pos
= call2 (func
, make_number (from
- 1), make_number (end
));
1447 if (INTEGERP (pos
) && from
< XINT (pos
) && XINT (pos
) <= ZV
)
1450 from_byte
= CHAR_TO_BYTE (from
);
1457 if (from
== end
) break;
1458 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1459 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1460 code
= SYNTAX (ch1
);
1462 && (! words_include_escapes
1463 || (code
!= Sescape
&& code
!= Scharquote
)))
1464 || word_boundary_p (ch0
, ch1
))
1466 INC_BOTH (from
, from_byte
);
1481 DEC_BOTH (from
, from_byte
);
1482 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1483 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1484 code
= SYNTAX (ch1
);
1485 if (words_include_escapes
1486 && (code
== Sescape
|| code
== Scharquote
))
1491 /* Now CH1 is a character which ends a word and FROM is the
1493 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch1
);
1494 if (! NILP (Ffboundp (func
)))
1496 pos
= call2 (func
, make_number (from
), make_number (beg
));
1497 if (INTEGERP (pos
) && BEGV
<= XINT (pos
) && XINT (pos
) < from
)
1500 from_byte
= CHAR_TO_BYTE (from
);
1509 DEC_BOTH (from
, from_byte
);
1510 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1511 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1512 code
= SYNTAX (ch0
);
1514 && (! words_include_escapes
1515 || (code
!= Sescape
&& code
!= Scharquote
)))
1516 || word_boundary_p (ch0
, ch1
))
1518 INC_BOTH (from
, from_byte
);
1532 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 0, 1, "^p",
1533 doc
: /* Move point forward ARG words (backward if ARG is negative).
1534 If ARG is omitted or nil, move point forward one word.
1536 If an edge of the buffer or a field boundary is reached, point is left there
1537 and the function returns nil. Field boundaries are not noticed if
1538 `inhibit-field-text-motion' is non-nil. */)
1542 ptrdiff_t orig_val
, val
;
1545 XSETFASTINT (arg
, 1);
1549 val
= orig_val
= scan_words (PT
, XINT (arg
));
1551 val
= XINT (arg
) > 0 ? ZV
: BEGV
;
1553 /* Avoid jumping out of an input field. */
1554 tmp
= Fconstrain_to_field (make_number (val
), make_number (PT
),
1556 val
= XFASTINT (tmp
);
1559 return val
== orig_val
? Qt
: Qnil
;
1562 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
1563 doc
: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1564 STRING is like the inside of a `[...]' in a regular expression
1565 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1566 (but not at the end of a range; quoting is never needed there).
1567 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1568 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1569 Char classes, e.g. `[:alpha:]', are supported.
1571 Returns the distance traveled, either zero or positive. */)
1572 (Lisp_Object string
, Lisp_Object lim
)
1574 return skip_chars (1, string
, lim
, 1);
1577 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
1578 doc
: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1579 See `skip-chars-forward' for details.
1580 Returns the distance traveled, either zero or negative. */)
1581 (Lisp_Object string
, Lisp_Object lim
)
1583 return skip_chars (0, string
, lim
, 1);
1586 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
1587 doc
: /* Move point forward across chars in specified syntax classes.
1588 SYNTAX is a string of syntax code characters.
1589 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1590 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1591 This function returns the distance traveled, either zero or positive. */)
1592 (Lisp_Object syntax
, Lisp_Object lim
)
1594 return skip_syntaxes (1, syntax
, lim
);
1597 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
1598 doc
: /* Move point backward across chars in specified syntax classes.
1599 SYNTAX is a string of syntax code characters.
1600 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1601 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1602 This function returns either zero or a negative number, and the absolute value
1603 of this is the distance traveled. */)
1604 (Lisp_Object syntax
, Lisp_Object lim
)
1606 return skip_syntaxes (0, syntax
, lim
);
1610 skip_chars (bool forwardp
, Lisp_Object string
, Lisp_Object lim
,
1611 bool handle_iso_classes
)
1615 /* Store the ranges of non-ASCII characters. */
1616 int *char_ranges
IF_LINT (= NULL
);
1617 int n_char_ranges
= 0;
1619 ptrdiff_t i
, i_byte
;
1620 /* True if the current buffer is multibyte and the region contains
1623 /* True if STRING is multibyte and it contains non-ASCII chars. */
1624 bool string_multibyte
;
1625 ptrdiff_t size_byte
;
1626 const unsigned char *str
;
1628 Lisp_Object iso_classes
;
1631 CHECK_STRING (string
);
1635 XSETINT (lim
, forwardp
? ZV
: BEGV
);
1637 CHECK_NUMBER_COERCE_MARKER (lim
);
1639 /* In any case, don't allow scan outside bounds of buffer. */
1640 if (XINT (lim
) > ZV
)
1641 XSETFASTINT (lim
, ZV
);
1642 if (XINT (lim
) < BEGV
)
1643 XSETFASTINT (lim
, BEGV
);
1645 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
1646 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
1647 string_multibyte
= SBYTES (string
) > SCHARS (string
);
1649 memset (fastmap
, 0, sizeof fastmap
);
1651 str
= SDATA (string
);
1652 size_byte
= SBYTES (string
);
1655 if (i_byte
< size_byte
1656 && SREF (string
, 0) == '^')
1658 negate
= 1; i_byte
++;
1661 /* Find the characters specified and set their elements of fastmap.
1662 Handle backslashes and ranges specially.
1664 If STRING contains non-ASCII characters, setup char_ranges for
1665 them and use fastmap only for their leading codes. */
1667 if (! string_multibyte
)
1669 bool string_has_eight_bit
= 0;
1671 /* At first setup fastmap. */
1672 while (i_byte
< size_byte
)
1676 if (handle_iso_classes
&& c
== '['
1677 && i_byte
< size_byte
1678 && str
[i_byte
] == ':')
1680 const unsigned char *class_beg
= str
+ i_byte
+ 1;
1681 const unsigned char *class_end
= class_beg
;
1682 const unsigned char *class_limit
= str
+ size_byte
- 2;
1683 /* Leave room for the null. */
1684 unsigned char class_name
[CHAR_CLASS_MAX_LENGTH
+ 1];
1687 if (class_limit
- class_beg
> CHAR_CLASS_MAX_LENGTH
)
1688 class_limit
= class_beg
+ CHAR_CLASS_MAX_LENGTH
;
1690 while (class_end
< class_limit
1691 && *class_end
>= 'a' && *class_end
<= 'z')
1694 if (class_end
== class_beg
1695 || *class_end
!= ':' || class_end
[1] != ']')
1696 goto not_a_class_name
;
1698 memcpy (class_name
, class_beg
, class_end
- class_beg
);
1699 class_name
[class_end
- class_beg
] = 0;
1701 cc
= re_wctype (class_name
);
1703 error ("Invalid ISO C character class");
1705 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1707 i_byte
= class_end
+ 2 - str
;
1714 if (i_byte
== size_byte
)
1719 /* Treat `-' as range character only if another character
1721 if (i_byte
+ 1 < size_byte
1722 && str
[i_byte
] == '-')
1726 /* Skip over the dash. */
1729 /* Get the end of the range. */
1732 && i_byte
< size_byte
)
1740 if (! ASCII_CHAR_P (c2
))
1741 string_has_eight_bit
= 1;
1747 if (! ASCII_CHAR_P (c
))
1748 string_has_eight_bit
= 1;
1752 /* If the current range is multibyte and STRING contains
1753 eight-bit chars, arrange fastmap and setup char_ranges for
1754 the corresponding multibyte chars. */
1755 if (multibyte
&& string_has_eight_bit
)
1758 char himap
[0200 + 1];
1759 memcpy (himap
, fastmap
+ 0200, 0200);
1761 memset (fastmap
+ 0200, 0, 0200);
1762 SAFE_NALLOCA (char_ranges
, 2, 128);
1765 while ((p1
= memchr (himap
+ i
, 1, 0200 - i
)))
1767 /* Deduce the next range C..C2 from the next clump of 1s
1768 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1769 order half of the old FASTMAP. */
1770 int c2
, leading_code
;
1772 c
= BYTE8_TO_CHAR (i
+ 0200);
1774 c2
= BYTE8_TO_CHAR (i
+ 0200 - 1);
1776 char_ranges
[n_char_ranges
++] = c
;
1777 char_ranges
[n_char_ranges
++] = c2
;
1778 leading_code
= CHAR_LEADING_CODE (c
);
1779 memset (fastmap
+ leading_code
, 1,
1780 CHAR_LEADING_CODE (c2
) - leading_code
+ 1);
1784 else /* STRING is multibyte */
1786 SAFE_NALLOCA (char_ranges
, 2, SCHARS (string
));
1788 while (i_byte
< size_byte
)
1790 int leading_code
= str
[i_byte
];
1791 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1794 if (handle_iso_classes
&& c
== '['
1795 && i_byte
< size_byte
1796 && STRING_CHAR (str
+ i_byte
) == ':')
1798 const unsigned char *class_beg
= str
+ i_byte
+ 1;
1799 const unsigned char *class_end
= class_beg
;
1800 const unsigned char *class_limit
= str
+ size_byte
- 2;
1801 /* Leave room for the null. */
1802 unsigned char class_name
[CHAR_CLASS_MAX_LENGTH
+ 1];
1805 if (class_limit
- class_beg
> CHAR_CLASS_MAX_LENGTH
)
1806 class_limit
= class_beg
+ CHAR_CLASS_MAX_LENGTH
;
1808 while (class_end
< class_limit
1809 && *class_end
>= 'a' && *class_end
<= 'z')
1812 if (class_end
== class_beg
1813 || *class_end
!= ':' || class_end
[1] != ']')
1814 goto not_a_class_name_multibyte
;
1816 memcpy (class_name
, class_beg
, class_end
- class_beg
);
1817 class_name
[class_end
- class_beg
] = 0;
1819 cc
= re_wctype (class_name
);
1821 error ("Invalid ISO C character class");
1823 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1825 i_byte
= class_end
+ 2 - str
;
1829 not_a_class_name_multibyte
:
1832 if (i_byte
== size_byte
)
1835 leading_code
= str
[i_byte
];
1836 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1839 /* Treat `-' as range character only if another character
1841 if (i_byte
+ 1 < size_byte
1842 && str
[i_byte
] == '-')
1844 int c2
, leading_code2
;
1846 /* Skip over the dash. */
1849 /* Get the end of the range. */
1850 leading_code2
= str
[i_byte
];
1851 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1855 && i_byte
< size_byte
)
1857 leading_code2
= str
[i_byte
];
1858 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1864 if (ASCII_CHAR_P (c
))
1866 while (c
<= c2
&& c
< 0x80)
1868 leading_code
= CHAR_LEADING_CODE (c
);
1870 if (! ASCII_CHAR_P (c
))
1872 int lim2
= leading_code2
+ 1;
1873 while (leading_code
< lim2
)
1874 fastmap
[leading_code
++] = 1;
1877 char_ranges
[n_char_ranges
++] = c
;
1878 char_ranges
[n_char_ranges
++] = c2
;
1884 if (ASCII_CHAR_P (c
))
1888 fastmap
[leading_code
] = 1;
1889 char_ranges
[n_char_ranges
++] = c
;
1890 char_ranges
[n_char_ranges
++] = c
;
1895 /* If the current range is unibyte and STRING contains non-ASCII
1896 chars, arrange fastmap for the corresponding unibyte
1899 if (! multibyte
&& n_char_ranges
> 0)
1901 memset (fastmap
+ 0200, 0, 0200);
1902 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1904 int c1
= char_ranges
[i
];
1905 int lim2
= char_ranges
[i
+ 1] + 1;
1907 for (; c1
< lim2
; c1
++)
1909 int b
= CHAR_TO_BYTE_SAFE (c1
);
1917 /* If ^ was the first character, complement the fastmap. */
1921 for (i
= 0; i
< sizeof fastmap
; i
++)
1925 for (i
= 0; i
< 0200; i
++)
1927 /* All non-ASCII chars possibly match. */
1928 for (; i
< sizeof fastmap
; i
++)
1934 ptrdiff_t start_point
= PT
;
1936 ptrdiff_t pos_byte
= PT_BYTE
;
1937 unsigned char *p
= PT_ADDR
, *endp
, *stop
;
1941 endp
= (XINT (lim
) == GPT
) ? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
1942 stop
= (pos
< GPT
&& GPT
< XINT (lim
)) ? GPT_ADDR
: endp
;
1946 endp
= CHAR_POS_ADDR (XINT (lim
));
1947 stop
= (pos
>= GPT
&& GPT
> XINT (lim
)) ? GAP_END_ADDR
: endp
;
1951 /* This code may look up syntax tables using functions that rely on the
1952 gl_state object. To make sure this object is not out of date,
1953 let's initialize it manually.
1954 We ignore syntax-table text-properties for now, since that's
1955 what we've done in the past. */
1956 SETUP_BUFFER_SYNTAX_TABLE ();
1971 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
1972 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
1982 if (! ASCII_CHAR_P (c
))
1984 /* As we are looking at a multibyte character, we
1985 must look up the character in the table
1986 CHAR_RANGES. If there's no data in the table,
1987 that character is not what we want to skip. */
1989 /* The following code do the right thing even if
1990 n_char_ranges is zero (i.e. no data in
1992 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1993 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
1995 if (!(negate
^ (i
< n_char_ranges
)))
1999 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
2012 if (!NILP (iso_classes
) && in_classes (*p
, iso_classes
))
2017 goto fwd_unibyte_ok
;
2023 p
++, pos
++, pos_byte
++;
2031 unsigned char *prev_p
;
2041 while (--p
>= stop
&& ! CHAR_HEAD_P (*p
));
2042 c
= STRING_CHAR (p
);
2044 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
2054 if (! ASCII_CHAR_P (c
))
2056 /* See the comment in the previous similar code. */
2057 for (i
= 0; i
< n_char_ranges
; i
+= 2)
2058 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
2060 if (!(negate
^ (i
< n_char_ranges
)))
2064 pos
--, pos_byte
-= prev_p
- p
;
2077 if (! NILP (iso_classes
) && in_classes (p
[-1], iso_classes
))
2082 goto back_unibyte_ok
;
2085 if (!fastmap
[p
[-1]])
2088 p
--, pos
--, pos_byte
--;
2092 SET_PT_BOTH (pos
, pos_byte
);
2096 return make_number (PT
- start_point
);
2102 skip_syntaxes (bool forwardp
, Lisp_Object string
, Lisp_Object lim
)
2105 unsigned char fastmap
[0400];
2107 ptrdiff_t i
, i_byte
;
2109 ptrdiff_t size_byte
;
2112 CHECK_STRING (string
);
2115 XSETINT (lim
, forwardp
? ZV
: BEGV
);
2117 CHECK_NUMBER_COERCE_MARKER (lim
);
2119 /* In any case, don't allow scan outside bounds of buffer. */
2120 if (XINT (lim
) > ZV
)
2121 XSETFASTINT (lim
, ZV
);
2122 if (XINT (lim
) < BEGV
)
2123 XSETFASTINT (lim
, BEGV
);
2125 if (forwardp
? (PT
>= XFASTINT (lim
)) : (PT
<= XFASTINT (lim
)))
2126 return make_number (0);
2128 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
2129 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
2131 memset (fastmap
, 0, sizeof fastmap
);
2133 if (SBYTES (string
) > SCHARS (string
))
2134 /* As this is very rare case (syntax spec is ASCII only), don't
2135 consider efficiency. */
2136 string
= string_make_unibyte (string
);
2138 str
= SDATA (string
);
2139 size_byte
= SBYTES (string
);
2142 if (i_byte
< size_byte
2143 && SREF (string
, 0) == '^')
2145 negate
= 1; i_byte
++;
2148 /* Find the syntaxes specified and set their elements of fastmap. */
2150 while (i_byte
< size_byte
)
2153 fastmap
[syntax_spec_code
[c
]] = 1;
2156 /* If ^ was the first character, complement the fastmap. */
2158 for (i
= 0; i
< sizeof fastmap
; i
++)
2162 ptrdiff_t start_point
= PT
;
2164 ptrdiff_t pos_byte
= PT_BYTE
;
2165 unsigned char *p
= PT_ADDR
, *endp
, *stop
;
2169 endp
= (XINT (lim
) == GPT
) ? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
2170 stop
= (pos
< GPT
&& GPT
< XINT (lim
)) ? GPT_ADDR
: endp
;
2174 endp
= CHAR_POS_ADDR (XINT (lim
));
2175 stop
= (pos
>= GPT
&& GPT
> XINT (lim
)) ? GAP_END_ADDR
: endp
;
2179 SETUP_SYNTAX_TABLE (pos
, forwardp
? 1 : -1);
2195 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
2196 if (! fastmap
[SYNTAX (c
)])
2198 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
2199 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
2213 if (! fastmap
[SYNTAX (*p
)])
2215 p
++, pos
++, pos_byte
++;
2216 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
2226 unsigned char *prev_p
;
2235 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2237 while (--p
>= stop
&& ! CHAR_HEAD_P (*p
));
2238 c
= STRING_CHAR (p
);
2239 if (! fastmap
[SYNTAX (c
)])
2241 pos
--, pos_byte
-= prev_p
- p
;
2255 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2256 if (! fastmap
[SYNTAX (p
[-1])])
2258 p
--, pos
--, pos_byte
--;
2263 SET_PT_BOTH (pos
, pos_byte
);
2266 return make_number (PT
- start_point
);
2270 /* Return true if character C belongs to one of the ISO classes
2271 in the list ISO_CLASSES. Each class is represented by an
2272 integer which is its type according to re_wctype. */
2275 in_classes (int c
, Lisp_Object iso_classes
)
2277 bool fits_class
= 0;
2279 while (CONSP (iso_classes
))
2282 elt
= XCAR (iso_classes
);
2283 iso_classes
= XCDR (iso_classes
);
2285 if (re_iswctype (c
, XFASTINT (elt
)))
2292 /* Jump over a comment, assuming we are at the beginning of one.
2293 FROM is the current position.
2294 FROM_BYTE is the bytepos corresponding to FROM.
2295 Do not move past STOP (a charpos).
2296 The comment over which we have to jump is of style STYLE
2297 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2298 NESTING should be positive to indicate the nesting at the beginning
2299 for nested comments and should be zero or negative else.
2300 ST_COMMENT_STYLE cannot be nested.
2301 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2302 (or 0 If the search cannot start in the middle of a two-character).
2304 If successful, return true and store the charpos of the comment's end
2305 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2306 Else, return false and store the charpos STOP into *CHARPOS_PTR, the
2307 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2308 (as defined for state.incomment) in *INCOMMENT_PTR.
2310 The comment end is the last character of the comment rather than the
2311 character just after the comment.
2313 Global syntax data is assumed to initially be valid for FROM and
2314 remains valid for forward search starting at the returned position. */
2317 forw_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
2318 EMACS_INT nesting
, int style
, int prev_syntax
,
2319 ptrdiff_t *charpos_ptr
, ptrdiff_t *bytepos_ptr
,
2320 EMACS_INT
*incomment_ptr
)
2323 register enum syntaxcode code
;
2324 register int syntax
, other_syntax
;
2326 if (nesting
<= 0) nesting
= -1;
2328 /* Enter the loop in the middle so that we find
2329 a 2-char comment ender if we start in the middle of it. */
2330 syntax
= prev_syntax
;
2331 if (syntax
!= 0) goto forw_incomment
;
2337 *incomment_ptr
= nesting
;
2338 *charpos_ptr
= from
;
2339 *bytepos_ptr
= from_byte
;
2342 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2343 syntax
= SYNTAX_WITH_FLAGS (c
);
2344 code
= syntax
& 0xff;
2345 if (code
== Sendcomment
2346 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
2347 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ?
2348 (nesting
> 0 && --nesting
== 0) : nesting
< 0))
2349 /* We have encountered a comment end of the same style
2350 as the comment sequence which began this comment
2353 if (code
== Scomment_fence
2354 && style
== ST_COMMENT_STYLE
)
2355 /* We have encountered a comment end of the same style
2356 as the comment sequence which began this comment
2361 && SYNTAX_FLAGS_COMMENT_NESTED (syntax
)
2362 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
)
2363 /* We have encountered a nested comment of the same style
2364 as the comment sequence which began this comment section. */
2366 INC_BOTH (from
, from_byte
);
2367 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2370 if (from
< stop
&& SYNTAX_FLAGS_COMEND_FIRST (syntax
)
2371 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2372 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2373 SYNTAX_FLAGS_COMEND_SECOND (other_syntax
))
2374 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, other_syntax
) == style
2375 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2376 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
))
2377 ? nesting
> 0 : nesting
< 0))
2380 /* We have encountered a comment end of the same style
2381 as the comment sequence which began this comment section. */
2385 INC_BOTH (from
, from_byte
);
2386 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2391 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
2392 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2393 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2394 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
) == style
2395 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2396 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2397 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
)))
2398 /* We have encountered a nested comment of the same style
2399 as the comment sequence which began this comment section. */
2401 INC_BOTH (from
, from_byte
);
2402 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2406 *charpos_ptr
= from
;
2407 *bytepos_ptr
= from_byte
;
2411 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
2413 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2414 Stop scanning if we find something other than a comment or whitespace.
2415 Set point to where scanning stops.
2416 If COUNT comments are found as expected, with nothing except whitespace
2417 between them, return t; otherwise return nil. */)
2420 ptrdiff_t from
, from_byte
, stop
;
2422 enum syntaxcode code
;
2423 int comstyle
= 0; /* style of comment encountered */
2424 bool comnested
= 0; /* whether the comment is nestable or not */
2427 ptrdiff_t out_charpos
, out_bytepos
;
2430 CHECK_NUMBER (count
);
2431 count1
= XINT (count
);
2432 stop
= count1
> 0 ? ZV
: BEGV
;
2438 from_byte
= PT_BYTE
;
2440 SETUP_SYNTAX_TABLE (from
, count1
);
2445 bool comstart_first
;
2446 int syntax
, other_syntax
;
2450 SET_PT_BOTH (from
, from_byte
);
2454 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2455 syntax
= SYNTAX_WITH_FLAGS (c
);
2457 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2458 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2459 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2460 INC_BOTH (from
, from_byte
);
2461 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2462 if (from
< stop
&& comstart_first
2463 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2464 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2465 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
)))
2467 /* We have encountered a comment start sequence and we
2468 are ignoring all text inside comments. We must record
2469 the comment style this sequence begins so that later,
2470 only a comment end of the same style actually ends
2471 the comment section. */
2473 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2474 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2475 INC_BOTH (from
, from_byte
);
2476 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2479 while (code
== Swhitespace
|| (code
== Sendcomment
&& c
== '\n'));
2481 if (code
== Scomment_fence
)
2482 comstyle
= ST_COMMENT_STYLE
;
2483 else if (code
!= Scomment
)
2486 DEC_BOTH (from
, from_byte
);
2487 SET_PT_BOTH (from
, from_byte
);
2490 /* We're at the start of a comment. */
2491 found
= forw_comment (from
, from_byte
, stop
, comnested
, comstyle
, 0,
2492 &out_charpos
, &out_bytepos
, &dummy
);
2493 from
= out_charpos
; from_byte
= out_bytepos
;
2497 SET_PT_BOTH (from
, from_byte
);
2500 INC_BOTH (from
, from_byte
);
2501 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2502 /* We have skipped one comment. */
2515 SET_PT_BOTH (BEGV
, BEGV_BYTE
);
2520 DEC_BOTH (from
, from_byte
);
2521 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2522 quoted
= char_quoted (from
, from_byte
);
2523 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2524 syntax
= SYNTAX_WITH_FLAGS (c
);
2527 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2528 if (code
== Sendcomment
)
2529 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2530 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
2531 && prev_char_comend_first (from
, from_byte
)
2532 && !char_quoted (from
- 1, dec_bytepos (from_byte
)))
2535 /* We must record the comment style encountered so that
2536 later, we can match only the proper comment begin
2537 sequence of the same style. */
2538 DEC_BOTH (from
, from_byte
);
2540 /* Calling char_quoted, above, set up global syntax position
2541 at the new value of FROM. */
2542 c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2543 other_syntax
= SYNTAX_WITH_FLAGS (c1
);
2544 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2545 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2548 if (code
== Scomment_fence
)
2550 /* Skip until first preceding unquoted comment_fence. */
2551 bool fence_found
= 0;
2552 ptrdiff_t ini
= from
, ini_byte
= from_byte
;
2556 DEC_BOTH (from
, from_byte
);
2557 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2558 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2559 if (SYNTAX (c
) == Scomment_fence
2560 && !char_quoted (from
, from_byte
))
2565 else if (from
== stop
)
2568 if (fence_found
== 0)
2570 from
= ini
; /* Set point to ini + 1. */
2571 from_byte
= ini_byte
;
2575 /* We have skipped one comment. */
2578 else if (code
== Sendcomment
)
2580 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2581 &out_charpos
, &out_bytepos
);
2585 /* This end-of-line is not an end-of-comment.
2586 Treat it like a whitespace.
2587 CC-mode (and maybe others) relies on this behavior. */
2591 /* Failure: we should go back to the end of this
2592 not-quite-endcomment. */
2593 if (SYNTAX (c
) != code
)
2594 /* It was a two-char Sendcomment. */
2595 INC_BOTH (from
, from_byte
);
2601 /* We have skipped one comment. */
2602 from
= out_charpos
, from_byte
= out_bytepos
;
2606 else if (code
!= Swhitespace
|| quoted
)
2610 INC_BOTH (from
, from_byte
);
2611 SET_PT_BOTH (from
, from_byte
);
2619 SET_PT_BOTH (from
, from_byte
);
2624 /* Return syntax code of character C if C is an ASCII character
2625 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2627 static enum syntaxcode
2628 syntax_multibyte (int c
, bool multibyte_symbol_p
)
2630 return ASCII_CHAR_P (c
) || !multibyte_symbol_p
? SYNTAX (c
) : Ssymbol
;
2634 scan_lists (EMACS_INT from
, EMACS_INT count
, EMACS_INT depth
, bool sexpflag
)
2637 ptrdiff_t stop
= count
> 0 ? ZV
: BEGV
;
2642 enum syntaxcode code
;
2643 EMACS_INT min_depth
= depth
; /* Err out if depth gets less than this. */
2644 int comstyle
= 0; /* Style of comment encountered. */
2645 bool comnested
= 0; /* Whether the comment is nestable or not. */
2647 EMACS_INT last_good
= from
;
2649 ptrdiff_t from_byte
;
2650 ptrdiff_t out_bytepos
, out_charpos
;
2652 bool multibyte_symbol_p
= sexpflag
&& multibyte_syntax_as_symbol
;
2654 if (depth
> 0) min_depth
= 0;
2656 if (from
> ZV
) from
= ZV
;
2657 if (from
< BEGV
) from
= BEGV
;
2659 from_byte
= CHAR_TO_BYTE (from
);
2664 SETUP_SYNTAX_TABLE (from
, count
);
2669 bool comstart_first
, prefix
;
2670 int syntax
, other_syntax
;
2671 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2672 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2673 syntax
= SYNTAX_WITH_FLAGS (c
);
2674 code
= syntax_multibyte (c
, multibyte_symbol_p
);
2675 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2676 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2677 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2678 prefix
= SYNTAX_FLAGS_PREFIX (syntax
);
2679 if (depth
== min_depth
)
2681 INC_BOTH (from
, from_byte
);
2682 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2683 if (from
< stop
&& comstart_first
2684 && (c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2685 other_syntax
= SYNTAX_WITH_FLAGS (c
),
2686 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2687 && parse_sexp_ignore_comments
)
2689 /* We have encountered a comment start sequence and we
2690 are ignoring all text inside comments. We must record
2691 the comment style this sequence begins so that later,
2692 only a comment end of the same style actually ends
2693 the comment section. */
2695 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2696 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2697 INC_BOTH (from
, from_byte
);
2698 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2710 INC_BOTH (from
, from_byte
);
2711 /* Treat following character as a word constituent. */
2714 if (depth
|| !sexpflag
) break;
2715 /* This word counts as a sexp; return at end of it. */
2718 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2720 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2721 switch (syntax_multibyte (c
, multibyte_symbol_p
))
2725 INC_BOTH (from
, from_byte
);
2736 INC_BOTH (from
, from_byte
);
2740 case Scomment_fence
:
2741 comstyle
= ST_COMMENT_STYLE
;
2744 if (!parse_sexp_ignore_comments
) break;
2745 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2746 found
= forw_comment (from
, from_byte
, stop
,
2747 comnested
, comstyle
, 0,
2748 &out_charpos
, &out_bytepos
, &dummy
);
2749 from
= out_charpos
, from_byte
= out_bytepos
;
2756 INC_BOTH (from
, from_byte
);
2757 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2763 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (from_byte
))
2765 INC_BOTH (from
, from_byte
);
2775 if (!++depth
) goto done
;
2780 if (!--depth
) goto done
;
2781 if (depth
< min_depth
)
2782 xsignal3 (Qscan_error
,
2783 build_string ("Containing expression ends prematurely"),
2784 make_number (last_good
), make_number (from
));
2789 temp_pos
= dec_bytepos (from_byte
);
2790 stringterm
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2793 enum syntaxcode c_code
;
2796 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2797 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2798 c_code
= syntax_multibyte (c
, multibyte_symbol_p
);
2800 ? c
== stringterm
&& c_code
== Sstring
2801 : c_code
== Sstring_fence
)
2804 if (c_code
== Scharquote
|| c_code
== Sescape
)
2805 INC_BOTH (from
, from_byte
);
2806 INC_BOTH (from
, from_byte
);
2808 INC_BOTH (from
, from_byte
);
2809 if (!depth
&& sexpflag
) goto done
;
2812 /* Ignore whitespace, punctuation, quote, endcomment. */
2817 /* Reached end of buffer. Error if within object, return nil if between */
2824 /* End of object reached */
2834 DEC_BOTH (from
, from_byte
);
2835 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2836 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2837 syntax
= SYNTAX_WITH_FLAGS (c
);
2838 code
= syntax_multibyte (c
, multibyte_symbol_p
);
2839 if (depth
== min_depth
)
2842 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2843 if (code
== Sendcomment
)
2844 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2845 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
2846 && prev_char_comend_first (from
, from_byte
)
2847 && parse_sexp_ignore_comments
)
2849 /* We must record the comment style encountered so that
2850 later, we can match only the proper comment begin
2851 sequence of the same style. */
2852 int c2
, other_syntax
;
2853 DEC_BOTH (from
, from_byte
);
2854 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2856 c2
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2857 other_syntax
= SYNTAX_WITH_FLAGS (c2
);
2858 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2859 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2862 /* Quoting turns anything except a comment-ender
2863 into a word character. Note that this cannot be true
2864 if we decremented FROM in the if-statement above. */
2865 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
2867 DEC_BOTH (from
, from_byte
);
2870 else if (SYNTAX_FLAGS_PREFIX (syntax
))
2879 if (depth
|| !sexpflag
) break;
2880 /* This word counts as a sexp; count object finished
2881 after passing it. */
2884 temp_pos
= from_byte
;
2885 if (! NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
2889 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2890 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2891 /* Don't allow comment-end to be quoted. */
2892 if (syntax_multibyte (c1
, multibyte_symbol_p
) == Sendcomment
)
2894 quoted
= char_quoted (from
- 1, temp_pos
);
2897 DEC_BOTH (from
, from_byte
);
2898 temp_pos
= dec_bytepos (temp_pos
);
2899 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2901 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2903 switch (syntax_multibyte (c1
, multibyte_symbol_p
))
2905 case Sword
: case Ssymbol
: case Squote
: break;
2906 default: goto done2
;
2908 DEC_BOTH (from
, from_byte
);
2917 temp_pos
= dec_bytepos (from_byte
);
2918 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2919 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (temp_pos
))
2920 DEC_BOTH (from
, from_byte
);
2930 if (!++depth
) goto done2
;
2935 if (!--depth
) goto done2
;
2936 if (depth
< min_depth
)
2937 xsignal3 (Qscan_error
,
2938 build_string ("Containing expression ends prematurely"),
2939 make_number (last_good
), make_number (from
));
2943 if (!parse_sexp_ignore_comments
)
2945 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2946 &out_charpos
, &out_bytepos
);
2947 /* FIXME: if !found, it really wasn't a comment-end.
2948 For single-char Sendcomment, we can't do much about it apart
2949 from skipping the char.
2950 For 2-char endcomments, we could try again, taking both
2951 chars as separate entities, but it's a lot of trouble
2952 for very little gain, so we don't bother either. -sm */
2954 from
= out_charpos
, from_byte
= out_bytepos
;
2957 case Scomment_fence
:
2963 DEC_BOTH (from
, from_byte
);
2964 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2965 if (!char_quoted (from
, from_byte
))
2967 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2968 if (syntax_multibyte (c
, multibyte_symbol_p
) == code
)
2972 if (code
== Sstring_fence
&& !depth
&& sexpflag
) goto done2
;
2976 stringterm
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2981 DEC_BOTH (from
, from_byte
);
2982 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2983 if (!char_quoted (from
, from_byte
))
2985 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2987 && (syntax_multibyte (c
, multibyte_symbol_p
)
2992 if (!depth
&& sexpflag
) goto done2
;
2995 /* Ignore whitespace, punctuation, quote, endcomment. */
3000 /* Reached start of buffer. Error if within object, return nil if between */
3013 XSETFASTINT (val
, from
);
3017 xsignal3 (Qscan_error
,
3018 build_string ("Unbalanced parentheses"),
3019 make_number (last_good
), make_number (from
));
3022 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
3023 doc
: /* Scan from character number FROM by COUNT lists.
3024 Scan forward if COUNT is positive, backward if COUNT is negative.
3025 Return the character number of the position thus found.
3027 A \"list", in this context, refers to a balanced parenthetical
3028 grouping, as determined by the syntax table.
3030 If DEPTH is nonzero, treat that as the nesting depth of the starting
3031 point (i.e. the starting point is DEPTH parentheses deep). This
3032 function scans over parentheses until the depth goes to zero COUNT
3033 times. Hence, positive DEPTH moves out that number of levels of
3034 parentheses, while negative DEPTH moves to a deeper level.
3036 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3038 If we reach the beginning or end of the accessible part of the buffer
3039 before we have scanned over COUNT lists, return nil if the depth at
3040 that point is zero, and signal a error if the depth is nonzero. */)
3041 (Lisp_Object from
, Lisp_Object count
, Lisp_Object depth
)
3043 CHECK_NUMBER (from
);
3044 CHECK_NUMBER (count
);
3045 CHECK_NUMBER (depth
);
3047 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
3050 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
3051 doc
: /* Scan from character number FROM by COUNT balanced expressions.
3052 If COUNT is negative, scan backwards.
3053 Returns the character number of the position thus found.
3055 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3057 If the beginning or end of (the accessible part of) the buffer is reached
3058 in the middle of a parenthetical grouping, an error is signaled.
3059 If the beginning or end is reached between groupings
3060 but before count is used up, nil is returned. */)
3061 (Lisp_Object from
, Lisp_Object count
)
3063 CHECK_NUMBER (from
);
3064 CHECK_NUMBER (count
);
3066 return scan_lists (XINT (from
), XINT (count
), 0, 1);
3069 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
3071 doc
: /* Move point backward over any number of chars with prefix syntax.
3072 This includes chars with expression prefix syntax class (') and those with
3073 the prefix syntax flag (p). */)
3076 ptrdiff_t beg
= BEGV
;
3077 ptrdiff_t opoint
= PT
;
3078 ptrdiff_t opoint_byte
= PT_BYTE
;
3080 ptrdiff_t pos_byte
= PT_BYTE
;
3085 SET_PT_BOTH (opoint
, opoint_byte
);
3090 SETUP_SYNTAX_TABLE (pos
, -1);
3092 DEC_BOTH (pos
, pos_byte
);
3094 while (!char_quoted (pos
, pos_byte
)
3095 /* Previous statement updates syntax table. */
3096 && ((c
= FETCH_CHAR_AS_MULTIBYTE (pos_byte
), SYNTAX (c
) == Squote
)
3097 || syntax_prefix_flag_p (c
)))
3100 opoint_byte
= pos_byte
;
3103 DEC_BOTH (pos
, pos_byte
);
3106 SET_PT_BOTH (opoint
, opoint_byte
);
3111 /* Parse forward from FROM / FROM_BYTE to END,
3112 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
3113 and return a description of the state of the parse at END.
3114 If STOPBEFORE, stop at the start of an atom.
3115 If COMMENTSTOP is 1, stop at the start of a comment.
3116 If COMMENTSTOP is -1, stop at the start or end of a comment,
3117 after the beginning of a string, or after the end of a string. */
3120 scan_sexps_forward (struct lisp_parse_state
*stateptr
,
3121 ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t end
,
3122 EMACS_INT targetdepth
, bool stopbefore
,
3123 Lisp_Object oldstate
, int commentstop
)
3125 struct lisp_parse_state state
;
3126 enum syntaxcode code
;
3129 struct level
{ ptrdiff_t last
, prev
; };
3130 struct level levelstart
[100];
3131 struct level
*curlevel
= levelstart
;
3132 struct level
*endlevel
= levelstart
+ 100;
3133 EMACS_INT depth
; /* Paren depth of current scanning location.
3134 level - levelstart equals this except
3135 when the depth becomes negative. */
3136 EMACS_INT mindepth
; /* Lowest DEPTH value seen. */
3137 bool start_quoted
= 0; /* True means starting after a char quote. */
3139 ptrdiff_t prev_from
; /* Keep one character before FROM. */
3140 ptrdiff_t prev_from_byte
;
3141 int prev_from_syntax
;
3142 bool boundary_stop
= commentstop
== -1;
3145 ptrdiff_t out_bytepos
, out_charpos
;
3149 prev_from_byte
= from_byte
;
3151 DEC_BOTH (prev_from
, prev_from_byte
);
3153 /* Use this macro instead of `from++'. */
3155 do { prev_from = from; \
3156 prev_from_byte = from_byte; \
3157 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3158 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3159 INC_BOTH (from, from_byte); \
3161 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3167 if (NILP (oldstate
))
3170 state
.instring
= -1;
3171 state
.incomment
= 0;
3172 state
.comstyle
= 0; /* comment style a by default. */
3173 state
.comstr_start
= -1; /* no comment/string seen. */
3177 tem
= Fcar (oldstate
);
3183 oldstate
= Fcdr (oldstate
);
3184 oldstate
= Fcdr (oldstate
);
3185 oldstate
= Fcdr (oldstate
);
3186 tem
= Fcar (oldstate
);
3187 /* Check whether we are inside string_fence-style string: */
3188 state
.instring
= (!NILP (tem
)
3189 ? (CHARACTERP (tem
) ? XFASTINT (tem
) : ST_STRING_STYLE
)
3192 oldstate
= Fcdr (oldstate
);
3193 tem
= Fcar (oldstate
);
3194 state
.incomment
= (!NILP (tem
)
3195 ? (INTEGERP (tem
) ? XINT (tem
) : -1)
3198 oldstate
= Fcdr (oldstate
);
3199 tem
= Fcar (oldstate
);
3200 start_quoted
= !NILP (tem
);
3202 /* if the eighth element of the list is nil, we are in comment
3203 style a. If it is non-nil, we are in comment style b */
3204 oldstate
= Fcdr (oldstate
);
3205 oldstate
= Fcdr (oldstate
);
3206 tem
= Fcar (oldstate
);
3207 state
.comstyle
= (NILP (tem
)
3209 : (RANGED_INTEGERP (0, tem
, ST_COMMENT_STYLE
)
3211 : ST_COMMENT_STYLE
));
3213 oldstate
= Fcdr (oldstate
);
3214 tem
= Fcar (oldstate
);
3215 state
.comstr_start
=
3216 RANGED_INTEGERP (PTRDIFF_MIN
, tem
, PTRDIFF_MAX
) ? XINT (tem
) : -1;
3217 oldstate
= Fcdr (oldstate
);
3218 tem
= Fcar (oldstate
);
3219 while (!NILP (tem
)) /* >= second enclosing sexps. */
3221 Lisp_Object temhd
= Fcar (tem
);
3222 if (RANGED_INTEGERP (PTRDIFF_MIN
, temhd
, PTRDIFF_MAX
))
3223 curlevel
->last
= XINT (temhd
);
3224 if (++curlevel
== endlevel
)
3225 curlevel
--; /* error ("Nesting too deep for parser"); */
3226 curlevel
->prev
= -1;
3227 curlevel
->last
= -1;
3234 curlevel
->prev
= -1;
3235 curlevel
->last
= -1;
3237 SETUP_SYNTAX_TABLE (prev_from
, 1);
3238 temp
= FETCH_CHAR (prev_from_byte
);
3239 prev_from_syntax
= SYNTAX_WITH_FLAGS (temp
);
3240 UPDATE_SYNTAX_TABLE_FORWARD (from
);
3242 /* Enter the loop at a place appropriate for initial state. */
3244 if (state
.incomment
)
3245 goto startincomment
;
3246 if (state
.instring
>= 0)
3248 nofence
= state
.instring
!= ST_STRING_STYLE
;
3250 goto startquotedinstring
;
3253 else if (start_quoted
)
3260 code
= prev_from_syntax
& 0xff;
3263 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax
)
3264 && (c1
= FETCH_CHAR (from_byte
),
3265 syntax
= SYNTAX_WITH_FLAGS (c1
),
3266 SYNTAX_FLAGS_COMSTART_SECOND (syntax
)))
3267 /* Duplicate code to avoid a complex if-expression
3268 which causes trouble for the SGI compiler. */
3270 /* Record the comment style we have entered so that only
3271 the comment-end sequence of the same style actually
3272 terminates the comment section. */
3274 = SYNTAX_FLAGS_COMMENT_STYLE (syntax
, prev_from_syntax
);
3275 comnested
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
)
3276 | SYNTAX_FLAGS_COMMENT_NESTED (syntax
));
3277 state
.incomment
= comnested
? 1 : -1;
3278 state
.comstr_start
= prev_from
;
3282 else if (code
== Scomment_fence
)
3284 /* Record the comment style we have entered so that only
3285 the comment-end sequence of the same style actually
3286 terminates the comment section. */
3287 state
.comstyle
= ST_COMMENT_STYLE
;
3288 state
.incomment
= -1;
3289 state
.comstr_start
= prev_from
;
3292 else if (code
== Scomment
)
3294 state
.comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax
, 0);
3295 state
.incomment
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
) ?
3297 state
.comstr_start
= prev_from
;
3300 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax
))
3306 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3307 curlevel
->last
= prev_from
;
3309 if (from
== end
) goto endquoted
;
3312 /* treat following character as a word constituent */
3315 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3316 curlevel
->last
= prev_from
;
3320 int symchar
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3321 switch (SYNTAX (symchar
))
3326 if (from
== end
) goto endquoted
;
3338 curlevel
->prev
= curlevel
->last
;
3341 case Scomment_fence
: /* Can't happen because it's handled above. */
3343 if (commentstop
|| boundary_stop
) goto done
;
3345 /* The (from == BEGV) test was to enter the loop in the middle so
3346 that we find a 2-char comment ender even if we start in the
3347 middle of it. We don't want to do that if we're just at the
3348 beginning of the comment (think of (*) ... (*)). */
3349 found
= forw_comment (from
, from_byte
, end
,
3350 state
.incomment
, state
.comstyle
,
3351 (from
== BEGV
|| from
< state
.comstr_start
+ 3)
3352 ? 0 : prev_from_syntax
,
3353 &out_charpos
, &out_bytepos
, &state
.incomment
);
3354 from
= out_charpos
; from_byte
= out_bytepos
;
3355 /* Beware! prev_from and friends are invalid now.
3356 Luckily, the `done' doesn't use them and the INC_FROM
3357 sets them to a sane value without looking at them. */
3358 if (!found
) goto done
;
3360 state
.incomment
= 0;
3361 state
.comstyle
= 0; /* reset the comment style */
3362 if (boundary_stop
) goto done
;
3366 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3368 /* curlevel++->last ran into compiler bug on Apollo */
3369 curlevel
->last
= prev_from
;
3370 if (++curlevel
== endlevel
)
3371 curlevel
--; /* error ("Nesting too deep for parser"); */
3372 curlevel
->prev
= -1;
3373 curlevel
->last
= -1;
3374 if (targetdepth
== depth
) goto done
;
3379 if (depth
< mindepth
)
3381 if (curlevel
!= levelstart
)
3383 curlevel
->prev
= curlevel
->last
;
3384 if (targetdepth
== depth
) goto done
;
3389 state
.comstr_start
= from
- 1;
3390 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3391 curlevel
->last
= prev_from
;
3392 state
.instring
= (code
== Sstring
3393 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte
))
3395 if (boundary_stop
) goto done
;
3398 nofence
= state
.instring
!= ST_STRING_STYLE
;
3403 enum syntaxcode c_code
;
3405 if (from
>= end
) goto done
;
3406 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3407 c_code
= SYNTAX (c
);
3409 /* Check C_CODE here so that if the char has
3410 a syntax-table property which says it is NOT
3411 a string character, it does not end the string. */
3412 if (nofence
&& c
== state
.instring
&& c_code
== Sstring
)
3418 if (!nofence
) goto string_end
;
3424 startquotedinstring
:
3425 if (from
>= end
) goto endquoted
;
3435 state
.instring
= -1;
3436 curlevel
->prev
= curlevel
->last
;
3438 if (boundary_stop
) goto done
;
3442 /* FIXME: We should do something with it. */
3445 /* Ignore whitespace, punctuation, quote, endcomment. */
3451 stop
: /* Here if stopping before start of sexp. */
3452 from
= prev_from
; /* We have just fetched the char that starts it; */
3453 from_byte
= prev_from_byte
;
3454 goto done
; /* but return the position before it. */
3459 state
.depth
= depth
;
3460 state
.mindepth
= mindepth
;
3461 state
.thislevelstart
= curlevel
->prev
;
3462 state
.prevlevelstart
3463 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
3464 state
.location
= from
;
3465 state
.location_byte
= from_byte
;
3466 state
.levelstarts
= Qnil
;
3467 while (curlevel
> levelstart
)
3468 state
.levelstarts
= Fcons (make_number ((--curlevel
)->last
),
3475 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
3476 doc
: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3477 Parsing stops at TO or when certain criteria are met;
3478 point is set to where parsing stops.
3479 If fifth arg OLDSTATE is omitted or nil,
3480 parsing assumes that FROM is the beginning of a function.
3481 Value is a list of elements describing final state of parsing:
3483 1. character address of start of innermost containing list; nil if none.
3484 2. character address of start of last complete sexp terminated.
3485 3. non-nil if inside a string.
3486 (it is the character that will terminate the string,
3487 or t if the string should be terminated by a generic string delimiter.)
3488 4. nil if outside a comment, t if inside a non-nestable comment,
3489 else an integer (the current comment nesting).
3490 5. t if following a quote character.
3491 6. the minimum paren-depth encountered during this scan.
3492 7. style of comment, if any.
3493 8. character address of start of comment or string; nil if not in one.
3494 9. Intermediate data for continuation of parsing (subject to change).
3495 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3496 in parentheses becomes equal to TARGETDEPTH.
3497 Fourth arg STOPBEFORE non-nil means stop when come to
3498 any character that starts a sexp.
3499 Fifth arg OLDSTATE is a list like what this function returns.
3500 It is used to initialize the state of the parse. Elements number 1, 2, 6
3502 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3503 If it is symbol `syntax-table', stop after the start of a comment or a
3504 string, or after end of a comment or a string. */)
3505 (Lisp_Object from
, Lisp_Object to
, Lisp_Object targetdepth
,
3506 Lisp_Object stopbefore
, Lisp_Object oldstate
, Lisp_Object commentstop
)
3508 struct lisp_parse_state state
;
3511 if (!NILP (targetdepth
))
3513 CHECK_NUMBER (targetdepth
);
3514 target
= XINT (targetdepth
);
3517 target
= TYPE_MINIMUM (EMACS_INT
); /* We won't reach this depth. */
3519 validate_region (&from
, &to
);
3520 scan_sexps_forward (&state
, XINT (from
), CHAR_TO_BYTE (XINT (from
)),
3522 target
, !NILP (stopbefore
), oldstate
,
3524 ? 0 : (EQ (commentstop
, Qsyntax_table
) ? -1 : 1)));
3526 SET_PT_BOTH (state
.location
, state
.location_byte
);
3528 return Fcons (make_number (state
.depth
),
3529 Fcons (state
.prevlevelstart
< 0
3530 ? Qnil
: make_number (state
.prevlevelstart
),
3531 Fcons (state
.thislevelstart
< 0
3532 ? Qnil
: make_number (state
.thislevelstart
),
3533 Fcons (state
.instring
>= 0
3534 ? (state
.instring
== ST_STRING_STYLE
3535 ? Qt
: make_number (state
.instring
)) : Qnil
,
3536 Fcons (state
.incomment
< 0 ? Qt
:
3537 (state
.incomment
== 0 ? Qnil
:
3538 make_number (state
.incomment
)),
3539 Fcons (state
.quoted
? Qt
: Qnil
,
3540 Fcons (make_number (state
.mindepth
),
3541 Fcons ((state
.comstyle
3542 ? (state
.comstyle
== ST_COMMENT_STYLE
3544 : make_number (state
.comstyle
))
3546 Fcons (((state
.incomment
3547 || (state
.instring
>= 0))
3548 ? make_number (state
.comstr_start
)
3550 Fcons (state
.levelstarts
, Qnil
))))))))));
3554 init_syntax_once (void)
3559 /* This has to be done here, before we call Fmake_char_table. */
3560 DEFSYM (Qsyntax_table
, "syntax-table");
3562 /* Create objects which can be shared among syntax tables. */
3563 Vsyntax_code_object
= make_uninit_vector (Smax
);
3564 for (i
= 0; i
< Smax
; i
++)
3565 ASET (Vsyntax_code_object
, i
, Fcons (make_number (i
), Qnil
));
3567 /* Now we are ready to set up this property, so we can
3568 create syntax tables. */
3569 Fput (Qsyntax_table
, Qchar_table_extra_slots
, make_number (0));
3571 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3573 Vstandard_syntax_table
= Fmake_char_table (Qsyntax_table
, temp
);
3575 /* Control characters should not be whitespace. */
3576 temp
= AREF (Vsyntax_code_object
, Spunct
);
3577 for (i
= 0; i
<= ' ' - 1; i
++)
3578 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3579 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 0177, temp
);
3581 /* Except that a few really are whitespace. */
3582 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3583 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ' ', temp
);
3584 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\t', temp
);
3585 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\n', temp
);
3586 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 015, temp
);
3587 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 014, temp
);
3589 temp
= AREF (Vsyntax_code_object
, Sword
);
3590 for (i
= 'a'; i
<= 'z'; i
++)
3591 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3592 for (i
= 'A'; i
<= 'Z'; i
++)
3593 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3594 for (i
= '0'; i
<= '9'; i
++)
3595 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3597 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '$', temp
);
3598 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '%', temp
);
3600 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '(',
3601 Fcons (make_number (Sopen
), make_number (')')));
3602 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ')',
3603 Fcons (make_number (Sclose
), make_number ('(')));
3604 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '[',
3605 Fcons (make_number (Sopen
), make_number (']')));
3606 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ']',
3607 Fcons (make_number (Sclose
), make_number ('[')));
3608 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '{',
3609 Fcons (make_number (Sopen
), make_number ('}')));
3610 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '}',
3611 Fcons (make_number (Sclose
), make_number ('{')));
3612 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '"',
3613 Fcons (make_number (Sstring
), Qnil
));
3614 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\\',
3615 Fcons (make_number (Sescape
), Qnil
));
3617 temp
= AREF (Vsyntax_code_object
, Ssymbol
);
3618 for (i
= 0; i
< 10; i
++)
3620 c
= "_-+*/&|<>="[i
];
3621 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3624 temp
= AREF (Vsyntax_code_object
, Spunct
);
3625 for (i
= 0; i
< 12; i
++)
3627 c
= ".,;:?!#@~^'`"[i
];
3628 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3631 /* All multibyte characters have syntax `word' by default. */
3632 temp
= AREF (Vsyntax_code_object
, Sword
);
3633 char_table_set_range (Vstandard_syntax_table
, 0x80, MAX_CHAR
, temp
);
3637 syms_of_syntax (void)
3639 DEFSYM (Qsyntax_table_p
, "syntax-table-p");
3641 staticpro (&Vsyntax_code_object
);
3643 staticpro (&gl_state
.object
);
3644 staticpro (&gl_state
.global_code
);
3645 staticpro (&gl_state
.current_syntax_table
);
3646 staticpro (&gl_state
.old_prop
);
3648 /* Defined in regex.c. */
3649 staticpro (&re_match_object
);
3651 DEFSYM (Qscan_error
, "scan-error");
3652 Fput (Qscan_error
, Qerror_conditions
,
3653 listn (CONSTYPE_PURE
, 2, Qscan_error
, Qerror
));
3654 Fput (Qscan_error
, Qerror_message
,
3655 build_pure_c_string ("Scan error"));
3657 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments
,
3658 doc
: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3660 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties
,
3661 doc
: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3662 Otherwise, that text property is simply ignored.
3663 See the info node `(elisp)Syntax Properties' for a description of the
3664 `syntax-table' property. */);
3666 DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done
,
3667 doc
: /* Position up to which syntax-table properties have been set. */);
3668 syntax_propertize__done
= -1;
3669 DEFSYM (Qinternal__syntax_propertize
, "internal--syntax-propertize");
3670 Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3672 words_include_escapes
= 0;
3673 DEFVAR_BOOL ("words-include-escapes", words_include_escapes
,
3674 doc
: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3676 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol
,
3677 doc
: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3678 multibyte_syntax_as_symbol
= 0;
3680 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3681 open_paren_in_column_0_is_defun_start
,
3682 doc
: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3683 open_paren_in_column_0_is_defun_start
= 1;
3686 DEFVAR_LISP ("find-word-boundary-function-table",
3687 Vfind_word_boundary_function_table
,
3689 Char table of functions to search for the word boundary.
3690 Each function is called with two arguments; POS and LIMIT.
3691 POS and LIMIT are character positions in the current buffer.
3693 If POS is less than LIMIT, POS is at the first character of a word,
3694 and the return value of a function is a position after the last
3695 character of that word.
3697 If POS is not less than LIMIT, POS is at the last character of a word,
3698 and the return value of a function is a position at the first
3699 character of that word.
3701 In both cases, LIMIT bounds the search. */);
3702 Vfind_word_boundary_function_table
= Fmake_char_table (Qnil
, Qnil
);
3704 defsubr (&Ssyntax_table_p
);
3705 defsubr (&Ssyntax_table
);
3706 defsubr (&Sstandard_syntax_table
);
3707 defsubr (&Scopy_syntax_table
);
3708 defsubr (&Sset_syntax_table
);
3709 defsubr (&Schar_syntax
);
3710 defsubr (&Smatching_paren
);
3711 defsubr (&Sstring_to_syntax
);
3712 defsubr (&Smodify_syntax_entry
);
3713 defsubr (&Sinternal_describe_syntax_value
);
3715 defsubr (&Sforward_word
);
3717 defsubr (&Sskip_chars_forward
);
3718 defsubr (&Sskip_chars_backward
);
3719 defsubr (&Sskip_syntax_forward
);
3720 defsubr (&Sskip_syntax_backward
);
3722 defsubr (&Sforward_comment
);
3723 defsubr (&Sscan_lists
);
3724 defsubr (&Sscan_sexps
);
3725 defsubr (&Sbackward_prefix_chars
);
3726 defsubr (&Sparse_partial_sexp
);