1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2019 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
24 #include "character.h"
28 #include "intervals.h"
31 /* Make syntax table lookup grant data in gl_state. */
32 #define SYNTAX(c) syntax_property (c, 1)
33 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
34 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
36 /* Eight single-bit flags have the following meanings:
37 1. This character is the first of a two-character comment-start sequence.
38 2. This character is the second of a two-character comment-start sequence.
39 3. This character is the first of a two-character comment-end sequence.
40 4. This character is the second of a two-character comment-end sequence.
41 5. This character is a prefix, for backward-prefix-chars.
42 6. The char is part of a delimiter for comments of style "b".
43 7. This character is part of a nestable comment sequence.
44 8. The char is part of a delimiter for comments of style "c".
45 Note that any two-character sequence whose first character has flag 1
46 and whose second character has flag 2 will be interpreted as a comment start.
48 Bits 6 and 8 discriminate among different comment styles.
49 Languages such as C++ allow two orthogonal syntax start/end pairs
50 and bit 6 determines whether a comment-end or Scommentend
51 ends style a or b. Comment markers can start style a, b, c, or bc.
52 Style a is always the default.
53 For 2-char comment markers, the style b flag is looked up only on the second
54 char of the comment marker and on the first char of the comment ender.
55 For style c (like the nested flag), the flag can be placed on any of
58 /* These functions extract specific flags from an integer
59 that holds the syntax code and the flags. */
62 SYNTAX_FLAGS_COMSTART_FIRST (int flags
)
64 return (flags
>> 16) & 1;
67 SYNTAX_FLAGS_COMSTART_SECOND (int flags
)
69 return (flags
>> 17) & 1;
72 SYNTAX_FLAGS_COMEND_FIRST (int flags
)
74 return (flags
>> 18) & 1;
77 SYNTAX_FLAGS_COMEND_SECOND (int flags
)
79 return (flags
>> 19) & 1;
82 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags
)
84 return (flags
& 0x50000) != 0;
87 SYNTAX_FLAGS_PREFIX (int flags
)
89 return (flags
>> 20) & 1;
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags
)
94 return (flags
>> 21) & 1;
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags
)
99 return (flags
>> 23) & 1;
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags
)
104 return (flags
>> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags
)
109 return (flags
>> 22) & 1;
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags
, int other_flags
)
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags
)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags
)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags
));
122 /* Extract a particular flag for a given character. */
125 SYNTAX_COMEND_FIRST (int c
)
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c
));
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
136 ST_COMMENT_STYLE
= 256 + 1,
137 ST_STRING_STYLE
= 256 + 2
140 /* This is the internal form of the parse state used in parse-partial-sexp. */
142 struct lisp_parse_state
144 EMACS_INT depth
; /* Depth at end of parsing. */
145 int instring
; /* -1 if not within string, else desired terminator. */
146 EMACS_INT incomment
; /* -1 if in unnestable comment else comment nesting */
147 int comstyle
; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
148 bool quoted
; /* True if just after an escape char at end of parsing. */
149 EMACS_INT mindepth
; /* Minimum depth seen while scanning. */
150 /* Char number of most recent start-of-expression at current level */
151 ptrdiff_t thislevelstart
;
152 /* Char number of start of containing expression */
153 ptrdiff_t prevlevelstart
;
154 ptrdiff_t location
; /* Char number at which parsing stopped. */
155 ptrdiff_t location_byte
; /* Corresponding byte position. */
156 ptrdiff_t comstr_start
; /* Position of last comment/string starter. */
157 Lisp_Object levelstarts
; /* Char numbers of starts-of-expression
158 of levels (starting from outermost). */
159 int prev_syntax
; /* Syntax of previous position scanned, when
160 that position (potentially) holds the first char
161 of a 2-char construct, i.e. comment delimiter
162 or Sescape, etc. Smax otherwise. */
165 /* These variables are a cache for finding the start of a defun.
166 find_start_pos is the place for which the defun start was found.
167 find_start_value is the defun start position found for it.
168 find_start_value_byte is the corresponding byte position.
169 find_start_buffer is the buffer it was found in.
170 find_start_begv is the BEGV value when it was found.
171 find_start_modiff is the value of MODIFF when it was found. */
173 static ptrdiff_t find_start_pos
;
174 static ptrdiff_t find_start_value
;
175 static ptrdiff_t find_start_value_byte
;
176 static struct buffer
*find_start_buffer
;
177 static ptrdiff_t find_start_begv
;
178 static EMACS_INT find_start_modiff
;
181 static Lisp_Object
skip_chars (bool, Lisp_Object
, Lisp_Object
, bool);
182 static Lisp_Object
skip_syntaxes (bool, Lisp_Object
, Lisp_Object
);
183 static Lisp_Object
scan_lists (EMACS_INT
, EMACS_INT
, EMACS_INT
, bool);
184 static void scan_sexps_forward (struct lisp_parse_state
*,
185 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT
,
187 static void internalize_parse_state (Lisp_Object
, struct lisp_parse_state
*);
188 static bool in_classes (int, Lisp_Object
);
189 static void parse_sexp_propertize (ptrdiff_t charpos
);
191 /* This setter is used only in this file, so it can be private. */
193 bset_syntax_table (struct buffer
*b
, Lisp_Object val
)
195 b
->syntax_table_
= val
;
198 /* Whether the syntax of the character C has the prefix flag set. */
200 syntax_prefix_flag_p (int c
)
202 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c
));
205 struct gl_state_s gl_state
; /* Global state of syntax parser. */
207 enum { INTERVALS_AT_ONCE
= 10 }; /* 1 + max-number of intervals
208 to scan to property-change. */
210 /* Set the syntax entry VAL for char C in table TABLE. */
213 SET_RAW_SYNTAX_ENTRY (Lisp_Object table
, int c
, Lisp_Object val
)
215 CHAR_TABLE_SET (table
, c
, val
);
218 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
219 RANGE is a cons (FROM . TO) specifying the range of characters. */
222 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table
, Lisp_Object range
,
225 Fset_char_table_range (table
, range
, val
);
228 /* Extract the information from the entry for character C
229 in the current syntax table. */
234 Lisp_Object ent
= SYNTAX_ENTRY (c
);
235 return CONSP (ent
) ? XCDR (ent
) : Qnil
;
238 /* This should be called with FROM at the start of forward
239 search, or after the last position of the backward search. It
240 makes sure that the first char is picked up with correct table, so
241 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
243 Sign of COUNT gives the direction of the search.
247 SETUP_SYNTAX_TABLE (ptrdiff_t from
, ptrdiff_t count
)
249 SETUP_BUFFER_SYNTAX_TABLE ();
250 gl_state
.b_property
= BEGV
;
251 gl_state
.e_property
= ZV
+ 1;
252 gl_state
.object
= Qnil
;
254 if (parse_sexp_lookup_properties
)
257 update_syntax_table_forward (from
, true, Qnil
);
258 else if (from
> BEGV
)
260 update_syntax_table (from
- 1, count
, true, Qnil
);
261 parse_sexp_propertize (from
- 1);
266 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
267 If it is t (which is only used in fast_c_string_match_ignore_case),
268 ignore properties altogether.
270 This is meant for regex.c to use. For buffers, regex.c passes arguments
271 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
272 So if it is a buffer, we set the offset field to BEGV. */
275 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object
,
276 ptrdiff_t from
, ptrdiff_t count
)
278 SETUP_BUFFER_SYNTAX_TABLE ();
279 gl_state
.object
= object
;
280 if (BUFFERP (gl_state
.object
))
282 struct buffer
*buf
= XBUFFER (gl_state
.object
);
283 gl_state
.b_property
= 1;
284 gl_state
.e_property
= BUF_ZV (buf
) - BUF_BEGV (buf
) + 1;
285 gl_state
.offset
= BUF_BEGV (buf
) - 1;
287 else if (NILP (gl_state
.object
))
289 gl_state
.b_property
= 1;
290 gl_state
.e_property
= ZV
- BEGV
+ 1;
291 gl_state
.offset
= BEGV
- 1;
293 else if (EQ (gl_state
.object
, Qt
))
295 gl_state
.b_property
= 0;
296 gl_state
.e_property
= PTRDIFF_MAX
;
301 gl_state
.b_property
= 0;
302 gl_state
.e_property
= 1 + SCHARS (gl_state
.object
);
305 if (parse_sexp_lookup_properties
)
306 update_syntax_table (from
+ gl_state
.offset
- (count
<= 0),
307 count
, 1, gl_state
.object
);
310 /* Update gl_state to an appropriate interval which contains CHARPOS. The
311 sign of COUNT give the relative position of CHARPOS wrt the previously
312 valid interval. If INIT, only [be]_property fields of gl_state are
313 valid at start, the rest is filled basing on OBJECT.
315 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
316 direction than the intervals - or in an interval. We update the
317 current syntax-table basing on the property of this interval, and
318 update the interval to start further than CHARPOS - or be
319 NULL. We also update lim_property to be the next value of
320 charpos to call this subroutine again - or be before/after the
321 start/end of OBJECT. */
324 update_syntax_table (ptrdiff_t charpos
, EMACS_INT count
, bool init
,
327 Lisp_Object tmp_table
;
329 bool invalidate
= true;
334 gl_state
.old_prop
= Qnil
;
335 gl_state
.start
= gl_state
.b_property
;
336 gl_state
.stop
= gl_state
.e_property
;
337 i
= interval_of (charpos
, object
);
338 gl_state
.backward_i
= gl_state
.forward_i
= i
;
342 /* interval_of updates only ->position of the return value, so
343 update the parents manually to speed up update_interval. */
344 while (!NULL_PARENT (i
))
346 if (AM_RIGHT_CHILD (i
))
347 INTERVAL_PARENT (i
)->position
= i
->position
348 - LEFT_TOTAL_LENGTH (i
) + TOTAL_LENGTH (i
) /* right end */
349 - TOTAL_LENGTH (INTERVAL_PARENT (i
))
350 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i
));
352 INTERVAL_PARENT (i
)->position
= i
->position
- LEFT_TOTAL_LENGTH (i
)
354 i
= INTERVAL_PARENT (i
);
356 i
= gl_state
.forward_i
;
357 gl_state
.b_property
= i
->position
- gl_state
.offset
;
358 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
361 i
= count
> 0 ? gl_state
.forward_i
: gl_state
.backward_i
;
363 /* We are guaranteed to be called with CHARPOS either in i,
366 error ("Error in syntax_table logic for to-the-end intervals");
367 else if (charpos
< i
->position
) /* Move left. */
370 error ("Error in syntax_table logic for intervals <-");
371 /* Update the interval. */
372 i
= update_interval (i
, charpos
);
373 if (INTERVAL_LAST_POS (i
) != gl_state
.b_property
)
376 gl_state
.forward_i
= i
;
377 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
380 else if (charpos
>= INTERVAL_LAST_POS (i
)) /* Move right. */
383 error ("Error in syntax_table logic for intervals ->");
384 /* Update the interval. */
385 i
= update_interval (i
, charpos
);
386 if (i
->position
!= gl_state
.e_property
)
389 gl_state
.backward_i
= i
;
390 gl_state
.b_property
= i
->position
- gl_state
.offset
;
395 tmp_table
= textget (i
->plist
, Qsyntax_table
);
398 invalidate
= !EQ (tmp_table
, gl_state
.old_prop
); /* Need to invalidate? */
400 if (invalidate
) /* Did not get to adjacent interval. */
401 { /* with the same table => */
402 /* invalidate the old range. */
405 gl_state
.backward_i
= i
;
406 gl_state
.b_property
= i
->position
- gl_state
.offset
;
410 gl_state
.forward_i
= i
;
411 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
415 if (!EQ (tmp_table
, gl_state
.old_prop
))
417 gl_state
.current_syntax_table
= tmp_table
;
418 gl_state
.old_prop
= tmp_table
;
419 if (EQ (Fsyntax_table_p (tmp_table
), Qt
))
421 gl_state
.use_global
= 0;
423 else if (CONSP (tmp_table
))
425 gl_state
.use_global
= 1;
426 gl_state
.global_code
= tmp_table
;
430 gl_state
.use_global
= 0;
431 gl_state
.current_syntax_table
= BVAR (current_buffer
, syntax_table
);
437 if (cnt
&& !EQ (tmp_table
, textget (i
->plist
, Qsyntax_table
)))
441 gl_state
.e_property
= i
->position
- gl_state
.offset
;
442 gl_state
.forward_i
= i
;
447 = i
->position
+ LENGTH (i
) - gl_state
.offset
;
448 gl_state
.backward_i
= i
;
452 else if (cnt
== INTERVALS_AT_ONCE
)
457 = i
->position
+ LENGTH (i
) - gl_state
.offset
458 /* e_property at EOB is not set to ZV but to ZV+1, so that
459 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
460 having to check eob between the two. */
461 + (next_interval (i
) ? 0 : 1);
462 gl_state
.forward_i
= i
;
466 gl_state
.b_property
= i
->position
- gl_state
.offset
;
467 gl_state
.backward_i
= i
;
472 i
= count
> 0 ? next_interval (i
) : previous_interval (i
);
474 eassert (i
== NULL
); /* This property goes to the end. */
477 gl_state
.e_property
= gl_state
.stop
;
478 gl_state
.forward_i
= i
;
481 gl_state
.b_property
= gl_state
.start
;
485 parse_sexp_propertize (ptrdiff_t charpos
)
488 if (syntax_propertize__done
<= charpos
489 && syntax_propertize__done
< zv
)
491 EMACS_INT modiffs
= CHARS_MODIFF
;
492 safe_call1 (Qinternal__syntax_propertize
,
493 make_number (min (zv
, 1 + charpos
)));
494 if (modiffs
!= CHARS_MODIFF
)
495 error ("parse-sexp-propertize-function modified the buffer!");
496 if (syntax_propertize__done
<= charpos
497 && syntax_propertize__done
< zv
)
498 error ("parse-sexp-propertize-function did not move"
499 " syntax-propertize--done");
500 SETUP_SYNTAX_TABLE (charpos
, 1);
502 else if (gl_state
.e_property
> syntax_propertize__done
)
504 gl_state
.e_property
= syntax_propertize__done
;
505 gl_state
.e_property_truncated
= true;
507 else if (gl_state
.e_property_truncated
508 && gl_state
.e_property
< syntax_propertize__done
)
509 { /* When moving backward, e_property might be set without resetting
510 e_property_truncated, so the e_property_truncated flag may
511 occasionally be left raised spuriously. This should be rare. */
512 gl_state
.e_property_truncated
= false;
513 update_syntax_table_forward (charpos
, false, Qnil
);
518 update_syntax_table_forward (ptrdiff_t charpos
, bool init
,
521 if (gl_state
.e_property_truncated
)
523 eassert (NILP (object
));
524 eassert (charpos
>= gl_state
.e_property
);
525 parse_sexp_propertize (charpos
);
529 update_syntax_table (charpos
, 1, init
, object
);
530 if (NILP (object
) && gl_state
.e_property
> syntax_propertize__done
)
531 parse_sexp_propertize (charpos
);
535 /* Returns true if char at CHARPOS is quoted.
536 Global syntax-table data should be set up already to be good at CHARPOS
537 or after. On return global syntax data is good for lookup at CHARPOS. */
540 char_quoted (ptrdiff_t charpos
, ptrdiff_t bytepos
)
542 enum syntaxcode code
;
543 ptrdiff_t beg
= BEGV
;
545 ptrdiff_t orig
= charpos
;
547 while (charpos
> beg
)
550 DEC_BOTH (charpos
, bytepos
);
552 UPDATE_SYNTAX_TABLE_BACKWARD (charpos
);
553 c
= FETCH_CHAR_AS_MULTIBYTE (bytepos
);
555 if (! (code
== Scharquote
|| code
== Sescape
))
561 UPDATE_SYNTAX_TABLE (orig
);
565 /* Return the bytepos one character before BYTEPOS.
566 We assume that BYTEPOS is not at the start of the buffer. */
569 dec_bytepos (ptrdiff_t bytepos
)
571 if (NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
578 /* Return a defun-start position before POS and not too far before.
579 It should be the last one before POS, or nearly the last.
581 When open_paren_in_column_0_is_defun_start is nonzero,
582 only the beginning of the buffer is treated as a defun-start.
584 We record the information about where the scan started
585 and what its result was, so that another call in the same area
586 can return the same value very quickly.
588 There is no promise at which position the global syntax data is
589 valid on return from the subroutine, so the caller should explicitly
590 update the global data. */
593 find_defun_start (ptrdiff_t pos
, ptrdiff_t pos_byte
)
595 ptrdiff_t opoint
= PT
, opoint_byte
= PT_BYTE
;
597 /* Use previous finding, if it's valid and applies to this inquiry. */
598 if (current_buffer
== find_start_buffer
599 /* Reuse the defun-start even if POS is a little farther on.
600 POS might be in the next defun, but that's ok.
601 Our value may not be the best possible, but will still be usable. */
602 && pos
<= find_start_pos
+ 1000
603 && pos
>= find_start_value
604 && BEGV
== find_start_begv
605 && MODIFF
== find_start_modiff
)
606 return find_start_value
;
608 if (!open_paren_in_column_0_is_defun_start
)
610 find_start_value
= BEGV
;
611 find_start_value_byte
= BEGV_BYTE
;
615 /* Back up to start of line. */
616 scan_newline (pos
, pos_byte
, BEGV
, BEGV_BYTE
, -1, 1);
618 /* We optimize syntax-table lookup for rare updates. Thus we accept
619 only those `^\s(' which are good in global _and_ text-property
621 SETUP_BUFFER_SYNTAX_TABLE ();
624 /* Open-paren at start of line means we may have found our
626 int c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
627 if (SYNTAX (c
) == Sopen
)
629 SETUP_SYNTAX_TABLE (PT
+ 1, -1); /* Try again... */
630 c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
631 if (SYNTAX (c
) == Sopen
)
633 /* Now fallback to the default value. */
634 SETUP_BUFFER_SYNTAX_TABLE ();
636 /* Move to beg of previous line. */
637 scan_newline (PT
, PT_BYTE
, BEGV
, BEGV_BYTE
, -2, 1);
640 /* Record what we found, for the next try. */
641 find_start_value
= PT
;
642 find_start_value_byte
= PT_BYTE
;
643 TEMP_SET_PT_BOTH (opoint
, opoint_byte
);
646 find_start_buffer
= current_buffer
;
647 find_start_modiff
= MODIFF
;
648 find_start_begv
= BEGV
;
649 find_start_pos
= pos
;
651 return find_start_value
;
654 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
657 prev_char_comend_first (ptrdiff_t pos
, ptrdiff_t pos_byte
)
662 DEC_BOTH (pos
, pos_byte
);
663 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
664 c
= FETCH_CHAR (pos_byte
);
665 val
= SYNTAX_COMEND_FIRST (c
);
666 UPDATE_SYNTAX_TABLE_FORWARD (pos
+ 1);
670 /* Check whether charpos FROM is at the end of a comment.
671 FROM_BYTE is the bytepos corresponding to FROM.
672 Do not move back before STOP.
674 Return true if we find a comment ending at FROM/FROM_BYTE.
676 If successful, store the charpos of the comment's beginning
677 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
679 Global syntax data remains valid for backward search starting at
680 the returned value (or at FROM, if the search was not successful). */
683 back_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
684 bool comnested
, int comstyle
, ptrdiff_t *charpos_ptr
,
685 ptrdiff_t *bytepos_ptr
)
687 /* Look back, counting the parity of string-quotes,
688 and recording the comment-starters seen.
689 When we reach a safe place, assume that's not in a string;
690 then step the main scan to the earliest comment-starter seen
691 an even number of string quotes away from the safe place.
693 OFROM[I] is position of the earliest comment-starter seen
694 which is I+2X quotes from the comment-end.
695 PARITY is current parity of quotes from the comment end. */
696 int string_style
= -1; /* Presumed outside of any string. */
697 bool string_lossage
= 0;
698 /* Not a real lossage: indicates that we have passed a matching comment
699 starter plus a non-matching comment-ender, meaning that any matching
700 comment-starter we might see later could be a false positive (hidden
701 inside another comment).
702 Test case: { a (* b } c (* d *) */
703 bool comment_lossage
= 0;
704 ptrdiff_t comment_end
= from
;
705 ptrdiff_t comment_end_byte
= from_byte
;
706 ptrdiff_t comstart_pos
= 0;
707 ptrdiff_t comstart_byte
;
708 /* Place where the containing defun starts,
709 or 0 if we didn't come across it yet. */
710 ptrdiff_t defun_start
= 0;
711 ptrdiff_t defun_start_byte
= 0;
712 enum syntaxcode code
;
713 ptrdiff_t nesting
= 1; /* Current comment nesting. */
716 unsigned short int quit_count
= 0;
718 /* FIXME: A }} comment-ender style leads to incorrect behavior
719 in the case of {{ c }}} because we ignore the last two chars which are
720 assumed to be comment-enders although they aren't. */
722 /* At beginning of range to scan, we're outside of strings;
723 that determines quote parity to the comment-end. */
726 rarely_quit (++quit_count
);
730 bool com2start
, com2end
, comstart
;
732 /* Move back and examine a character. */
733 DEC_BOTH (from
, from_byte
);
734 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
736 prev_syntax
= syntax
;
737 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
738 syntax
= SYNTAX_WITH_FLAGS (c
);
741 /* Check for 2-char comment markers. */
742 com2start
= (SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
743 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax
)
745 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax
, syntax
))
746 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
)
747 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
);
748 com2end
= (SYNTAX_FLAGS_COMEND_FIRST (syntax
)
749 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax
));
750 comstart
= (com2start
|| code
== Scomment
);
752 /* Nasty cases with overlapping 2-char comment markers:
753 - snmp-mode: -- c -- foo -- c --
761 /* If a 2-char comment sequence partly overlaps with another,
762 we don't try to be clever. E.g. |*| in C, or }% in modes that
763 have %..\n and %{..}%. */
764 if (from
> stop
&& (com2end
|| comstart
))
766 ptrdiff_t next
= from
, next_byte
= from_byte
;
767 int next_c
, next_syntax
;
768 DEC_BOTH (next
, next_byte
);
769 UPDATE_SYNTAX_TABLE_BACKWARD (next
);
770 next_c
= FETCH_CHAR_AS_MULTIBYTE (next_byte
);
771 next_syntax
= SYNTAX_WITH_FLAGS (next_c
);
772 if (((comstart
|| comnested
)
773 && SYNTAX_FLAGS_COMEND_SECOND (syntax
)
774 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax
))
775 || ((com2end
|| comnested
)
776 && SYNTAX_FLAGS_COMSTART_SECOND (syntax
)
778 == SYNTAX_FLAGS_COMMENT_STYLE (syntax
, prev_syntax
))
779 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax
)))
781 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
784 if (com2start
&& comstart_pos
== 0)
785 /* We're looking at a comment starter. But it might be a comment
786 ender as well (see snmp-mode). The first time we see one, we
787 need to consider it as a comment starter,
788 and the subsequent times as a comment ender. */
791 /* Turn a 2-char comment sequences into the appropriate syntax. */
796 /* Ignore comment starters of a different style. */
797 else if (code
== Scomment
798 && (comstyle
!= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0)
799 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
) != comnested
))
802 /* Ignore escaped characters, except comment-enders which cannot
804 if ((Vcomment_end_can_be_escaped
|| code
!= Sendcomment
)
805 && char_quoted (from
, from_byte
))
812 c
= (code
== Sstring_fence
? ST_STRING_STYLE
: ST_COMMENT_STYLE
);
815 /* Track parity of quotes. */
816 if (string_style
== -1)
817 /* Entering a string. */
819 else if (string_style
== c
)
820 /* Leaving the string. */
823 /* If we have two kinds of string delimiters.
824 There's no way to grok this scanning backwards. */
829 /* We've already checked that it is the relevant comstyle. */
830 if (string_style
!= -1 || comment_lossage
|| string_lossage
)
831 /* There are odd string quotes involved, so let's be careful.
832 Test case in Pascal: " { " a { " } */
837 /* Record best comment-starter so far. */
839 comstart_byte
= from_byte
;
841 else if (--nesting
<= 0)
842 /* nested comments have to be balanced, so we don't need to
843 keep looking for earlier ones. We use here the same (slightly
844 incorrect) reasoning as below: since it is followed by uniform
845 paired string quotes, this comment-start has to be outside of
846 strings, else the comment-end itself would be inside a string. */
851 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == comstyle
852 && ((com2end
&& SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
))
853 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
)
854 /* This is the same style of comment ender as ours. */
859 /* Anything before that can't count because it would match
860 this comment-ender rather than ours. */
861 from
= stop
; /* Break out of the loop. */
863 else if (comstart_pos
!= 0 || c
!= '\n')
864 /* We're mixing comment styles here, so we'd better be careful.
865 The (comstart_pos != 0 || c != '\n') check is not quite correct
866 (we should just always set comment_lossage), but removing it
867 would imply that any multiline comment in C would go through
868 lossage, which seems overkill.
869 The failure should only happen in the rare cases such as
875 /* Assume a defun-start point is outside of strings. */
876 if (open_paren_in_column_0_is_defun_start
878 || (temp_byte
= dec_bytepos (from_byte
),
879 FETCH_CHAR (temp_byte
) == '\n')))
882 defun_start_byte
= from_byte
;
883 from
= stop
; /* Break out of the loop. */
892 if (comstart_pos
== 0)
895 from_byte
= comment_end_byte
;
896 UPDATE_SYNTAX_TABLE_FORWARD (comment_end
);
898 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
899 or `done'), then we've found the beginning of the non-nested comment. */
900 else if (1) /* !comnested */
903 from_byte
= comstart_byte
;
904 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
908 struct lisp_parse_state state
;
909 bool adjusted
= true;
910 /* We had two kinds of string delimiters mixed up
911 together. Decode this going forwards.
912 Scan fwd from a known safe place (beginning-of-defun)
913 to the one in question; this records where we
914 last passed a comment starter. */
915 /* If we did not already find the defun start, find it now. */
916 if (defun_start
== 0)
918 defun_start
= find_defun_start (comment_end
, comment_end_byte
);
919 defun_start_byte
= find_start_value_byte
;
920 adjusted
= (defun_start
> BEGV
);
924 internalize_parse_state (Qnil
, &state
);
925 scan_sexps_forward (&state
,
926 defun_start
, defun_start_byte
,
927 comment_end
, TYPE_MINIMUM (EMACS_INT
),
929 defun_start
= comment_end
;
934 = CONSP (state
.levelstarts
) ? XINT (XCAR (state
.levelstarts
))
935 : state
.thislevelstart
>= 0 ? state
.thislevelstart
937 find_start_value_byte
= CHAR_TO_BYTE (find_start_value
);
940 if (state
.incomment
== (comnested
? 1 : -1)
941 && state
.comstyle
== comstyle
)
942 from
= state
.comstr_start
;
947 /* If comment_end is inside some other comment, maybe ours
948 is nested, so we need to try again from within the
949 surrounding comment. Example: { a (* " *) */
951 /* FIXME: We should advance by one or two chars. */
952 defun_start
= state
.comstr_start
+ 2;
953 defun_start_byte
= CHAR_TO_BYTE (defun_start
);
956 rarely_quit (++quit_count
);
958 while (defun_start
< comment_end
);
960 from_byte
= CHAR_TO_BYTE (from
);
961 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
966 *bytepos_ptr
= from_byte
;
968 return from
!= comment_end
;
971 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
972 doc
: /* Return t if OBJECT is a syntax table.
973 Currently, any char-table counts as a syntax table. */)
976 if (CHAR_TABLE_P (object
)
977 && EQ (XCHAR_TABLE (object
)->purpose
, Qsyntax_table
))
983 check_syntax_table (Lisp_Object obj
)
985 CHECK_TYPE (CHAR_TABLE_P (obj
) && EQ (XCHAR_TABLE (obj
)->purpose
, Qsyntax_table
),
986 Qsyntax_table_p
, obj
);
989 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
990 doc
: /* Return the current syntax table.
991 This is the one specified by the current buffer. */)
994 return BVAR (current_buffer
, syntax_table
);
997 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
998 Sstandard_syntax_table
, 0, 0, 0,
999 doc
: /* Return the standard syntax table.
1000 This is the one used for new buffers. */)
1003 return Vstandard_syntax_table
;
1006 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
1007 doc
: /* Construct a new syntax table and return it.
1008 It is a copy of the TABLE, which defaults to the standard syntax table. */)
1014 check_syntax_table (table
);
1016 table
= Vstandard_syntax_table
;
1018 copy
= Fcopy_sequence (table
);
1020 /* Only the standard syntax table should have a default element.
1021 Other syntax tables should inherit from parents instead. */
1022 set_char_table_defalt (copy
, Qnil
);
1024 /* Copied syntax tables should all have parents.
1025 If we copied one with no parent, such as the standard syntax table,
1026 use the standard syntax table as the copy's parent. */
1027 if (NILP (XCHAR_TABLE (copy
)->parent
))
1028 Fset_char_table_parent (copy
, Vstandard_syntax_table
);
1032 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
1033 doc
: /* Select a new syntax table for the current buffer.
1034 One argument, a syntax table. */)
1038 check_syntax_table (table
);
1039 bset_syntax_table (current_buffer
, table
);
1040 /* Indicate that this buffer now has a specified syntax table. */
1041 idx
= PER_BUFFER_VAR_IDX (syntax_table
);
1042 SET_PER_BUFFER_VALUE_P (current_buffer
, idx
, 1);
1046 /* Convert a letter which signifies a syntax code
1047 into the code it signifies.
1048 This is used by modify-syntax-entry, and other things. */
1050 unsigned char const syntax_spec_code
[0400] =
1051 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1052 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1053 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1054 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1055 Swhitespace
, Scomment_fence
, Sstring
, 0377, Smath
, 0377, 0377, Squote
,
1056 Sopen
, Sclose
, 0377, 0377, 0377, Swhitespace
, Spunct
, Scharquote
,
1057 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1058 0377, 0377, 0377, 0377, Scomment
, 0377, Sendcomment
, 0377,
1059 Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1060 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1061 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
1062 0377, 0377, 0377, 0377, Sescape
, 0377, 0377, Ssymbol
,
1063 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1064 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1065 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
1066 0377, 0377, 0377, 0377, Sstring_fence
, 0377, 0377, 0377
1069 /* Indexed by syntax code, give the letter that describes it. */
1071 char const syntax_code_spec
[16] =
1073 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1077 /* Indexed by syntax code, give the object (cons of syntax code and
1078 nil) to be stored in syntax table. Since these objects can be
1079 shared among syntax tables, we generate them in advance. By
1080 sharing objects, the function `describe-syntax' can give a more
1082 static Lisp_Object Vsyntax_code_object
;
1085 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
1086 doc
: /* Return the syntax code of CHARACTER, described by a character.
1087 For example, if CHARACTER is a word constituent, the
1088 character `w' (119) is returned.
1089 The characters that correspond to various syntax codes
1090 are listed in the documentation of `modify-syntax-entry'.
1092 If you're trying to determine the syntax of characters in the buffer,
1093 this is probably the wrong function to use, because it can't take
1094 `syntax-table' text properties into account. Consider using
1095 `syntax-after' instead. */)
1096 (Lisp_Object character
)
1099 CHECK_CHARACTER (character
);
1100 char_int
= XINT (character
);
1101 SETUP_BUFFER_SYNTAX_TABLE ();
1102 return make_number (syntax_code_spec
[SYNTAX (char_int
)]);
1105 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
1106 doc
: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1107 (Lisp_Object character
)
1110 enum syntaxcode code
;
1111 CHECK_CHARACTER (character
);
1112 char_int
= XINT (character
);
1113 SETUP_BUFFER_SYNTAX_TABLE ();
1114 code
= SYNTAX (char_int
);
1115 if (code
== Sopen
|| code
== Sclose
)
1116 return SYNTAX_MATCH (char_int
);
1120 DEFUN ("string-to-syntax", Fstring_to_syntax
, Sstring_to_syntax
, 1, 1, 0,
1121 doc
: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1122 STRING should be a string of the form allowed as argument of
1123 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1124 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1125 the value of a `syntax-table' text property. */)
1126 (Lisp_Object string
)
1128 const unsigned char *p
;
1132 CHECK_STRING (string
);
1135 val
= syntax_spec_code
[*p
++];
1137 error ("Invalid syntax description letter: %c", p
[-1]);
1139 if (val
== Sinherit
)
1145 int character
= STRING_CHAR_AND_LENGTH (p
, len
);
1146 XSETINT (match
, character
);
1147 if (XFASTINT (match
) == ' ')
1190 if (val
< ASIZE (Vsyntax_code_object
) && NILP (match
))
1191 return AREF (Vsyntax_code_object
, val
);
1193 /* Since we can't use a shared object, let's make a new one. */
1194 return Fcons (make_number (val
), match
);
1197 /* I really don't know why this is interactive
1198 help-form should at least be made useful whilst reading the second arg. */
1199 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
1200 "cSet syntax for character: \nsSet syntax for %s to: ",
1201 doc
: /* Set syntax for character CHAR according to string NEWENTRY.
1202 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1203 the current buffer's syntax table.
1204 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1205 in the range MIN to MAX are changed.
1206 The first character of NEWENTRY should be one of the following:
1207 Space or - whitespace syntax. w word constituent.
1208 _ symbol constituent. . punctuation.
1209 ( open-parenthesis. ) close-parenthesis.
1210 " string quote. \\ escape.
1211 $ paired delimiter. \\=' expression quote or prefix operator.
1212 < comment starter. > comment ender.
1213 / character-quote. @ inherit from parent table.
1214 | generic string fence. ! generic comment fence.
1216 Only single-character comment start and end sequences are represented thus.
1217 Two-character sequences are represented as described below.
1218 The second character of NEWENTRY is the matching parenthesis,
1219 used only if the first character is `(' or `)'.
1220 Any additional characters are flags.
1221 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1222 1 means CHAR is the start of a two-char comment start sequence.
1223 2 means CHAR is the second character of such a sequence.
1224 3 means CHAR is the start of a two-char comment end sequence.
1225 4 means CHAR is the second character of such a sequence.
1227 There can be several orthogonal comment sequences. This is to support
1228 language modes such as C++. By default, all comment sequences are of style
1229 a, but you can set the comment sequence style to b (on the second character
1230 of a comment-start, and the first character of a comment-end sequence) and/or
1231 c (on any of its chars) using this flag:
1232 b means CHAR is part of comment sequence b.
1233 c means CHAR is part of comment sequence c.
1234 n means CHAR is part of a nestable comment sequence.
1236 p means CHAR is a prefix character for `backward-prefix-chars';
1237 such characters are treated as whitespace when they occur
1238 between expressions.
1239 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1240 (Lisp_Object c
, Lisp_Object newentry
, Lisp_Object syntax_table
)
1244 CHECK_CHARACTER_CAR (c
);
1245 CHECK_CHARACTER_CDR (c
);
1248 CHECK_CHARACTER (c
);
1250 if (NILP (syntax_table
))
1251 syntax_table
= BVAR (current_buffer
, syntax_table
);
1253 check_syntax_table (syntax_table
);
1255 newentry
= Fstring_to_syntax (newentry
);
1257 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table
, c
, newentry
);
1259 SET_RAW_SYNTAX_ENTRY (syntax_table
, XINT (c
), newentry
);
1261 /* We clear the regexp cache, since character classes can now have
1262 different values from those in the compiled regexps.*/
1263 clear_regexp_cache ();
1268 /* Dump syntax table to buffer in human-readable format */
1270 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value
,
1271 Sinternal_describe_syntax_value
, 1, 1, 0,
1272 doc
: /* Insert a description of the internal syntax description SYNTAX at point. */)
1273 (Lisp_Object syntax
)
1275 int code
, syntax_code
;
1276 bool start1
, start2
, end1
, end2
, prefix
, comstyleb
, comstylec
, comnested
;
1278 Lisp_Object first
, match_lisp
, value
= syntax
;
1282 insert_string ("default");
1286 if (CHAR_TABLE_P (value
))
1288 insert_string ("deeper char-table ...");
1294 insert_string ("invalid");
1298 first
= XCAR (value
);
1299 match_lisp
= XCDR (value
);
1301 if (!INTEGERP (first
) || !(NILP (match_lisp
) || CHARACTERP (match_lisp
)))
1303 insert_string ("invalid");
1307 syntax_code
= XINT (first
) & INT_MAX
;
1308 code
= syntax_code
& 0377;
1309 start1
= SYNTAX_FLAGS_COMSTART_FIRST (syntax_code
);
1310 start2
= SYNTAX_FLAGS_COMSTART_SECOND (syntax_code
);
1311 end1
= SYNTAX_FLAGS_COMEND_FIRST (syntax_code
);
1312 end2
= SYNTAX_FLAGS_COMEND_SECOND (syntax_code
);
1313 prefix
= SYNTAX_FLAGS_PREFIX (syntax_code
);
1314 comstyleb
= SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code
);
1315 comstylec
= SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code
);
1316 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax_code
);
1320 insert_string ("invalid");
1324 str
[0] = syntax_code_spec
[code
], str
[1] = 0;
1327 if (NILP (match_lisp
))
1330 insert_char (XINT (match_lisp
));
1351 insert_string ("\twhich means: ");
1356 insert_string ("whitespace"); break;
1358 insert_string ("punctuation"); break;
1360 insert_string ("word"); break;
1362 insert_string ("symbol"); break;
1364 insert_string ("open"); break;
1366 insert_string ("close"); break;
1368 insert_string ("prefix"); break;
1370 insert_string ("string"); break;
1372 insert_string ("math"); break;
1374 insert_string ("escape"); break;
1376 insert_string ("charquote"); break;
1378 insert_string ("comment"); break;
1380 insert_string ("endcomment"); break;
1382 insert_string ("inherit"); break;
1383 case Scomment_fence
:
1384 insert_string ("comment fence"); break;
1386 insert_string ("string fence"); break;
1388 insert_string ("invalid");
1392 if (!NILP (match_lisp
))
1394 insert_string (", matches ");
1395 insert_char (XINT (match_lisp
));
1399 insert_string (",\n\t is the first character of a comment-start sequence");
1401 insert_string (",\n\t is the second character of a comment-start sequence");
1404 insert_string (",\n\t is the first character of a comment-end sequence");
1406 insert_string (",\n\t is the second character of a comment-end sequence");
1408 insert_string (" (comment style b)");
1410 insert_string (" (comment style c)");
1412 insert_string (" (nestable)");
1416 AUTO_STRING (prefixdoc
,
1417 ",\n\t is a prefix character for `backward-prefix-chars'");
1418 insert1 (Fsubstitute_command_keys (prefixdoc
));
1424 /* Return the position across COUNT words from FROM.
1425 If that many words cannot be found before the end of the buffer, return 0.
1426 COUNT negative means scan backward and stop at word beginning. */
1429 scan_words (ptrdiff_t from
, EMACS_INT count
)
1431 ptrdiff_t beg
= BEGV
;
1433 ptrdiff_t from_byte
= CHAR_TO_BYTE (from
);
1434 enum syntaxcode code
;
1436 Lisp_Object func
, pos
;
1438 SETUP_SYNTAX_TABLE (from
, count
);
1446 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1447 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1448 code
= SYNTAX (ch0
);
1449 INC_BOTH (from
, from_byte
);
1450 if (words_include_escapes
1451 && (code
== Sescape
|| code
== Scharquote
))
1457 /* Now CH0 is a character which begins a word and FROM is the
1458 position of the next character. */
1459 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch0
);
1460 if (! NILP (Ffboundp (func
)))
1462 pos
= call2 (func
, make_number (from
- 1), make_number (end
));
1463 if (INTEGERP (pos
) && from
< XINT (pos
) && XINT (pos
) <= ZV
)
1466 from_byte
= CHAR_TO_BYTE (from
);
1473 if (from
== end
) break;
1474 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1475 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1476 code
= SYNTAX (ch1
);
1478 && (! words_include_escapes
1479 || (code
!= Sescape
&& code
!= Scharquote
)))
1480 || word_boundary_p (ch0
, ch1
))
1482 INC_BOTH (from
, from_byte
);
1495 DEC_BOTH (from
, from_byte
);
1496 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1497 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1498 code
= SYNTAX (ch1
);
1499 if (words_include_escapes
1500 && (code
== Sescape
|| code
== Scharquote
))
1506 /* Now CH1 is a character which ends a word and FROM is the
1508 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch1
);
1509 if (! NILP (Ffboundp (func
)))
1511 pos
= call2 (func
, make_number (from
), make_number (beg
));
1512 if (INTEGERP (pos
) && BEGV
<= XINT (pos
) && XINT (pos
) < from
)
1515 from_byte
= CHAR_TO_BYTE (from
);
1524 DEC_BOTH (from
, from_byte
);
1525 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1526 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1527 code
= SYNTAX (ch0
);
1529 && (! words_include_escapes
1530 || (code
!= Sescape
&& code
!= Scharquote
)))
1531 || word_boundary_p (ch0
, ch1
))
1533 INC_BOTH (from
, from_byte
);
1546 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 0, 1, "^p",
1547 doc
: /* Move point forward ARG words (backward if ARG is negative).
1548 If ARG is omitted or nil, move point forward one word.
1550 If an edge of the buffer or a field boundary is reached, point is
1551 left there and the function returns nil. Field boundaries are not
1552 noticed if `inhibit-field-text-motion' is non-nil.
1554 The word boundaries are normally determined by the buffer's syntax
1555 table and character script (according to `char-script-table'), but
1556 `find-word-boundary-function-table', such as set up by `subword-mode',
1557 can change that. If a Lisp program needs to move by words determined
1558 strictly by the syntax table, it should use `forward-word-strictly'
1559 instead. See Info node `(elisp) Word Motion' for details. */)
1563 ptrdiff_t orig_val
, val
;
1566 XSETFASTINT (arg
, 1);
1570 val
= orig_val
= scan_words (PT
, XINT (arg
));
1572 val
= XINT (arg
) > 0 ? ZV
: BEGV
;
1574 /* Avoid jumping out of an input field. */
1575 tmp
= Fconstrain_to_field (make_number (val
), make_number (PT
),
1577 val
= XFASTINT (tmp
);
1580 return val
== orig_val
? Qt
: Qnil
;
1583 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
1584 doc
: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1585 STRING is like the inside of a `[...]' in a regular expression
1586 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1587 (but not at the end of a range; quoting is never needed there).
1588 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1589 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1590 Char classes, e.g. `[:alpha:]', are supported.
1592 Returns the distance traveled, either zero or positive. */)
1593 (Lisp_Object string
, Lisp_Object lim
)
1595 return skip_chars (1, string
, lim
, 1);
1598 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
1599 doc
: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1600 See `skip-chars-forward' for details.
1601 Returns the distance traveled, either zero or negative. */)
1602 (Lisp_Object string
, Lisp_Object lim
)
1604 return skip_chars (0, string
, lim
, 1);
1607 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
1608 doc
: /* Move point forward across chars in specified syntax classes.
1609 SYNTAX is a string of syntax code characters.
1610 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1611 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1612 This function returns the distance traveled, either zero or positive. */)
1613 (Lisp_Object syntax
, Lisp_Object lim
)
1615 return skip_syntaxes (1, syntax
, lim
);
1618 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
1619 doc
: /* Move point backward across chars in specified syntax classes.
1620 SYNTAX is a string of syntax code characters.
1621 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1622 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1623 This function returns either zero or a negative number, and the absolute value
1624 of this is the distance traveled. */)
1625 (Lisp_Object syntax
, Lisp_Object lim
)
1627 return skip_syntaxes (0, syntax
, lim
);
1631 skip_chars (bool forwardp
, Lisp_Object string
, Lisp_Object lim
,
1632 bool handle_iso_classes
)
1636 /* Store the ranges of non-ASCII characters. */
1637 int *char_ranges UNINIT
;
1638 int n_char_ranges
= 0;
1640 ptrdiff_t i
, i_byte
;
1641 /* True if the current buffer is multibyte and the region contains
1644 /* True if STRING is multibyte and it contains non-ASCII chars. */
1645 bool string_multibyte
;
1646 ptrdiff_t size_byte
;
1647 const unsigned char *str
;
1649 Lisp_Object iso_classes
;
1652 CHECK_STRING (string
);
1656 XSETINT (lim
, forwardp
? ZV
: BEGV
);
1658 CHECK_NUMBER_COERCE_MARKER (lim
);
1660 /* In any case, don't allow scan outside bounds of buffer. */
1661 if (XINT (lim
) > ZV
)
1662 XSETFASTINT (lim
, ZV
);
1663 if (XINT (lim
) < BEGV
)
1664 XSETFASTINT (lim
, BEGV
);
1666 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
1667 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
1668 string_multibyte
= SBYTES (string
) > SCHARS (string
);
1670 memset (fastmap
, 0, sizeof fastmap
);
1672 str
= SDATA (string
);
1673 size_byte
= SBYTES (string
);
1676 if (i_byte
< size_byte
1677 && SREF (string
, 0) == '^')
1679 negate
= 1; i_byte
++;
1682 /* Find the characters specified and set their elements of fastmap.
1683 Handle backslashes and ranges specially.
1685 If STRING contains non-ASCII characters, setup char_ranges for
1686 them and use fastmap only for their leading codes. */
1688 if (! string_multibyte
)
1690 bool string_has_eight_bit
= 0;
1692 /* At first setup fastmap. */
1693 while (i_byte
< size_byte
)
1695 if (handle_iso_classes
)
1697 const unsigned char *ch
= str
+ i_byte
;
1698 re_wctype_t cc
= re_wctype_parse (&ch
, size_byte
- i_byte
);
1700 error ("Invalid ISO C character class");
1703 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1713 if (i_byte
== size_byte
)
1718 /* Treat `-' as range character only if another character
1720 if (i_byte
+ 1 < size_byte
1721 && str
[i_byte
] == '-')
1725 /* Skip over the dash. */
1728 /* Get the end of the range. */
1731 && i_byte
< size_byte
)
1739 if (! ASCII_CHAR_P (c2
))
1740 string_has_eight_bit
= 1;
1746 if (! ASCII_CHAR_P (c
))
1747 string_has_eight_bit
= 1;
1751 /* If the current range is multibyte and STRING contains
1752 eight-bit chars, arrange fastmap and setup char_ranges for
1753 the corresponding multibyte chars. */
1754 if (multibyte
&& string_has_eight_bit
)
1757 char himap
[0200 + 1];
1758 memcpy (himap
, fastmap
+ 0200, 0200);
1760 memset (fastmap
+ 0200, 0, 0200);
1761 SAFE_NALLOCA (char_ranges
, 2, 128);
1764 while ((p1
= memchr (himap
+ i
, 1, 0200 - i
)))
1766 /* Deduce the next range C..C2 from the next clump of 1s
1767 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1768 order half of the old FASTMAP. */
1769 int c2
, leading_code
;
1771 c
= BYTE8_TO_CHAR (i
+ 0200);
1773 c2
= BYTE8_TO_CHAR (i
+ 0200 - 1);
1775 char_ranges
[n_char_ranges
++] = c
;
1776 char_ranges
[n_char_ranges
++] = c2
;
1777 leading_code
= CHAR_LEADING_CODE (c
);
1778 memset (fastmap
+ leading_code
, 1,
1779 CHAR_LEADING_CODE (c2
) - leading_code
+ 1);
1783 else /* STRING is multibyte */
1785 SAFE_NALLOCA (char_ranges
, 2, SCHARS (string
));
1787 while (i_byte
< size_byte
)
1789 int leading_code
= str
[i_byte
];
1791 if (handle_iso_classes
)
1793 const unsigned char *ch
= str
+ i_byte
;
1794 re_wctype_t cc
= re_wctype_parse (&ch
, size_byte
- i_byte
);
1796 error ("Invalid ISO C character class");
1799 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1805 if (leading_code
== '\\')
1807 if (++i_byte
== size_byte
)
1810 leading_code
= str
[i_byte
];
1812 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1816 /* Treat `-' as range character only if another character
1818 if (i_byte
+ 1 < size_byte
1819 && str
[i_byte
] == '-')
1821 int c2
, leading_code2
;
1823 /* Skip over the dash. */
1826 /* Get the end of the range. */
1827 leading_code2
= str
[i_byte
];
1828 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1832 && i_byte
< size_byte
)
1834 leading_code2
= str
[i_byte
];
1835 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1841 if (ASCII_CHAR_P (c
))
1843 while (c
<= c2
&& c
< 0x80)
1845 leading_code
= CHAR_LEADING_CODE (c
);
1847 if (! ASCII_CHAR_P (c
))
1849 int lim2
= leading_code2
+ 1;
1850 while (leading_code
< lim2
)
1851 fastmap
[leading_code
++] = 1;
1854 char_ranges
[n_char_ranges
++] = c
;
1855 char_ranges
[n_char_ranges
++] = c2
;
1861 if (ASCII_CHAR_P (c
))
1865 fastmap
[leading_code
] = 1;
1866 char_ranges
[n_char_ranges
++] = c
;
1867 char_ranges
[n_char_ranges
++] = c
;
1872 /* If the current range is unibyte and STRING contains non-ASCII
1873 chars, arrange fastmap for the corresponding unibyte
1876 if (! multibyte
&& n_char_ranges
> 0)
1878 memset (fastmap
+ 0200, 0, 0200);
1879 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1881 int c1
= char_ranges
[i
];
1882 int lim2
= char_ranges
[i
+ 1] + 1;
1884 for (; c1
< lim2
; c1
++)
1886 int b
= CHAR_TO_BYTE_SAFE (c1
);
1894 /* If ^ was the first character, complement the fastmap. */
1898 for (i
= 0; i
< sizeof fastmap
; i
++)
1902 for (i
= 0; i
< 0200; i
++)
1904 /* All non-ASCII chars possibly match. */
1905 for (; i
< sizeof fastmap
; i
++)
1911 ptrdiff_t start_point
= PT
;
1913 ptrdiff_t pos_byte
= PT_BYTE
;
1914 unsigned char *p
= PT_ADDR
, *endp
, *stop
;
1918 endp
= (XINT (lim
) == GPT
) ? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
1919 stop
= (pos
< GPT
&& GPT
< XINT (lim
)) ? GPT_ADDR
: endp
;
1923 endp
= CHAR_POS_ADDR (XINT (lim
));
1924 stop
= (pos
>= GPT
&& GPT
> XINT (lim
)) ? GAP_END_ADDR
: endp
;
1927 /* This code may look up syntax tables using functions that rely on the
1928 gl_state object. To make sure this object is not out of date,
1929 let's initialize it manually.
1930 We ignore syntax-table text-properties for now, since that's
1931 what we've done in the past. */
1932 SETUP_BUFFER_SYNTAX_TABLE ();
1947 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
1948 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
1958 if (! ASCII_CHAR_P (c
))
1960 /* As we are looking at a multibyte character, we
1961 must look up the character in the table
1962 CHAR_RANGES. If there's no data in the table,
1963 that character is not what we want to skip. */
1965 /* The following code do the right thing even if
1966 n_char_ranges is zero (i.e. no data in
1968 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1969 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
1971 if (!(negate
^ (i
< n_char_ranges
)))
1975 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
1989 if (!NILP (iso_classes
) && in_classes (*p
, iso_classes
))
1994 goto fwd_unibyte_ok
;
2000 p
++, pos
++, pos_byte
++;
2016 unsigned char *prev_p
= p
;
2019 while (stop
<= p
&& ! CHAR_HEAD_P (*p
));
2021 c
= STRING_CHAR (p
);
2023 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
2033 if (! ASCII_CHAR_P (c
))
2035 /* See the comment in the previous similar code. */
2036 for (i
= 0; i
< n_char_ranges
; i
+= 2)
2037 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
2039 if (!(negate
^ (i
< n_char_ranges
)))
2043 pos
--, pos_byte
-= prev_p
- p
;
2057 if (! NILP (iso_classes
) && in_classes (p
[-1], iso_classes
))
2062 goto back_unibyte_ok
;
2065 if (!fastmap
[p
[-1]])
2068 p
--, pos
--, pos_byte
--;
2073 SET_PT_BOTH (pos
, pos_byte
);
2076 return make_number (PT
- start_point
);
2082 skip_syntaxes (bool forwardp
, Lisp_Object string
, Lisp_Object lim
)
2085 unsigned char fastmap
[0400];
2087 ptrdiff_t i
, i_byte
;
2089 ptrdiff_t size_byte
;
2092 CHECK_STRING (string
);
2095 XSETINT (lim
, forwardp
? ZV
: BEGV
);
2097 CHECK_NUMBER_COERCE_MARKER (lim
);
2099 /* In any case, don't allow scan outside bounds of buffer. */
2100 if (XINT (lim
) > ZV
)
2101 XSETFASTINT (lim
, ZV
);
2102 if (XINT (lim
) < BEGV
)
2103 XSETFASTINT (lim
, BEGV
);
2105 if (forwardp
? (PT
>= XFASTINT (lim
)) : (PT
<= XFASTINT (lim
)))
2106 return make_number (0);
2108 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
2109 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
2111 memset (fastmap
, 0, sizeof fastmap
);
2113 if (SBYTES (string
) > SCHARS (string
))
2114 /* As this is very rare case (syntax spec is ASCII only), don't
2115 consider efficiency. */
2116 string
= string_make_unibyte (string
);
2118 str
= SDATA (string
);
2119 size_byte
= SBYTES (string
);
2122 if (i_byte
< size_byte
2123 && SREF (string
, 0) == '^')
2125 negate
= 1; i_byte
++;
2128 /* Find the syntaxes specified and set their elements of fastmap. */
2130 while (i_byte
< size_byte
)
2133 fastmap
[syntax_spec_code
[c
]] = 1;
2136 /* If ^ was the first character, complement the fastmap. */
2138 for (i
= 0; i
< sizeof fastmap
; i
++)
2142 ptrdiff_t start_point
= PT
;
2144 ptrdiff_t pos_byte
= PT_BYTE
;
2145 unsigned char *p
, *endp
, *stop
;
2147 SETUP_SYNTAX_TABLE (pos
, forwardp
? 1 : -1);
2153 p
= BYTE_POS_ADDR (pos_byte
);
2154 endp
= XINT (lim
) == GPT
? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
2155 stop
= pos
< GPT
&& GPT
< XINT (lim
) ? GPT_ADDR
: endp
;
2169 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
2172 if (! fastmap
[SYNTAX (c
)])
2174 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
2177 while (!parse_sexp_lookup_properties
2178 || pos
< gl_state
.e_property
);
2180 update_syntax_table_forward (pos
+ gl_state
.offset
,
2181 false, gl_state
.object
);
2186 p
= BYTE_POS_ADDR (pos_byte
);
2187 endp
= CHAR_POS_ADDR (XINT (lim
));
2188 stop
= pos
>= GPT
&& GPT
> XINT (lim
) ? GAP_END_ADDR
: endp
;
2201 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2203 unsigned char *prev_p
= p
;
2206 while (stop
<= p
&& ! CHAR_HEAD_P (*p
));
2208 c
= STRING_CHAR (p
);
2209 if (! fastmap
[SYNTAX (c
)])
2211 pos
--, pos_byte
-= prev_p
- p
;
2226 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2227 if (! fastmap
[SYNTAX (p
[-1])])
2229 p
--, pos
--, pos_byte
--;
2236 SET_PT_BOTH (pos
, pos_byte
);
2238 return make_number (PT
- start_point
);
2242 /* Return true if character C belongs to one of the ISO classes
2243 in the list ISO_CLASSES. Each class is represented by an
2244 integer which is its type according to re_wctype. */
2247 in_classes (int c
, Lisp_Object iso_classes
)
2249 bool fits_class
= 0;
2251 while (CONSP (iso_classes
))
2254 elt
= XCAR (iso_classes
);
2255 iso_classes
= XCDR (iso_classes
);
2257 if (re_iswctype (c
, XFASTINT (elt
)))
2264 /* Jump over a comment, assuming we are at the beginning of one.
2265 FROM is the current position.
2266 FROM_BYTE is the bytepos corresponding to FROM.
2267 Do not move past STOP (a charpos).
2268 The comment over which we have to jump is of style STYLE
2269 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2270 NESTING should be positive to indicate the nesting at the beginning
2271 for nested comments and should be zero or negative else.
2272 ST_COMMENT_STYLE cannot be nested.
2273 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2274 (or 0 If the search cannot start in the middle of a two-character).
2276 If successful, return true and store the charpos of the comment's
2277 end into *CHARPOS_PTR and the corresponding bytepos into
2278 *BYTEPOS_PTR. Else, return false and store the charpos STOP into
2279 *CHARPOS_PTR, the corresponding bytepos into *BYTEPOS_PTR and the
2280 current nesting (as defined for state->incomment) in
2281 *INCOMMENT_PTR. Should the last character scanned in an incomplete
2282 comment be a possible first character of a two character construct,
2283 we store its SYNTAX_WITH_FLAGS into *last_syntax_ptr. Otherwise,
2284 we store Smax into *last_syntax_ptr.
2286 The comment end is the last character of the comment rather than the
2287 character just after the comment.
2289 Global syntax data is assumed to initially be valid for FROM and
2290 remains valid for forward search starting at the returned position. */
2293 forw_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
2294 EMACS_INT nesting
, int style
, int prev_syntax
,
2295 ptrdiff_t *charpos_ptr
, ptrdiff_t *bytepos_ptr
,
2296 EMACS_INT
*incomment_ptr
, int *last_syntax_ptr
)
2298 unsigned short int quit_count
= 0;
2300 enum syntaxcode code
;
2301 int syntax
, other_syntax
;
2303 if (nesting
<= 0) nesting
= -1;
2305 /* Enter the loop in the middle so that we find
2306 a 2-char comment ender if we start in the middle of it. */
2307 syntax
= prev_syntax
;
2308 code
= syntax
& 0xff;
2309 if (syntax
!= 0 && from
< stop
) goto forw_incomment
;
2315 *incomment_ptr
= nesting
;
2316 *charpos_ptr
= from
;
2317 *bytepos_ptr
= from_byte
;
2319 (code
== Sescape
|| code
== Scharquote
2320 || SYNTAX_FLAGS_COMEND_FIRST (syntax
)
2322 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)))
2326 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2327 syntax
= SYNTAX_WITH_FLAGS (c
);
2328 code
= syntax
& 0xff;
2329 if (code
== Sendcomment
2330 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
2331 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ?
2332 (nesting
> 0 && --nesting
== 0) : nesting
< 0)
2333 && !(Vcomment_end_can_be_escaped
&& char_quoted (from
, from_byte
)))
2334 /* We have encountered a comment end of the same style
2335 as the comment sequence which began this comment
2338 if (code
== Scomment_fence
2339 && style
== ST_COMMENT_STYLE
)
2340 /* We have encountered a comment end of the same style
2341 as the comment sequence which began this comment
2346 && SYNTAX_FLAGS_COMMENT_NESTED (syntax
)
2347 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
)
2348 /* We have encountered a nested comment of the same style
2349 as the comment sequence which began this comment section. */
2351 INC_BOTH (from
, from_byte
);
2352 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2355 if (from
< stop
&& SYNTAX_FLAGS_COMEND_FIRST (syntax
)
2356 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2357 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2358 SYNTAX_FLAGS_COMEND_SECOND (other_syntax
))
2359 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, other_syntax
) == style
2360 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2361 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
))
2362 ? nesting
> 0 : nesting
< 0))
2364 syntax
= Smax
; /* So that "|#" (lisp) can not return
2365 the syntax of "#" in *last_syntax_ptr. */
2367 /* We have encountered a comment end of the same style
2368 as the comment sequence which began this comment section. */
2372 INC_BOTH (from
, from_byte
);
2373 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2378 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
2379 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2380 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2381 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
) == style
2382 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2383 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2384 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
)))
2385 /* We have encountered a nested comment of the same style
2386 as the comment sequence which began this comment section. */
2388 syntax
= Smax
; /* So that "#|#" isn't also a comment ender. */
2389 INC_BOTH (from
, from_byte
);
2390 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2394 rarely_quit (++quit_count
);
2396 *charpos_ptr
= from
;
2397 *bytepos_ptr
= from_byte
;
2398 *last_syntax_ptr
= Smax
; /* Any syntactic power the last byte had is
2403 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
2405 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2406 Stop scanning if we find something other than a comment or whitespace.
2407 Set point to where scanning stops.
2408 If COUNT comments are found as expected, with nothing except whitespace
2409 between them, return t; otherwise return nil. */)
2412 ptrdiff_t from
, from_byte
, stop
;
2414 enum syntaxcode code
;
2415 int comstyle
= 0; /* style of comment encountered */
2416 bool comnested
= 0; /* whether the comment is nestable or not */
2419 ptrdiff_t out_charpos
, out_bytepos
;
2422 unsigned short int quit_count
= 0;
2424 CHECK_NUMBER (count
);
2425 count1
= XINT (count
);
2426 stop
= count1
> 0 ? ZV
: BEGV
;
2429 from_byte
= PT_BYTE
;
2431 SETUP_SYNTAX_TABLE (from
, count1
);
2436 bool comstart_first
;
2437 int syntax
, other_syntax
;
2441 SET_PT_BOTH (from
, from_byte
);
2444 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2445 syntax
= SYNTAX_WITH_FLAGS (c
);
2447 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2448 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2449 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2450 INC_BOTH (from
, from_byte
);
2451 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2452 if (from
< stop
&& comstart_first
2453 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2454 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2455 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
)))
2457 /* We have encountered a comment start sequence and we
2458 are ignoring all text inside comments. We must record
2459 the comment style this sequence begins so that later,
2460 only a comment end of the same style actually ends
2461 the comment section. */
2463 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2464 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2465 INC_BOTH (from
, from_byte
);
2466 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2468 rarely_quit (++quit_count
);
2470 while (code
== Swhitespace
|| (code
== Sendcomment
&& c
== '\n'));
2472 if (code
== Scomment_fence
)
2473 comstyle
= ST_COMMENT_STYLE
;
2474 else if (code
!= Scomment
)
2476 DEC_BOTH (from
, from_byte
);
2477 SET_PT_BOTH (from
, from_byte
);
2480 /* We're at the start of a comment. */
2481 found
= forw_comment (from
, from_byte
, stop
, comnested
, comstyle
, 0,
2482 &out_charpos
, &out_bytepos
, &dummy
, &dummy2
);
2483 from
= out_charpos
; from_byte
= out_bytepos
;
2486 SET_PT_BOTH (from
, from_byte
);
2489 INC_BOTH (from
, from_byte
);
2490 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2491 /* We have skipped one comment. */
2501 SET_PT_BOTH (BEGV
, BEGV_BYTE
);
2505 DEC_BOTH (from
, from_byte
);
2506 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2507 bool quoted
= char_quoted (from
, from_byte
);
2508 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2509 int syntax
= SYNTAX_WITH_FLAGS (c
);
2512 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2513 if (code
== Sendcomment
)
2514 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2515 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
2516 && prev_char_comend_first (from
, from_byte
)
2517 && !char_quoted (from
- 1, dec_bytepos (from_byte
)))
2520 /* We must record the comment style encountered so that
2521 later, we can match only the proper comment begin
2522 sequence of the same style. */
2523 DEC_BOTH (from
, from_byte
);
2525 /* Calling char_quoted, above, set up global syntax position
2526 at the new value of FROM. */
2527 c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2528 other_syntax
= SYNTAX_WITH_FLAGS (c1
);
2529 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2530 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2533 if (code
== Scomment_fence
)
2535 /* Skip until first preceding unquoted comment_fence. */
2536 bool fence_found
= 0;
2537 ptrdiff_t ini
= from
, ini_byte
= from_byte
;
2541 DEC_BOTH (from
, from_byte
);
2542 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2543 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2544 if (SYNTAX (c
) == Scomment_fence
2545 && !char_quoted (from
, from_byte
))
2550 else if (from
== stop
)
2552 rarely_quit (++quit_count
);
2554 if (fence_found
== 0)
2556 from
= ini
; /* Set point to ini + 1. */
2557 from_byte
= ini_byte
;
2561 /* We have skipped one comment. */
2564 else if (code
== Sendcomment
)
2566 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2567 &out_charpos
, &out_bytepos
);
2571 /* This end-of-line is not an end-of-comment.
2572 Treat it like a whitespace.
2573 CC-mode (and maybe others) relies on this behavior. */
2577 /* Failure: we should go back to the end of this
2578 not-quite-endcomment. */
2579 if (SYNTAX (c
) != code
)
2580 /* It was a two-char Sendcomment. */
2581 INC_BOTH (from
, from_byte
);
2587 /* We have skipped one comment. */
2588 from
= out_charpos
, from_byte
= out_bytepos
;
2592 else if (code
!= Swhitespace
|| quoted
)
2595 INC_BOTH (from
, from_byte
);
2596 SET_PT_BOTH (from
, from_byte
);
2600 rarely_quit (++quit_count
);
2606 SET_PT_BOTH (from
, from_byte
);
2610 /* Return syntax code of character C if C is an ASCII character
2611 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2613 static enum syntaxcode
2614 syntax_multibyte (int c
, bool multibyte_symbol_p
)
2616 return ASCII_CHAR_P (c
) || !multibyte_symbol_p
? SYNTAX (c
) : Ssymbol
;
2620 scan_lists (EMACS_INT from
, EMACS_INT count
, EMACS_INT depth
, bool sexpflag
)
2623 ptrdiff_t stop
= count
> 0 ? ZV
: BEGV
;
2628 enum syntaxcode code
;
2629 EMACS_INT min_depth
= depth
; /* Err out if depth gets less than this. */
2630 int comstyle
= 0; /* Style of comment encountered. */
2631 bool comnested
= 0; /* Whether the comment is nestable or not. */
2633 EMACS_INT last_good
= from
;
2635 ptrdiff_t from_byte
;
2636 ptrdiff_t out_bytepos
, out_charpos
;
2639 bool multibyte_symbol_p
= sexpflag
&& multibyte_syntax_as_symbol
;
2640 unsigned short int quit_count
= 0;
2642 if (depth
> 0) min_depth
= 0;
2644 if (from
> ZV
) from
= ZV
;
2645 if (from
< BEGV
) from
= BEGV
;
2647 from_byte
= CHAR_TO_BYTE (from
);
2651 SETUP_SYNTAX_TABLE (from
, count
);
2656 rarely_quit (++quit_count
);
2657 bool comstart_first
, prefix
;
2658 int syntax
, other_syntax
;
2659 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2660 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2661 syntax
= SYNTAX_WITH_FLAGS (c
);
2662 code
= syntax_multibyte (c
, multibyte_symbol_p
);
2663 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2664 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2665 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2666 prefix
= SYNTAX_FLAGS_PREFIX (syntax
);
2667 if (depth
== min_depth
)
2669 INC_BOTH (from
, from_byte
);
2670 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2671 if (from
< stop
&& comstart_first
2672 && (c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2673 other_syntax
= SYNTAX_WITH_FLAGS (c
),
2674 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2675 && parse_sexp_ignore_comments
)
2677 /* We have encountered a comment start sequence and we
2678 are ignoring all text inside comments. We must record
2679 the comment style this sequence begins so that later,
2680 only a comment end of the same style actually ends
2681 the comment section. */
2683 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2684 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2685 INC_BOTH (from
, from_byte
);
2686 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2698 INC_BOTH (from
, from_byte
);
2699 /* Treat following character as a word constituent. */
2703 if (depth
|| !sexpflag
) break;
2704 /* This word counts as a sexp; return at end of it. */
2707 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2709 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2710 switch (syntax_multibyte (c
, multibyte_symbol_p
))
2714 INC_BOTH (from
, from_byte
);
2725 INC_BOTH (from
, from_byte
);
2726 rarely_quit (++quit_count
);
2730 case Scomment_fence
:
2731 comstyle
= ST_COMMENT_STYLE
;
2734 if (!parse_sexp_ignore_comments
) break;
2735 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2736 found
= forw_comment (from
, from_byte
, stop
,
2737 comnested
, comstyle
, 0,
2738 &out_charpos
, &out_bytepos
, &dummy
,
2740 from
= out_charpos
, from_byte
= out_bytepos
;
2747 INC_BOTH (from
, from_byte
);
2748 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2754 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (from_byte
))
2756 INC_BOTH (from
, from_byte
);
2766 if (!++depth
) goto done
;
2771 if (!--depth
) goto done
;
2772 if (depth
< min_depth
)
2773 xsignal3 (Qscan_error
,
2774 build_string ("Containing expression ends prematurely"),
2775 make_number (last_good
), make_number (from
));
2780 temp_pos
= dec_bytepos (from_byte
);
2781 stringterm
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2784 enum syntaxcode c_code
;
2787 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2788 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2789 c_code
= syntax_multibyte (c
, multibyte_symbol_p
);
2791 ? c
== stringterm
&& c_code
== Sstring
2792 : c_code
== Sstring_fence
)
2795 if (c_code
== Scharquote
|| c_code
== Sescape
)
2796 INC_BOTH (from
, from_byte
);
2797 INC_BOTH (from
, from_byte
);
2798 rarely_quit (++quit_count
);
2800 INC_BOTH (from
, from_byte
);
2801 if (!depth
&& sexpflag
) goto done
;
2804 /* Ignore whitespace, punctuation, quote, endcomment. */
2809 /* Reached end of buffer. Error if within object, return nil if between */
2815 /* End of object reached */
2824 rarely_quit (++quit_count
);
2825 DEC_BOTH (from
, from_byte
);
2826 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2827 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2828 int syntax
= SYNTAX_WITH_FLAGS (c
);
2829 code
= syntax_multibyte (c
, multibyte_symbol_p
);
2830 if (depth
== min_depth
)
2833 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2834 if (code
== Sendcomment
)
2835 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2836 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
2837 && prev_char_comend_first (from
, from_byte
)
2838 && parse_sexp_ignore_comments
)
2840 /* We must record the comment style encountered so that
2841 later, we can match only the proper comment begin
2842 sequence of the same style. */
2843 int c2
, other_syntax
;
2844 DEC_BOTH (from
, from_byte
);
2845 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2847 c2
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2848 other_syntax
= SYNTAX_WITH_FLAGS (c2
);
2849 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2850 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2853 /* Quoting turns anything except a comment-ender
2854 into a word character. Note that this cannot be true
2855 if we decremented FROM in the if-statement above. */
2856 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
2858 DEC_BOTH (from
, from_byte
);
2861 else if (SYNTAX_FLAGS_PREFIX (syntax
))
2870 if (depth
|| !sexpflag
) break;
2871 /* This word counts as a sexp; count object finished
2872 after passing it. */
2875 temp_pos
= from_byte
;
2876 if (! NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
2880 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2881 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2882 /* Don't allow comment-end to be quoted. */
2883 if (syntax_multibyte (c1
, multibyte_symbol_p
) == Sendcomment
)
2885 quoted
= char_quoted (from
- 1, temp_pos
);
2888 DEC_BOTH (from
, from_byte
);
2889 temp_pos
= dec_bytepos (temp_pos
);
2890 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2892 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2894 switch (syntax_multibyte (c1
, multibyte_symbol_p
))
2896 case Sword
: case Ssymbol
: case Squote
: break;
2897 default: goto done2
;
2899 DEC_BOTH (from
, from_byte
);
2900 rarely_quit (++quit_count
);
2909 temp_pos
= dec_bytepos (from_byte
);
2910 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2911 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (temp_pos
))
2912 DEC_BOTH (from
, from_byte
);
2922 if (!++depth
) goto done2
;
2927 if (!--depth
) goto done2
;
2928 if (depth
< min_depth
)
2929 xsignal3 (Qscan_error
,
2930 build_string ("Containing expression ends prematurely"),
2931 make_number (last_good
), make_number (from
));
2935 if (!parse_sexp_ignore_comments
)
2937 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2938 &out_charpos
, &out_bytepos
);
2939 /* FIXME: if !found, it really wasn't a comment-end.
2940 For single-char Sendcomment, we can't do much about it apart
2941 from skipping the char.
2942 For 2-char endcomments, we could try again, taking both
2943 chars as separate entities, but it's a lot of trouble
2944 for very little gain, so we don't bother either. -sm */
2946 from
= out_charpos
, from_byte
= out_bytepos
;
2949 case Scomment_fence
:
2955 DEC_BOTH (from
, from_byte
);
2956 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2957 if (!char_quoted (from
, from_byte
))
2959 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2960 if (syntax_multibyte (c
, multibyte_symbol_p
) == code
)
2963 rarely_quit (++quit_count
);
2965 if (code
== Sstring_fence
&& !depth
&& sexpflag
) goto done2
;
2969 stringterm
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2974 DEC_BOTH (from
, from_byte
);
2975 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2976 if (!char_quoted (from
, from_byte
))
2978 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2980 && (syntax_multibyte (c
, multibyte_symbol_p
)
2984 rarely_quit (++quit_count
);
2986 if (!depth
&& sexpflag
) goto done2
;
2989 /* Ignore whitespace, punctuation, quote, endcomment. */
2994 /* Reached start of buffer. Error if within object, return nil if between */
3005 XSETFASTINT (val
, from
);
3009 xsignal3 (Qscan_error
,
3010 build_string ("Unbalanced parentheses"),
3011 make_number (last_good
), make_number (from
));
3014 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
3015 doc
: /* Scan from character number FROM by COUNT lists.
3016 Scan forward if COUNT is positive, backward if COUNT is negative.
3017 Return the character number of the position thus found.
3019 A \"list", in this context, refers to a balanced parenthetical
3020 grouping, as determined by the syntax table.
3022 If DEPTH is nonzero, treat that as the nesting depth of the starting
3023 point (i.e. the starting point is DEPTH parentheses deep). This
3024 function scans over parentheses until the depth goes to zero COUNT
3025 times. Hence, positive DEPTH moves out that number of levels of
3026 parentheses, while negative DEPTH moves to a deeper level.
3028 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3030 If we reach the beginning or end of the accessible part of the buffer
3031 before we have scanned over COUNT lists, return nil if the depth at
3032 that point is zero, and signal an error if the depth is nonzero. */)
3033 (Lisp_Object from
, Lisp_Object count
, Lisp_Object depth
)
3035 CHECK_NUMBER (from
);
3036 CHECK_NUMBER (count
);
3037 CHECK_NUMBER (depth
);
3039 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
3042 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
3043 doc
: /* Scan from character number FROM by COUNT balanced expressions.
3044 If COUNT is negative, scan backwards.
3045 Returns the character number of the position thus found.
3047 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3049 If the beginning or end of (the accessible part of) the buffer is reached
3050 in the middle of a parenthetical grouping, an error is signaled.
3051 If the beginning or end is reached between groupings
3052 but before count is used up, nil is returned. */)
3053 (Lisp_Object from
, Lisp_Object count
)
3055 CHECK_NUMBER (from
);
3056 CHECK_NUMBER (count
);
3058 return scan_lists (XINT (from
), XINT (count
), 0, 1);
3061 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
3063 doc
: /* Move point backward over any number of chars with prefix syntax.
3064 This includes chars with expression prefix syntax class (\\=') and those with
3065 the prefix syntax flag (p). */)
3068 ptrdiff_t beg
= BEGV
;
3069 ptrdiff_t opoint
= PT
;
3070 ptrdiff_t opoint_byte
= PT_BYTE
;
3072 ptrdiff_t pos_byte
= PT_BYTE
;
3077 SET_PT_BOTH (opoint
, opoint_byte
);
3082 SETUP_SYNTAX_TABLE (pos
, -1);
3084 DEC_BOTH (pos
, pos_byte
);
3086 while (!char_quoted (pos
, pos_byte
)
3087 /* Previous statement updates syntax table. */
3088 && ((c
= FETCH_CHAR_AS_MULTIBYTE (pos_byte
), SYNTAX (c
) == Squote
)
3089 || syntax_prefix_flag_p (c
)))
3092 opoint_byte
= pos_byte
;
3096 DEC_BOTH (pos
, pos_byte
);
3100 SET_PT_BOTH (opoint
, opoint_byte
);
3106 /* If the character at FROM_BYTE is the second part of a 2-character
3107 comment opener based on PREV_FROM_SYNTAX, update STATE and return
3110 in_2char_comment_start (struct lisp_parse_state
*state
,
3111 int prev_from_syntax
,
3112 ptrdiff_t prev_from
,
3113 ptrdiff_t from_byte
)
3116 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax
)
3117 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
3118 syntax
= SYNTAX_WITH_FLAGS (c1
),
3119 SYNTAX_FLAGS_COMSTART_SECOND (syntax
)))
3121 /* Record the comment style we have entered so that only
3122 the comment-end sequence of the same style actually
3123 terminates the comment section. */
3125 = SYNTAX_FLAGS_COMMENT_STYLE (syntax
, prev_from_syntax
);
3126 bool comnested
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
)
3127 | SYNTAX_FLAGS_COMMENT_NESTED (syntax
));
3128 state
->incomment
= comnested
? 1 : -1;
3129 state
->comstr_start
= prev_from
;
3135 /* Parse forward from FROM / FROM_BYTE to END,
3136 assuming that FROM has state STATE,
3137 and return a description of the state of the parse at END.
3138 If STOPBEFORE, stop at the start of an atom.
3139 If COMMENTSTOP is 1, stop at the start of a comment.
3140 If COMMENTSTOP is -1, stop at the start or end of a comment,
3141 after the beginning of a string, or after the end of a string. */
3144 scan_sexps_forward (struct lisp_parse_state
*state
,
3145 ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t end
,
3146 EMACS_INT targetdepth
, bool stopbefore
,
3149 enum syntaxcode code
;
3150 struct level
{ ptrdiff_t last
, prev
; };
3151 struct level levelstart
[100];
3152 struct level
*curlevel
= levelstart
;
3153 struct level
*endlevel
= levelstart
+ 100;
3154 EMACS_INT depth
; /* Paren depth of current scanning location.
3155 level - levelstart equals this except
3156 when the depth becomes negative. */
3157 EMACS_INT mindepth
; /* Lowest DEPTH value seen. */
3158 bool start_quoted
= 0; /* True means starting after a char quote. */
3160 ptrdiff_t prev_from
; /* Keep one character before FROM. */
3161 ptrdiff_t prev_from_byte
;
3162 int prev_from_syntax
, prev_prev_from_syntax
;
3163 bool boundary_stop
= commentstop
== -1;
3166 ptrdiff_t out_bytepos
, out_charpos
;
3168 unsigned short int quit_count
= 0;
3171 prev_from_byte
= from_byte
;
3173 DEC_BOTH (prev_from
, prev_from_byte
);
3175 /* Use this macro instead of `from++'. */
3177 do { prev_from = from; \
3178 prev_from_byte = from_byte; \
3179 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3180 prev_prev_from_syntax = prev_from_syntax; \
3181 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3182 INC_BOTH (from, from_byte); \
3184 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3189 depth
= state
->depth
;
3190 start_quoted
= state
->quoted
;
3191 prev_prev_from_syntax
= Smax
;
3192 prev_from_syntax
= state
->prev_syntax
;
3194 tem
= state
->levelstarts
;
3195 while (!NILP (tem
)) /* >= second enclosing sexps. */
3197 Lisp_Object temhd
= Fcar (tem
);
3198 if (RANGED_INTEGERP (PTRDIFF_MIN
, temhd
, PTRDIFF_MAX
))
3199 curlevel
->last
= XINT (temhd
);
3200 if (++curlevel
== endlevel
)
3201 curlevel
--; /* error ("Nesting too deep for parser"); */
3202 curlevel
->prev
= -1;
3203 curlevel
->last
= -1;
3206 curlevel
->prev
= -1;
3207 curlevel
->last
= -1;
3212 SETUP_SYNTAX_TABLE (from
, 1);
3214 /* Enter the loop at a place appropriate for initial state. */
3216 if (state
->incomment
)
3217 goto startincomment
;
3218 if (state
->instring
>= 0)
3220 nofence
= state
->instring
!= ST_STRING_STYLE
;
3222 goto startquotedinstring
;
3225 else if (start_quoted
)
3227 else if ((from
< end
)
3228 && (in_2char_comment_start (state
, prev_from_syntax
,
3229 prev_from
, from_byte
)))
3232 prev_from_syntax
= Smax
; /* the syntax has already been "used up". */
3238 rarely_quit (++quit_count
);
3242 && (in_2char_comment_start (state
, prev_from_syntax
,
3243 prev_from
, from_byte
)))
3246 prev_from_syntax
= Smax
; /* the syntax has already been "used up". */
3250 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax
))
3252 code
= prev_from_syntax
& 0xff;
3257 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3258 curlevel
->last
= prev_from
;
3260 if (from
== end
) goto endquoted
;
3263 /* treat following character as a word constituent */
3266 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3267 curlevel
->last
= prev_from
;
3271 if (in_2char_comment_start (state
, prev_from_syntax
,
3272 prev_from
, from_byte
))
3275 prev_from_syntax
= Smax
; /* the syntax has already been "used up". */
3279 int symchar
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3280 switch (SYNTAX (symchar
))
3285 if (from
== end
) goto endquoted
;
3295 rarely_quit (++quit_count
);
3298 curlevel
->prev
= curlevel
->last
;
3301 case Scomment_fence
:
3302 /* Record the comment style we have entered so that only
3303 the comment-end sequence of the same style actually
3304 terminates the comment section. */
3305 state
->comstyle
= ST_COMMENT_STYLE
;
3306 state
->incomment
= -1;
3307 state
->comstr_start
= prev_from
;
3310 state
->comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax
, 0);
3311 state
->incomment
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
) ?
3313 state
->comstr_start
= prev_from
;
3315 if (commentstop
|| boundary_stop
) goto done
;
3317 /* The (from == BEGV) test was to enter the loop in the middle so
3318 that we find a 2-char comment ender even if we start in the
3319 middle of it. We don't want to do that if we're just at the
3320 beginning of the comment (think of (*) ... (*)). */
3321 found
= forw_comment (from
, from_byte
, end
,
3322 state
->incomment
, state
->comstyle
,
3323 from
== BEGV
? 0 : prev_from_syntax
,
3324 &out_charpos
, &out_bytepos
, &state
->incomment
,
3326 from
= out_charpos
; from_byte
= out_bytepos
;
3327 /* Beware! prev_from and friends (except prev_from_syntax)
3328 are invalid now. Luckily, the `done' doesn't use them
3329 and the INC_FROM sets them to a sane value without
3331 if (!found
) goto done
;
3333 state
->incomment
= 0;
3334 state
->comstyle
= 0; /* reset the comment style */
3335 prev_from_syntax
= Smax
; /* For the comment closer */
3336 if (boundary_stop
) goto done
;
3340 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3342 /* curlevel++->last ran into compiler bug on Apollo */
3343 curlevel
->last
= prev_from
;
3344 if (++curlevel
== endlevel
)
3345 curlevel
--; /* error ("Nesting too deep for parser"); */
3346 curlevel
->prev
= -1;
3347 curlevel
->last
= -1;
3348 if (targetdepth
== depth
) goto done
;
3353 if (depth
< mindepth
)
3355 if (curlevel
!= levelstart
)
3357 curlevel
->prev
= curlevel
->last
;
3358 if (targetdepth
== depth
) goto done
;
3363 state
->comstr_start
= from
- 1;
3364 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3365 curlevel
->last
= prev_from
;
3366 state
->instring
= (code
== Sstring
3367 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte
))
3369 if (boundary_stop
) goto done
;
3372 nofence
= state
->instring
!= ST_STRING_STYLE
;
3377 enum syntaxcode c_code
;
3379 if (from
>= end
) goto done
;
3380 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3381 c_code
= SYNTAX (c
);
3383 /* Check C_CODE here so that if the char has
3384 a syntax-table property which says it is NOT
3385 a string character, it does not end the string. */
3386 if (nofence
&& c
== state
->instring
&& c_code
== Sstring
)
3392 if (!nofence
) goto string_end
;
3398 startquotedinstring
:
3399 if (from
>= end
) goto endquoted
;
3406 rarely_quit (++quit_count
);
3410 state
->instring
= -1;
3411 curlevel
->prev
= curlevel
->last
;
3413 if (boundary_stop
) goto done
;
3417 /* FIXME: We should do something with it. */
3420 /* Ignore whitespace, punctuation, quote, endcomment. */
3426 stop
: /* Here if stopping before start of sexp. */
3427 from
= prev_from
; /* We have just fetched the char that starts it; */
3428 from_byte
= prev_from_byte
;
3429 prev_from_syntax
= prev_prev_from_syntax
;
3430 goto done
; /* but return the position before it. */
3435 state
->depth
= depth
;
3436 state
->mindepth
= mindepth
;
3437 state
->thislevelstart
= curlevel
->prev
;
3438 state
->prevlevelstart
3439 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
3440 state
->location
= from
;
3441 state
->location_byte
= from_byte
;
3442 state
->levelstarts
= Qnil
;
3443 while (curlevel
> levelstart
)
3444 state
->levelstarts
= Fcons (make_number ((--curlevel
)->last
),
3445 state
->levelstarts
);
3446 state
->prev_syntax
= (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax
)
3447 || state
->quoted
) ? prev_from_syntax
: Smax
;
3450 /* Convert a (lisp) parse state to the internal form used in
3451 scan_sexps_forward. */
3453 internalize_parse_state (Lisp_Object external
, struct lisp_parse_state
*state
)
3457 if (NILP (external
))
3460 state
->instring
= -1;
3461 state
->incomment
= 0;
3463 state
->comstyle
= 0; /* comment style a by default. */
3464 state
->comstr_start
= -1; /* no comment/string seen. */
3465 state
->levelstarts
= Qnil
;
3466 state
->prev_syntax
= Smax
;
3470 tem
= Fcar (external
);
3472 state
->depth
= XINT (tem
);
3476 external
= Fcdr (external
);
3477 external
= Fcdr (external
);
3478 external
= Fcdr (external
);
3479 tem
= Fcar (external
);
3480 /* Check whether we are inside string_fence-style string: */
3481 state
->instring
= (!NILP (tem
)
3482 ? (CHARACTERP (tem
) ? XFASTINT (tem
) : ST_STRING_STYLE
)
3485 external
= Fcdr (external
);
3486 tem
= Fcar (external
);
3487 state
->incomment
= (!NILP (tem
)
3488 ? (INTEGERP (tem
) ? XINT (tem
) : -1)
3491 external
= Fcdr (external
);
3492 tem
= Fcar (external
);
3493 state
->quoted
= !NILP (tem
);
3495 /* if the eighth element of the list is nil, we are in comment
3496 style a. If it is non-nil, we are in comment style b */
3497 external
= Fcdr (external
);
3498 external
= Fcdr (external
);
3499 tem
= Fcar (external
);
3500 state
->comstyle
= (NILP (tem
)
3502 : (RANGED_INTEGERP (0, tem
, ST_COMMENT_STYLE
)
3504 : ST_COMMENT_STYLE
));
3506 external
= Fcdr (external
);
3507 tem
= Fcar (external
);
3508 state
->comstr_start
=
3509 RANGED_INTEGERP (PTRDIFF_MIN
, tem
, PTRDIFF_MAX
) ? XINT (tem
) : -1;
3510 external
= Fcdr (external
);
3511 tem
= Fcar (external
);
3512 state
->levelstarts
= tem
;
3514 external
= Fcdr (external
);
3515 tem
= Fcar (external
);
3516 state
->prev_syntax
= NILP (tem
) ? Smax
: XINT (tem
);
3520 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
3521 doc
: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3522 Parsing stops at TO or when certain criteria are met;
3523 point is set to where parsing stops.
3524 If fifth arg OLDSTATE is omitted or nil,
3525 parsing assumes that FROM is the beginning of a function.
3527 Value is a list of elements describing final state of parsing:
3529 1. character address of start of innermost containing list; nil if none.
3530 2. character address of start of last complete sexp terminated.
3531 3. non-nil if inside a string.
3532 (it is the character that will terminate the string,
3533 or t if the string should be terminated by a generic string delimiter.)
3534 4. nil if outside a comment, t if inside a non-nestable comment,
3535 else an integer (the current comment nesting).
3536 5. t if following a quote character.
3537 6. the minimum paren-depth encountered during this scan.
3538 7. style of comment, if any.
3539 8. character address of start of comment or string; nil if not in one.
3540 9. List of positions of currently open parens, outermost first.
3541 10. When the last position scanned holds the first character of a
3542 (potential) two character construct, the syntax of that position,
3543 otherwise nil. That construct can be a two character comment
3544 delimiter or an Escaped or Char-quoted character.
3545 11..... Possible further internal information used by `parse-partial-sexp'.
3547 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3548 in parentheses becomes equal to TARGETDEPTH.
3549 Fourth arg STOPBEFORE non-nil means stop when we come to
3550 any character that starts a sexp.
3551 Fifth arg OLDSTATE is a list like what this function returns.
3552 It is used to initialize the state of the parse. Elements number 1, 2, 6
3554 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
3555 If it is the symbol `syntax-table', stop after the start of a comment or a
3556 string, or after end of a comment or a string. */)
3557 (Lisp_Object from
, Lisp_Object to
, Lisp_Object targetdepth
,
3558 Lisp_Object stopbefore
, Lisp_Object oldstate
, Lisp_Object commentstop
)
3560 struct lisp_parse_state state
;
3563 if (!NILP (targetdepth
))
3565 CHECK_NUMBER (targetdepth
);
3566 target
= XINT (targetdepth
);
3569 target
= TYPE_MINIMUM (EMACS_INT
); /* We won't reach this depth. */
3571 validate_region (&from
, &to
);
3572 internalize_parse_state (oldstate
, &state
);
3573 scan_sexps_forward (&state
, XINT (from
), CHAR_TO_BYTE (XINT (from
)),
3575 target
, !NILP (stopbefore
),
3577 ? 0 : (EQ (commentstop
, Qsyntax_table
) ? -1 : 1)));
3579 SET_PT_BOTH (state
.location
, state
.location_byte
);
3582 Fcons (make_number (state
.depth
),
3583 Fcons (state
.prevlevelstart
< 0
3584 ? Qnil
: make_number (state
.prevlevelstart
),
3585 Fcons (state
.thislevelstart
< 0
3586 ? Qnil
: make_number (state
.thislevelstart
),
3587 Fcons (state
.instring
>= 0
3588 ? (state
.instring
== ST_STRING_STYLE
3589 ? Qt
: make_number (state
.instring
)) : Qnil
,
3590 Fcons (state
.incomment
< 0 ? Qt
:
3591 (state
.incomment
== 0 ? Qnil
:
3592 make_number (state
.incomment
)),
3593 Fcons (state
.quoted
? Qt
: Qnil
,
3594 Fcons (make_number (state
.mindepth
),
3595 Fcons ((state
.comstyle
3596 ? (state
.comstyle
== ST_COMMENT_STYLE
3598 : make_number (state
.comstyle
))
3600 Fcons (((state
.incomment
3601 || (state
.instring
>= 0))
3602 ? make_number (state
.comstr_start
)
3604 Fcons (state
.levelstarts
,
3605 Fcons (state
.prev_syntax
== Smax
3607 : make_number (state
.prev_syntax
),
3612 init_syntax_once (void)
3617 /* This has to be done here, before we call Fmake_char_table. */
3618 DEFSYM (Qsyntax_table
, "syntax-table");
3620 /* Create objects which can be shared among syntax tables. */
3621 Vsyntax_code_object
= make_uninit_vector (Smax
);
3622 for (i
= 0; i
< Smax
; i
++)
3623 ASET (Vsyntax_code_object
, i
, Fcons (make_number (i
), Qnil
));
3625 /* Now we are ready to set up this property, so we can
3626 create syntax tables. */
3627 Fput (Qsyntax_table
, Qchar_table_extra_slots
, make_number (0));
3629 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3631 Vstandard_syntax_table
= Fmake_char_table (Qsyntax_table
, temp
);
3633 /* Control characters should not be whitespace. */
3634 temp
= AREF (Vsyntax_code_object
, Spunct
);
3635 for (i
= 0; i
<= ' ' - 1; i
++)
3636 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3637 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 0177, temp
);
3639 /* Except that a few really are whitespace. */
3640 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3641 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ' ', temp
);
3642 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\t', temp
);
3643 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\n', temp
);
3644 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 015, temp
);
3645 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 014, temp
);
3647 temp
= AREF (Vsyntax_code_object
, Sword
);
3648 for (i
= 'a'; i
<= 'z'; i
++)
3649 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3650 for (i
= 'A'; i
<= 'Z'; i
++)
3651 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3652 for (i
= '0'; i
<= '9'; i
++)
3653 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3655 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '$', temp
);
3656 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '%', temp
);
3658 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '(',
3659 Fcons (make_number (Sopen
), make_number (')')));
3660 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ')',
3661 Fcons (make_number (Sclose
), make_number ('(')));
3662 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '[',
3663 Fcons (make_number (Sopen
), make_number (']')));
3664 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ']',
3665 Fcons (make_number (Sclose
), make_number ('[')));
3666 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '{',
3667 Fcons (make_number (Sopen
), make_number ('}')));
3668 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '}',
3669 Fcons (make_number (Sclose
), make_number ('{')));
3670 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '"',
3671 Fcons (make_number (Sstring
), Qnil
));
3672 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\\',
3673 Fcons (make_number (Sescape
), Qnil
));
3675 temp
= AREF (Vsyntax_code_object
, Ssymbol
);
3676 for (i
= 0; i
< 10; i
++)
3678 c
= "_-+*/&|<>="[i
];
3679 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3682 temp
= AREF (Vsyntax_code_object
, Spunct
);
3683 for (i
= 0; i
< 12; i
++)
3685 c
= ".,;:?!#@~^'`"[i
];
3686 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3689 /* All multibyte characters have syntax `word' by default. */
3690 temp
= AREF (Vsyntax_code_object
, Sword
);
3691 char_table_set_range (Vstandard_syntax_table
, 0x80, MAX_CHAR
, temp
);
3695 syms_of_syntax (void)
3697 DEFSYM (Qsyntax_table_p
, "syntax-table-p");
3699 staticpro (&Vsyntax_code_object
);
3701 staticpro (&gl_state
.object
);
3702 staticpro (&gl_state
.global_code
);
3703 staticpro (&gl_state
.current_syntax_table
);
3704 staticpro (&gl_state
.old_prop
);
3706 /* Defined in regex.c. */
3707 staticpro (&re_match_object
);
3709 DEFSYM (Qscan_error
, "scan-error");
3710 Fput (Qscan_error
, Qerror_conditions
,
3711 listn (CONSTYPE_PURE
, 2, Qscan_error
, Qerror
));
3712 Fput (Qscan_error
, Qerror_message
,
3713 build_pure_c_string ("Scan error"));
3715 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments
,
3716 doc
: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3718 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties
,
3719 doc
: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3720 Otherwise, that text property is simply ignored.
3721 See the info node `(elisp)Syntax Properties' for a description of the
3722 `syntax-table' property. */);
3724 DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done
,
3725 doc
: /* Position up to which syntax-table properties have been set. */);
3726 syntax_propertize__done
= -1;
3727 DEFSYM (Qinternal__syntax_propertize
, "internal--syntax-propertize");
3728 Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3730 words_include_escapes
= 0;
3731 DEFVAR_BOOL ("words-include-escapes", words_include_escapes
,
3732 doc
: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3734 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol
,
3735 doc
: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3736 multibyte_syntax_as_symbol
= 0;
3738 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3739 open_paren_in_column_0_is_defun_start
,
3740 doc
: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3741 open_paren_in_column_0_is_defun_start
= 1;
3744 DEFVAR_LISP ("find-word-boundary-function-table",
3745 Vfind_word_boundary_function_table
,
3747 Char table of functions to search for the word boundary.
3748 Each function is called with two arguments; POS and LIMIT.
3749 POS and LIMIT are character positions in the current buffer.
3751 If POS is less than LIMIT, POS is at the first character of a word,
3752 and the return value of a function should be a position after the
3753 last character of that word.
3755 If POS is not less than LIMIT, POS is at the last character of a word,
3756 and the return value of a function should be a position at the first
3757 character of that word.
3759 In both cases, LIMIT bounds the search. */);
3760 Vfind_word_boundary_function_table
= Fmake_char_table (Qnil
, Qnil
);
3762 DEFVAR_BOOL ("comment-end-can-be-escaped", Vcomment_end_can_be_escaped
,
3763 doc
: /* Non-nil means an escaped ender inside a comment doesn't end the comment. */);
3764 Vcomment_end_can_be_escaped
= 0;
3765 DEFSYM (Qcomment_end_can_be_escaped
, "comment-end-can-be-escaped");
3766 Fmake_variable_buffer_local (Qcomment_end_can_be_escaped
);
3768 defsubr (&Ssyntax_table_p
);
3769 defsubr (&Ssyntax_table
);
3770 defsubr (&Sstandard_syntax_table
);
3771 defsubr (&Scopy_syntax_table
);
3772 defsubr (&Sset_syntax_table
);
3773 defsubr (&Schar_syntax
);
3774 defsubr (&Smatching_paren
);
3775 defsubr (&Sstring_to_syntax
);
3776 defsubr (&Smodify_syntax_entry
);
3777 defsubr (&Sinternal_describe_syntax_value
);
3779 defsubr (&Sforward_word
);
3781 defsubr (&Sskip_chars_forward
);
3782 defsubr (&Sskip_chars_backward
);
3783 defsubr (&Sskip_syntax_forward
);
3784 defsubr (&Sskip_syntax_backward
);
3786 defsubr (&Sforward_comment
);
3787 defsubr (&Sscan_lists
);
3788 defsubr (&Sscan_sexps
);
3789 defsubr (&Sbackward_prefix_chars
);
3790 defsubr (&Sparse_partial_sexp
);