1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2017 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
24 #include "character.h"
28 #include "intervals.h"
31 /* Make syntax table lookup grant data in gl_state. */
32 #define SYNTAX(c) syntax_property (c, 1)
33 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
34 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
36 /* Eight single-bit flags have the following meanings:
37 1. This character is the first of a two-character comment-start sequence.
38 2. This character is the second of a two-character comment-start sequence.
39 3. This character is the first of a two-character comment-end sequence.
40 4. This character is the second of a two-character comment-end sequence.
41 5. This character is a prefix, for backward-prefix-chars.
42 6. The char is part of a delimiter for comments of style "b".
43 7. This character is part of a nestable comment sequence.
44 8. The char is part of a delimiter for comments of style "c".
45 Note that any two-character sequence whose first character has flag 1
46 and whose second character has flag 2 will be interpreted as a comment start.
48 Bits 6 and 8 discriminate among different comment styles.
49 Languages such as C++ allow two orthogonal syntax start/end pairs
50 and bit 6 determines whether a comment-end or Scommentend
51 ends style a or b. Comment markers can start style a, b, c, or bc.
52 Style a is always the default.
53 For 2-char comment markers, the style b flag is looked up only on the second
54 char of the comment marker and on the first char of the comment ender.
55 For style c (like the nested flag), the flag can be placed on any of
58 /* These functions extract specific flags from an integer
59 that holds the syntax code and the flags. */
62 SYNTAX_FLAGS_COMSTART_FIRST (int flags
)
64 return (flags
>> 16) & 1;
67 SYNTAX_FLAGS_COMSTART_SECOND (int flags
)
69 return (flags
>> 17) & 1;
72 SYNTAX_FLAGS_COMEND_FIRST (int flags
)
74 return (flags
>> 18) & 1;
77 SYNTAX_FLAGS_COMEND_SECOND (int flags
)
79 return (flags
>> 19) & 1;
82 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags
)
84 return (flags
& 0x50000) != 0;
87 SYNTAX_FLAGS_PREFIX (int flags
)
89 return (flags
>> 20) & 1;
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags
)
94 return (flags
>> 21) & 1;
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags
)
99 return (flags
>> 23) & 1;
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags
)
104 return (flags
>> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags
)
109 return (flags
>> 22) & 1;
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags
, int other_flags
)
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags
)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags
)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags
));
122 /* Extract a particular flag for a given character. */
125 SYNTAX_COMEND_FIRST (int c
)
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c
));
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
136 ST_COMMENT_STYLE
= 256 + 1,
137 ST_STRING_STYLE
= 256 + 2
140 /* This is the internal form of the parse state used in parse-partial-sexp. */
142 struct lisp_parse_state
144 EMACS_INT depth
; /* Depth at end of parsing. */
145 int instring
; /* -1 if not within string, else desired terminator. */
146 EMACS_INT incomment
; /* -1 if in unnestable comment else comment nesting */
147 int comstyle
; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
148 bool quoted
; /* True if just after an escape char at end of parsing. */
149 EMACS_INT mindepth
; /* Minimum depth seen while scanning. */
150 /* Char number of most recent start-of-expression at current level */
151 ptrdiff_t thislevelstart
;
152 /* Char number of start of containing expression */
153 ptrdiff_t prevlevelstart
;
154 ptrdiff_t location
; /* Char number at which parsing stopped. */
155 ptrdiff_t location_byte
; /* Corresponding byte position. */
156 ptrdiff_t comstr_start
; /* Position of last comment/string starter. */
157 Lisp_Object levelstarts
; /* Char numbers of starts-of-expression
158 of levels (starting from outermost). */
159 int prev_syntax
; /* Syntax of previous position scanned, when
160 that position (potentially) holds the first char
161 of a 2-char construct, i.e. comment delimiter
162 or Sescape, etc. Smax otherwise. */
165 /* These variables are a cache for finding the start of a defun.
166 find_start_pos is the place for which the defun start was found.
167 find_start_value is the defun start position found for it.
168 find_start_value_byte is the corresponding byte position.
169 find_start_buffer is the buffer it was found in.
170 find_start_begv is the BEGV value when it was found.
171 find_start_modiff is the value of MODIFF when it was found. */
173 static ptrdiff_t find_start_pos
;
174 static ptrdiff_t find_start_value
;
175 static ptrdiff_t find_start_value_byte
;
176 static struct buffer
*find_start_buffer
;
177 static ptrdiff_t find_start_begv
;
178 static EMACS_INT find_start_modiff
;
181 static Lisp_Object
skip_chars (bool, Lisp_Object
, Lisp_Object
, bool);
182 static Lisp_Object
skip_syntaxes (bool, Lisp_Object
, Lisp_Object
);
183 static Lisp_Object
scan_lists (EMACS_INT
, EMACS_INT
, EMACS_INT
, bool);
184 static void scan_sexps_forward (struct lisp_parse_state
*,
185 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT
,
187 static void internalize_parse_state (Lisp_Object
, struct lisp_parse_state
*);
188 static bool in_classes (int, Lisp_Object
);
189 static void parse_sexp_propertize (ptrdiff_t charpos
);
191 /* This setter is used only in this file, so it can be private. */
193 bset_syntax_table (struct buffer
*b
, Lisp_Object val
)
195 b
->syntax_table_
= val
;
198 /* Whether the syntax of the character C has the prefix flag set. */
200 syntax_prefix_flag_p (int c
)
202 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c
));
205 struct gl_state_s gl_state
; /* Global state of syntax parser. */
207 enum { INTERVALS_AT_ONCE
= 10 }; /* 1 + max-number of intervals
208 to scan to property-change. */
210 /* Set the syntax entry VAL for char C in table TABLE. */
213 SET_RAW_SYNTAX_ENTRY (Lisp_Object table
, int c
, Lisp_Object val
)
215 CHAR_TABLE_SET (table
, c
, val
);
218 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
219 RANGE is a cons (FROM . TO) specifying the range of characters. */
222 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table
, Lisp_Object range
,
225 Fset_char_table_range (table
, range
, val
);
228 /* Extract the information from the entry for character C
229 in the current syntax table. */
234 Lisp_Object ent
= SYNTAX_ENTRY (c
);
235 return CONSP (ent
) ? XCDR (ent
) : Qnil
;
238 /* This should be called with FROM at the start of forward
239 search, or after the last position of the backward search. It
240 makes sure that the first char is picked up with correct table, so
241 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
243 Sign of COUNT gives the direction of the search.
247 SETUP_SYNTAX_TABLE (ptrdiff_t from
, ptrdiff_t count
)
249 SETUP_BUFFER_SYNTAX_TABLE ();
250 gl_state
.b_property
= BEGV
;
251 gl_state
.e_property
= ZV
+ 1;
252 gl_state
.object
= Qnil
;
254 if (parse_sexp_lookup_properties
)
257 update_syntax_table_forward (from
, true, Qnil
);
258 else if (from
> BEGV
)
260 update_syntax_table (from
- 1, count
, true, Qnil
);
261 parse_sexp_propertize (from
- 1);
266 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
267 If it is t (which is only used in fast_c_string_match_ignore_case),
268 ignore properties altogether.
270 This is meant for regex.c to use. For buffers, regex.c passes arguments
271 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
272 So if it is a buffer, we set the offset field to BEGV. */
275 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object
,
276 ptrdiff_t from
, ptrdiff_t count
)
278 SETUP_BUFFER_SYNTAX_TABLE ();
279 gl_state
.object
= object
;
280 if (BUFFERP (gl_state
.object
))
282 struct buffer
*buf
= XBUFFER (gl_state
.object
);
283 gl_state
.b_property
= 1;
284 gl_state
.e_property
= BUF_ZV (buf
) - BUF_BEGV (buf
) + 1;
285 gl_state
.offset
= BUF_BEGV (buf
) - 1;
287 else if (NILP (gl_state
.object
))
289 gl_state
.b_property
= 1;
290 gl_state
.e_property
= ZV
- BEGV
+ 1;
291 gl_state
.offset
= BEGV
- 1;
293 else if (EQ (gl_state
.object
, Qt
))
295 gl_state
.b_property
= 0;
296 gl_state
.e_property
= PTRDIFF_MAX
;
301 gl_state
.b_property
= 0;
302 gl_state
.e_property
= 1 + SCHARS (gl_state
.object
);
305 if (parse_sexp_lookup_properties
)
306 update_syntax_table (from
+ gl_state
.offset
- (count
<= 0),
307 count
, 1, gl_state
.object
);
310 /* Update gl_state to an appropriate interval which contains CHARPOS. The
311 sign of COUNT give the relative position of CHARPOS wrt the previously
312 valid interval. If INIT, only [be]_property fields of gl_state are
313 valid at start, the rest is filled basing on OBJECT.
315 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
316 direction than the intervals - or in an interval. We update the
317 current syntax-table basing on the property of this interval, and
318 update the interval to start further than CHARPOS - or be
319 NULL. We also update lim_property to be the next value of
320 charpos to call this subroutine again - or be before/after the
321 start/end of OBJECT. */
324 update_syntax_table (ptrdiff_t charpos
, EMACS_INT count
, bool init
,
327 Lisp_Object tmp_table
;
329 bool invalidate
= true;
334 gl_state
.old_prop
= Qnil
;
335 gl_state
.start
= gl_state
.b_property
;
336 gl_state
.stop
= gl_state
.e_property
;
337 i
= interval_of (charpos
, object
);
338 gl_state
.backward_i
= gl_state
.forward_i
= i
;
342 /* interval_of updates only ->position of the return value, so
343 update the parents manually to speed up update_interval. */
344 while (!NULL_PARENT (i
))
346 if (AM_RIGHT_CHILD (i
))
347 INTERVAL_PARENT (i
)->position
= i
->position
348 - LEFT_TOTAL_LENGTH (i
) + TOTAL_LENGTH (i
) /* right end */
349 - TOTAL_LENGTH (INTERVAL_PARENT (i
))
350 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i
));
352 INTERVAL_PARENT (i
)->position
= i
->position
- LEFT_TOTAL_LENGTH (i
)
354 i
= INTERVAL_PARENT (i
);
356 i
= gl_state
.forward_i
;
357 gl_state
.b_property
= i
->position
- gl_state
.offset
;
358 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
361 i
= count
> 0 ? gl_state
.forward_i
: gl_state
.backward_i
;
363 /* We are guaranteed to be called with CHARPOS either in i,
366 error ("Error in syntax_table logic for to-the-end intervals");
367 else if (charpos
< i
->position
) /* Move left. */
370 error ("Error in syntax_table logic for intervals <-");
371 /* Update the interval. */
372 i
= update_interval (i
, charpos
);
373 if (INTERVAL_LAST_POS (i
) != gl_state
.b_property
)
376 gl_state
.forward_i
= i
;
377 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
380 else if (charpos
>= INTERVAL_LAST_POS (i
)) /* Move right. */
383 error ("Error in syntax_table logic for intervals ->");
384 /* Update the interval. */
385 i
= update_interval (i
, charpos
);
386 if (i
->position
!= gl_state
.e_property
)
389 gl_state
.backward_i
= i
;
390 gl_state
.b_property
= i
->position
- gl_state
.offset
;
395 tmp_table
= textget (i
->plist
, Qsyntax_table
);
398 invalidate
= !EQ (tmp_table
, gl_state
.old_prop
); /* Need to invalidate? */
400 if (invalidate
) /* Did not get to adjacent interval. */
401 { /* with the same table => */
402 /* invalidate the old range. */
405 gl_state
.backward_i
= i
;
406 gl_state
.b_property
= i
->position
- gl_state
.offset
;
410 gl_state
.forward_i
= i
;
411 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
415 if (!EQ (tmp_table
, gl_state
.old_prop
))
417 gl_state
.current_syntax_table
= tmp_table
;
418 gl_state
.old_prop
= tmp_table
;
419 if (EQ (Fsyntax_table_p (tmp_table
), Qt
))
421 gl_state
.use_global
= 0;
423 else if (CONSP (tmp_table
))
425 gl_state
.use_global
= 1;
426 gl_state
.global_code
= tmp_table
;
430 gl_state
.use_global
= 0;
431 gl_state
.current_syntax_table
= BVAR (current_buffer
, syntax_table
);
437 if (cnt
&& !EQ (tmp_table
, textget (i
->plist
, Qsyntax_table
)))
441 gl_state
.e_property
= i
->position
- gl_state
.offset
;
442 gl_state
.forward_i
= i
;
447 = i
->position
+ LENGTH (i
) - gl_state
.offset
;
448 gl_state
.backward_i
= i
;
452 else if (cnt
== INTERVALS_AT_ONCE
)
457 = i
->position
+ LENGTH (i
) - gl_state
.offset
458 /* e_property at EOB is not set to ZV but to ZV+1, so that
459 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
460 having to check eob between the two. */
461 + (next_interval (i
) ? 0 : 1);
462 gl_state
.forward_i
= i
;
466 gl_state
.b_property
= i
->position
- gl_state
.offset
;
467 gl_state
.backward_i
= i
;
472 i
= count
> 0 ? next_interval (i
) : previous_interval (i
);
474 eassert (i
== NULL
); /* This property goes to the end. */
477 gl_state
.e_property
= gl_state
.stop
;
478 gl_state
.forward_i
= i
;
481 gl_state
.b_property
= gl_state
.start
;
485 parse_sexp_propertize (ptrdiff_t charpos
)
488 if (syntax_propertize__done
<= charpos
489 && syntax_propertize__done
< zv
)
491 EMACS_INT modiffs
= CHARS_MODIFF
;
492 safe_call1 (Qinternal__syntax_propertize
,
493 make_number (min (zv
, 1 + charpos
)));
494 if (modiffs
!= CHARS_MODIFF
)
495 error ("parse-sexp-propertize-function modified the buffer!");
496 if (syntax_propertize__done
<= charpos
497 && syntax_propertize__done
< zv
)
498 error ("parse-sexp-propertize-function did not move"
499 " syntax-propertize--done");
500 SETUP_SYNTAX_TABLE (charpos
, 1);
502 else if (gl_state
.e_property
> syntax_propertize__done
)
504 gl_state
.e_property
= syntax_propertize__done
;
505 gl_state
.e_property_truncated
= true;
507 else if (gl_state
.e_property_truncated
508 && gl_state
.e_property
< syntax_propertize__done
)
509 { /* When moving backward, e_property might be set without resetting
510 e_property_truncated, so the e_property_truncated flag may
511 occasionally be left raised spuriously. This should be rare. */
512 gl_state
.e_property_truncated
= false;
513 update_syntax_table_forward (charpos
, false, Qnil
);
518 update_syntax_table_forward (ptrdiff_t charpos
, bool init
,
521 if (gl_state
.e_property_truncated
)
523 eassert (NILP (object
));
524 eassert (charpos
>= gl_state
.e_property
);
525 parse_sexp_propertize (charpos
);
529 update_syntax_table (charpos
, 1, init
, object
);
530 if (NILP (object
) && gl_state
.e_property
> syntax_propertize__done
)
531 parse_sexp_propertize (charpos
);
535 /* Returns true if char at CHARPOS is quoted.
536 Global syntax-table data should be set up already to be good at CHARPOS
537 or after. On return global syntax data is good for lookup at CHARPOS. */
540 char_quoted (ptrdiff_t charpos
, ptrdiff_t bytepos
)
542 enum syntaxcode code
;
543 ptrdiff_t beg
= BEGV
;
545 ptrdiff_t orig
= charpos
;
547 while (charpos
> beg
)
550 DEC_BOTH (charpos
, bytepos
);
552 UPDATE_SYNTAX_TABLE_BACKWARD (charpos
);
553 c
= FETCH_CHAR_AS_MULTIBYTE (bytepos
);
555 if (! (code
== Scharquote
|| code
== Sescape
))
561 UPDATE_SYNTAX_TABLE (orig
);
565 /* Return the bytepos one character before BYTEPOS.
566 We assume that BYTEPOS is not at the start of the buffer. */
569 dec_bytepos (ptrdiff_t bytepos
)
571 if (NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
578 /* Return a defun-start position before POS and not too far before.
579 It should be the last one before POS, or nearly the last.
581 When open_paren_in_column_0_is_defun_start is nonzero,
582 only the beginning of the buffer is treated as a defun-start.
584 We record the information about where the scan started
585 and what its result was, so that another call in the same area
586 can return the same value very quickly.
588 There is no promise at which position the global syntax data is
589 valid on return from the subroutine, so the caller should explicitly
590 update the global data. */
593 find_defun_start (ptrdiff_t pos
, ptrdiff_t pos_byte
)
595 ptrdiff_t opoint
= PT
, opoint_byte
= PT_BYTE
;
597 /* Use previous finding, if it's valid and applies to this inquiry. */
598 if (current_buffer
== find_start_buffer
599 /* Reuse the defun-start even if POS is a little farther on.
600 POS might be in the next defun, but that's ok.
601 Our value may not be the best possible, but will still be usable. */
602 && pos
<= find_start_pos
+ 1000
603 && pos
>= find_start_value
604 && BEGV
== find_start_begv
605 && MODIFF
== find_start_modiff
)
606 return find_start_value
;
608 if (!open_paren_in_column_0_is_defun_start
)
610 find_start_value
= BEGV
;
611 find_start_value_byte
= BEGV_BYTE
;
615 /* Back up to start of line. */
616 scan_newline (pos
, pos_byte
, BEGV
, BEGV_BYTE
, -1, 1);
618 /* We optimize syntax-table lookup for rare updates. Thus we accept
619 only those `^\s(' which are good in global _and_ text-property
621 SETUP_BUFFER_SYNTAX_TABLE ();
624 /* Open-paren at start of line means we may have found our
626 int c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
627 if (SYNTAX (c
) == Sopen
)
629 SETUP_SYNTAX_TABLE (PT
+ 1, -1); /* Try again... */
630 c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
631 if (SYNTAX (c
) == Sopen
)
633 /* Now fallback to the default value. */
634 SETUP_BUFFER_SYNTAX_TABLE ();
636 /* Move to beg of previous line. */
637 scan_newline (PT
, PT_BYTE
, BEGV
, BEGV_BYTE
, -2, 1);
640 /* Record what we found, for the next try. */
641 find_start_value
= PT
;
642 find_start_value_byte
= PT_BYTE
;
643 TEMP_SET_PT_BOTH (opoint
, opoint_byte
);
646 find_start_buffer
= current_buffer
;
647 find_start_modiff
= MODIFF
;
648 find_start_begv
= BEGV
;
649 find_start_pos
= pos
;
651 return find_start_value
;
654 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
657 prev_char_comend_first (ptrdiff_t pos
, ptrdiff_t pos_byte
)
662 DEC_BOTH (pos
, pos_byte
);
663 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
664 c
= FETCH_CHAR (pos_byte
);
665 val
= SYNTAX_COMEND_FIRST (c
);
666 UPDATE_SYNTAX_TABLE_FORWARD (pos
+ 1);
670 /* Check whether charpos FROM is at the end of a comment.
671 FROM_BYTE is the bytepos corresponding to FROM.
672 Do not move back before STOP.
674 Return true if we find a comment ending at FROM/FROM_BYTE.
676 If successful, store the charpos of the comment's beginning
677 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
679 Global syntax data remains valid for backward search starting at
680 the returned value (or at FROM, if the search was not successful). */
683 back_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
684 bool comnested
, int comstyle
, ptrdiff_t *charpos_ptr
,
685 ptrdiff_t *bytepos_ptr
)
687 /* Look back, counting the parity of string-quotes,
688 and recording the comment-starters seen.
689 When we reach a safe place, assume that's not in a string;
690 then step the main scan to the earliest comment-starter seen
691 an even number of string quotes away from the safe place.
693 OFROM[I] is position of the earliest comment-starter seen
694 which is I+2X quotes from the comment-end.
695 PARITY is current parity of quotes from the comment end. */
696 int string_style
= -1; /* Presumed outside of any string. */
697 bool string_lossage
= 0;
698 /* Not a real lossage: indicates that we have passed a matching comment
699 starter plus a non-matching comment-ender, meaning that any matching
700 comment-starter we might see later could be a false positive (hidden
701 inside another comment).
702 Test case: { a (* b } c (* d *) */
703 bool comment_lossage
= 0;
704 ptrdiff_t comment_end
= from
;
705 ptrdiff_t comment_end_byte
= from_byte
;
706 ptrdiff_t comstart_pos
= 0;
707 ptrdiff_t comstart_byte
;
708 /* Place where the containing defun starts,
709 or 0 if we didn't come across it yet. */
710 ptrdiff_t defun_start
= 0;
711 ptrdiff_t defun_start_byte
= 0;
712 enum syntaxcode code
;
713 ptrdiff_t nesting
= 1; /* Current comment nesting. */
716 unsigned short int quit_count
= 0;
718 /* FIXME: A }} comment-ender style leads to incorrect behavior
719 in the case of {{ c }}} because we ignore the last two chars which are
720 assumed to be comment-enders although they aren't. */
722 /* At beginning of range to scan, we're outside of strings;
723 that determines quote parity to the comment-end. */
726 rarely_quit (++quit_count
);
730 bool com2start
, com2end
, comstart
;
732 /* Move back and examine a character. */
733 DEC_BOTH (from
, from_byte
);
734 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
736 prev_syntax
= syntax
;
737 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
738 syntax
= SYNTAX_WITH_FLAGS (c
);
741 /* Check for 2-char comment markers. */
742 com2start
= (SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
743 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax
)
745 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax
, syntax
))
746 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
)
747 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
);
748 com2end
= (SYNTAX_FLAGS_COMEND_FIRST (syntax
)
749 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax
));
750 comstart
= (com2start
|| code
== Scomment
);
752 /* Nasty cases with overlapping 2-char comment markers:
753 - snmp-mode: -- c -- foo -- c --
761 /* If a 2-char comment sequence partly overlaps with another,
762 we don't try to be clever. E.g. |*| in C, or }% in modes that
763 have %..\n and %{..}%. */
764 if (from
> stop
&& (com2end
|| comstart
))
766 ptrdiff_t next
= from
, next_byte
= from_byte
;
767 int next_c
, next_syntax
;
768 DEC_BOTH (next
, next_byte
);
769 UPDATE_SYNTAX_TABLE_BACKWARD (next
);
770 next_c
= FETCH_CHAR_AS_MULTIBYTE (next_byte
);
771 next_syntax
= SYNTAX_WITH_FLAGS (next_c
);
772 if (((comstart
|| comnested
)
773 && SYNTAX_FLAGS_COMEND_SECOND (syntax
)
774 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax
))
775 || ((com2end
|| comnested
)
776 && SYNTAX_FLAGS_COMSTART_SECOND (syntax
)
778 == SYNTAX_FLAGS_COMMENT_STYLE (syntax
, prev_syntax
))
779 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax
)))
781 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
784 if (com2start
&& comstart_pos
== 0)
785 /* We're looking at a comment starter. But it might be a comment
786 ender as well (see snmp-mode). The first time we see one, we
787 need to consider it as a comment starter,
788 and the subsequent times as a comment ender. */
791 /* Turn a 2-char comment sequences into the appropriate syntax. */
796 /* Ignore comment starters of a different style. */
797 else if (code
== Scomment
798 && (comstyle
!= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0)
799 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
) != comnested
))
802 /* Ignore escaped characters, except comment-enders which cannot
804 if ((Vcomment_end_can_be_escaped
|| code
!= Sendcomment
)
805 && char_quoted (from
, from_byte
))
812 c
= (code
== Sstring_fence
? ST_STRING_STYLE
: ST_COMMENT_STYLE
);
815 /* Track parity of quotes. */
816 if (string_style
== -1)
817 /* Entering a string. */
819 else if (string_style
== c
)
820 /* Leaving the string. */
823 /* If we have two kinds of string delimiters.
824 There's no way to grok this scanning backwards. */
829 /* We've already checked that it is the relevant comstyle. */
830 if (string_style
!= -1 || comment_lossage
|| string_lossage
)
831 /* There are odd string quotes involved, so let's be careful.
832 Test case in Pascal: " { " a { " } */
837 /* Record best comment-starter so far. */
839 comstart_byte
= from_byte
;
841 else if (--nesting
<= 0)
842 /* nested comments have to be balanced, so we don't need to
843 keep looking for earlier ones. We use here the same (slightly
844 incorrect) reasoning as below: since it is followed by uniform
845 paired string quotes, this comment-start has to be outside of
846 strings, else the comment-end itself would be inside a string. */
851 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == comstyle
852 && ((com2end
&& SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
))
853 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
)
854 /* This is the same style of comment ender as ours. */
859 /* Anything before that can't count because it would match
860 this comment-ender rather than ours. */
861 from
= stop
; /* Break out of the loop. */
863 else if (comstart_pos
!= 0 || c
!= '\n')
864 /* We're mixing comment styles here, so we'd better be careful.
865 The (comstart_pos != 0 || c != '\n') check is not quite correct
866 (we should just always set comment_lossage), but removing it
867 would imply that any multiline comment in C would go through
868 lossage, which seems overkill.
869 The failure should only happen in the rare cases such as
875 /* Assume a defun-start point is outside of strings. */
876 if (open_paren_in_column_0_is_defun_start
878 || (temp_byte
= dec_bytepos (from_byte
),
879 FETCH_CHAR (temp_byte
) == '\n')))
882 defun_start_byte
= from_byte
;
883 from
= stop
; /* Break out of the loop. */
892 if (comstart_pos
== 0)
895 from_byte
= comment_end_byte
;
896 UPDATE_SYNTAX_TABLE_FORWARD (comment_end
);
898 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
899 or `done'), then we've found the beginning of the non-nested comment. */
900 else if (1) /* !comnested */
903 from_byte
= comstart_byte
;
904 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
908 struct lisp_parse_state state
;
909 bool adjusted
= true;
910 /* We had two kinds of string delimiters mixed up
911 together. Decode this going forwards.
912 Scan fwd from a known safe place (beginning-of-defun)
913 to the one in question; this records where we
914 last passed a comment starter. */
915 /* If we did not already find the defun start, find it now. */
916 if (defun_start
== 0)
918 defun_start
= find_defun_start (comment_end
, comment_end_byte
);
919 defun_start_byte
= find_start_value_byte
;
920 adjusted
= (defun_start
> BEGV
);
924 internalize_parse_state (Qnil
, &state
);
925 scan_sexps_forward (&state
,
926 defun_start
, defun_start_byte
,
927 comment_end
, TYPE_MINIMUM (EMACS_INT
),
929 defun_start
= comment_end
;
934 = CONSP (state
.levelstarts
) ? XINT (XCAR (state
.levelstarts
))
935 : state
.thislevelstart
>= 0 ? state
.thislevelstart
937 find_start_value_byte
= CHAR_TO_BYTE (find_start_value
);
940 if (state
.incomment
== (comnested
? 1 : -1)
941 && state
.comstyle
== comstyle
)
942 from
= state
.comstr_start
;
947 /* If comment_end is inside some other comment, maybe ours
948 is nested, so we need to try again from within the
949 surrounding comment. Example: { a (* " *) */
951 /* FIXME: We should advance by one or two chars. */
952 defun_start
= state
.comstr_start
+ 2;
953 defun_start_byte
= CHAR_TO_BYTE (defun_start
);
956 rarely_quit (++quit_count
);
958 while (defun_start
< comment_end
);
960 from_byte
= CHAR_TO_BYTE (from
);
961 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
966 *bytepos_ptr
= from_byte
;
968 return from
!= comment_end
;
971 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
972 doc
: /* Return t if OBJECT is a syntax table.
973 Currently, any char-table counts as a syntax table. */)
976 if (CHAR_TABLE_P (object
)
977 && EQ (XCHAR_TABLE (object
)->purpose
, Qsyntax_table
))
983 check_syntax_table (Lisp_Object obj
)
985 CHECK_TYPE (CHAR_TABLE_P (obj
) && EQ (XCHAR_TABLE (obj
)->purpose
, Qsyntax_table
),
986 Qsyntax_table_p
, obj
);
989 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
990 doc
: /* Return the current syntax table.
991 This is the one specified by the current buffer. */)
994 return BVAR (current_buffer
, syntax_table
);
997 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
998 Sstandard_syntax_table
, 0, 0, 0,
999 doc
: /* Return the standard syntax table.
1000 This is the one used for new buffers. */)
1003 return Vstandard_syntax_table
;
1006 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
1007 doc
: /* Construct a new syntax table and return it.
1008 It is a copy of the TABLE, which defaults to the standard syntax table. */)
1014 check_syntax_table (table
);
1016 table
= Vstandard_syntax_table
;
1018 copy
= Fcopy_sequence (table
);
1020 /* Only the standard syntax table should have a default element.
1021 Other syntax tables should inherit from parents instead. */
1022 set_char_table_defalt (copy
, Qnil
);
1024 /* Copied syntax tables should all have parents.
1025 If we copied one with no parent, such as the standard syntax table,
1026 use the standard syntax table as the copy's parent. */
1027 if (NILP (XCHAR_TABLE (copy
)->parent
))
1028 Fset_char_table_parent (copy
, Vstandard_syntax_table
);
1032 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
1033 doc
: /* Select a new syntax table for the current buffer.
1034 One argument, a syntax table. */)
1038 check_syntax_table (table
);
1039 bset_syntax_table (current_buffer
, table
);
1040 /* Indicate that this buffer now has a specified syntax table. */
1041 idx
= PER_BUFFER_VAR_IDX (syntax_table
);
1042 SET_PER_BUFFER_VALUE_P (current_buffer
, idx
, 1);
1046 /* Convert a letter which signifies a syntax code
1047 into the code it signifies.
1048 This is used by modify-syntax-entry, and other things. */
1050 unsigned char const syntax_spec_code
[0400] =
1051 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1052 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1053 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1054 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1055 Swhitespace
, Scomment_fence
, Sstring
, 0377, Smath
, 0377, 0377, Squote
,
1056 Sopen
, Sclose
, 0377, 0377, 0377, Swhitespace
, Spunct
, Scharquote
,
1057 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1058 0377, 0377, 0377, 0377, Scomment
, 0377, Sendcomment
, 0377,
1059 Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1060 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1061 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
1062 0377, 0377, 0377, 0377, Sescape
, 0377, 0377, Ssymbol
,
1063 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1064 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1065 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
1066 0377, 0377, 0377, 0377, Sstring_fence
, 0377, 0377, 0377
1069 /* Indexed by syntax code, give the letter that describes it. */
1071 char const syntax_code_spec
[16] =
1073 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1077 /* Indexed by syntax code, give the object (cons of syntax code and
1078 nil) to be stored in syntax table. Since these objects can be
1079 shared among syntax tables, we generate them in advance. By
1080 sharing objects, the function `describe-syntax' can give a more
1082 static Lisp_Object Vsyntax_code_object
;
1085 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
1086 doc
: /* Return the syntax code of CHARACTER, described by a character.
1087 For example, if CHARACTER is a word constituent, the
1088 character `w' (119) is returned.
1089 The characters that correspond to various syntax codes
1090 are listed in the documentation of `modify-syntax-entry'. */)
1091 (Lisp_Object character
)
1094 CHECK_CHARACTER (character
);
1095 char_int
= XINT (character
);
1096 SETUP_BUFFER_SYNTAX_TABLE ();
1097 return make_number (syntax_code_spec
[SYNTAX (char_int
)]);
1100 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
1101 doc
: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1102 (Lisp_Object character
)
1105 enum syntaxcode code
;
1106 CHECK_CHARACTER (character
);
1107 char_int
= XINT (character
);
1108 SETUP_BUFFER_SYNTAX_TABLE ();
1109 code
= SYNTAX (char_int
);
1110 if (code
== Sopen
|| code
== Sclose
)
1111 return SYNTAX_MATCH (char_int
);
1115 DEFUN ("string-to-syntax", Fstring_to_syntax
, Sstring_to_syntax
, 1, 1, 0,
1116 doc
: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1117 STRING should be a string of the form allowed as argument of
1118 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1119 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1120 the value of a `syntax-table' text property. */)
1121 (Lisp_Object string
)
1123 const unsigned char *p
;
1127 CHECK_STRING (string
);
1130 val
= syntax_spec_code
[*p
++];
1132 error ("Invalid syntax description letter: %c", p
[-1]);
1134 if (val
== Sinherit
)
1140 int character
= STRING_CHAR_AND_LENGTH (p
, len
);
1141 XSETINT (match
, character
);
1142 if (XFASTINT (match
) == ' ')
1185 if (val
< ASIZE (Vsyntax_code_object
) && NILP (match
))
1186 return AREF (Vsyntax_code_object
, val
);
1188 /* Since we can't use a shared object, let's make a new one. */
1189 return Fcons (make_number (val
), match
);
1192 /* I really don't know why this is interactive
1193 help-form should at least be made useful whilst reading the second arg. */
1194 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
1195 "cSet syntax for character: \nsSet syntax for %s to: ",
1196 doc
: /* Set syntax for character CHAR according to string NEWENTRY.
1197 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1198 the current buffer's syntax table.
1199 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1200 in the range MIN to MAX are changed.
1201 The first character of NEWENTRY should be one of the following:
1202 Space or - whitespace syntax. w word constituent.
1203 _ symbol constituent. . punctuation.
1204 ( open-parenthesis. ) close-parenthesis.
1205 " string quote. \\ escape.
1206 $ paired delimiter. \\=' expression quote or prefix operator.
1207 < comment starter. > comment ender.
1208 / character-quote. @ inherit from parent table.
1209 | generic string fence. ! generic comment fence.
1211 Only single-character comment start and end sequences are represented thus.
1212 Two-character sequences are represented as described below.
1213 The second character of NEWENTRY is the matching parenthesis,
1214 used only if the first character is `(' or `)'.
1215 Any additional characters are flags.
1216 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1217 1 means CHAR is the start of a two-char comment start sequence.
1218 2 means CHAR is the second character of such a sequence.
1219 3 means CHAR is the start of a two-char comment end sequence.
1220 4 means CHAR is the second character of such a sequence.
1222 There can be several orthogonal comment sequences. This is to support
1223 language modes such as C++. By default, all comment sequences are of style
1224 a, but you can set the comment sequence style to b (on the second character
1225 of a comment-start, and the first character of a comment-end sequence) and/or
1226 c (on any of its chars) using this flag:
1227 b means CHAR is part of comment sequence b.
1228 c means CHAR is part of comment sequence c.
1229 n means CHAR is part of a nestable comment sequence.
1231 p means CHAR is a prefix character for `backward-prefix-chars';
1232 such characters are treated as whitespace when they occur
1233 between expressions.
1234 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1235 (Lisp_Object c
, Lisp_Object newentry
, Lisp_Object syntax_table
)
1239 CHECK_CHARACTER_CAR (c
);
1240 CHECK_CHARACTER_CDR (c
);
1243 CHECK_CHARACTER (c
);
1245 if (NILP (syntax_table
))
1246 syntax_table
= BVAR (current_buffer
, syntax_table
);
1248 check_syntax_table (syntax_table
);
1250 newentry
= Fstring_to_syntax (newentry
);
1252 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table
, c
, newentry
);
1254 SET_RAW_SYNTAX_ENTRY (syntax_table
, XINT (c
), newentry
);
1256 /* We clear the regexp cache, since character classes can now have
1257 different values from those in the compiled regexps.*/
1258 clear_regexp_cache ();
1263 /* Dump syntax table to buffer in human-readable format */
1265 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value
,
1266 Sinternal_describe_syntax_value
, 1, 1, 0,
1267 doc
: /* Insert a description of the internal syntax description SYNTAX at point. */)
1268 (Lisp_Object syntax
)
1270 int code
, syntax_code
;
1271 bool start1
, start2
, end1
, end2
, prefix
, comstyleb
, comstylec
, comnested
;
1273 Lisp_Object first
, match_lisp
, value
= syntax
;
1277 insert_string ("default");
1281 if (CHAR_TABLE_P (value
))
1283 insert_string ("deeper char-table ...");
1289 insert_string ("invalid");
1293 first
= XCAR (value
);
1294 match_lisp
= XCDR (value
);
1296 if (!INTEGERP (first
) || !(NILP (match_lisp
) || CHARACTERP (match_lisp
)))
1298 insert_string ("invalid");
1302 syntax_code
= XINT (first
) & INT_MAX
;
1303 code
= syntax_code
& 0377;
1304 start1
= SYNTAX_FLAGS_COMSTART_FIRST (syntax_code
);
1305 start2
= SYNTAX_FLAGS_COMSTART_SECOND (syntax_code
);
1306 end1
= SYNTAX_FLAGS_COMEND_FIRST (syntax_code
);
1307 end2
= SYNTAX_FLAGS_COMEND_SECOND (syntax_code
);
1308 prefix
= SYNTAX_FLAGS_PREFIX (syntax_code
);
1309 comstyleb
= SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code
);
1310 comstylec
= SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code
);
1311 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax_code
);
1315 insert_string ("invalid");
1319 str
[0] = syntax_code_spec
[code
], str
[1] = 0;
1322 if (NILP (match_lisp
))
1325 insert_char (XINT (match_lisp
));
1346 insert_string ("\twhich means: ");
1351 insert_string ("whitespace"); break;
1353 insert_string ("punctuation"); break;
1355 insert_string ("word"); break;
1357 insert_string ("symbol"); break;
1359 insert_string ("open"); break;
1361 insert_string ("close"); break;
1363 insert_string ("prefix"); break;
1365 insert_string ("string"); break;
1367 insert_string ("math"); break;
1369 insert_string ("escape"); break;
1371 insert_string ("charquote"); break;
1373 insert_string ("comment"); break;
1375 insert_string ("endcomment"); break;
1377 insert_string ("inherit"); break;
1378 case Scomment_fence
:
1379 insert_string ("comment fence"); break;
1381 insert_string ("string fence"); break;
1383 insert_string ("invalid");
1387 if (!NILP (match_lisp
))
1389 insert_string (", matches ");
1390 insert_char (XINT (match_lisp
));
1394 insert_string (",\n\t is the first character of a comment-start sequence");
1396 insert_string (",\n\t is the second character of a comment-start sequence");
1399 insert_string (",\n\t is the first character of a comment-end sequence");
1401 insert_string (",\n\t is the second character of a comment-end sequence");
1403 insert_string (" (comment style b)");
1405 insert_string (" (comment style c)");
1407 insert_string (" (nestable)");
1411 AUTO_STRING (prefixdoc
,
1412 ",\n\t is a prefix character for `backward-prefix-chars'");
1413 insert1 (Fsubstitute_command_keys (prefixdoc
));
1419 /* Return the position across COUNT words from FROM.
1420 If that many words cannot be found before the end of the buffer, return 0.
1421 COUNT negative means scan backward and stop at word beginning. */
1424 scan_words (ptrdiff_t from
, EMACS_INT count
)
1426 ptrdiff_t beg
= BEGV
;
1428 ptrdiff_t from_byte
= CHAR_TO_BYTE (from
);
1429 enum syntaxcode code
;
1431 Lisp_Object func
, pos
;
1433 SETUP_SYNTAX_TABLE (from
, count
);
1441 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1442 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1443 code
= SYNTAX (ch0
);
1444 INC_BOTH (from
, from_byte
);
1445 if (words_include_escapes
1446 && (code
== Sescape
|| code
== Scharquote
))
1452 /* Now CH0 is a character which begins a word and FROM is the
1453 position of the next character. */
1454 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch0
);
1455 if (! NILP (Ffboundp (func
)))
1457 pos
= call2 (func
, make_number (from
- 1), make_number (end
));
1458 if (INTEGERP (pos
) && from
< XINT (pos
) && XINT (pos
) <= ZV
)
1461 from_byte
= CHAR_TO_BYTE (from
);
1468 if (from
== end
) break;
1469 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1470 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1471 code
= SYNTAX (ch1
);
1473 && (! words_include_escapes
1474 || (code
!= Sescape
&& code
!= Scharquote
)))
1475 || word_boundary_p (ch0
, ch1
))
1477 INC_BOTH (from
, from_byte
);
1490 DEC_BOTH (from
, from_byte
);
1491 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1492 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1493 code
= SYNTAX (ch1
);
1494 if (words_include_escapes
1495 && (code
== Sescape
|| code
== Scharquote
))
1501 /* Now CH1 is a character which ends a word and FROM is the
1503 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch1
);
1504 if (! NILP (Ffboundp (func
)))
1506 pos
= call2 (func
, make_number (from
), make_number (beg
));
1507 if (INTEGERP (pos
) && BEGV
<= XINT (pos
) && XINT (pos
) < from
)
1510 from_byte
= CHAR_TO_BYTE (from
);
1519 DEC_BOTH (from
, from_byte
);
1520 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1521 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1522 code
= SYNTAX (ch0
);
1524 && (! words_include_escapes
1525 || (code
!= Sescape
&& code
!= Scharquote
)))
1526 || word_boundary_p (ch0
, ch1
))
1528 INC_BOTH (from
, from_byte
);
1541 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 0, 1, "^p",
1542 doc
: /* Move point forward ARG words (backward if ARG is negative).
1543 If ARG is omitted or nil, move point forward one word.
1545 If an edge of the buffer or a field boundary is reached, point is
1546 left there and the function returns nil. Field boundaries are not
1547 noticed if `inhibit-field-text-motion' is non-nil.
1549 The word boundaries are normally determined by the buffer's syntax
1550 table, but `find-word-boundary-function-table', such as set up
1551 by `subword-mode', can change that. If a Lisp program needs to
1552 move by words determined strictly by the syntax table, it should
1553 use `forward-word-strictly' instead. */)
1557 ptrdiff_t orig_val
, val
;
1560 XSETFASTINT (arg
, 1);
1564 val
= orig_val
= scan_words (PT
, XINT (arg
));
1566 val
= XINT (arg
) > 0 ? ZV
: BEGV
;
1568 /* Avoid jumping out of an input field. */
1569 tmp
= Fconstrain_to_field (make_number (val
), make_number (PT
),
1571 val
= XFASTINT (tmp
);
1574 return val
== orig_val
? Qt
: Qnil
;
1577 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
1578 doc
: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1579 STRING is like the inside of a `[...]' in a regular expression
1580 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1581 (but not at the end of a range; quoting is never needed there).
1582 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1583 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1584 Char classes, e.g. `[:alpha:]', are supported.
1586 Returns the distance traveled, either zero or positive. */)
1587 (Lisp_Object string
, Lisp_Object lim
)
1589 return skip_chars (1, string
, lim
, 1);
1592 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
1593 doc
: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1594 See `skip-chars-forward' for details.
1595 Returns the distance traveled, either zero or negative. */)
1596 (Lisp_Object string
, Lisp_Object lim
)
1598 return skip_chars (0, string
, lim
, 1);
1601 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
1602 doc
: /* Move point forward across chars in specified syntax classes.
1603 SYNTAX is a string of syntax code characters.
1604 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1605 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1606 This function returns the distance traveled, either zero or positive. */)
1607 (Lisp_Object syntax
, Lisp_Object lim
)
1609 return skip_syntaxes (1, syntax
, lim
);
1612 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
1613 doc
: /* Move point backward across chars in specified syntax classes.
1614 SYNTAX is a string of syntax code characters.
1615 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1616 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1617 This function returns either zero or a negative number, and the absolute value
1618 of this is the distance traveled. */)
1619 (Lisp_Object syntax
, Lisp_Object lim
)
1621 return skip_syntaxes (0, syntax
, lim
);
1625 skip_chars (bool forwardp
, Lisp_Object string
, Lisp_Object lim
,
1626 bool handle_iso_classes
)
1630 /* Store the ranges of non-ASCII characters. */
1631 int *char_ranges UNINIT
;
1632 int n_char_ranges
= 0;
1634 ptrdiff_t i
, i_byte
;
1635 /* True if the current buffer is multibyte and the region contains
1638 /* True if STRING is multibyte and it contains non-ASCII chars. */
1639 bool string_multibyte
;
1640 ptrdiff_t size_byte
;
1641 const unsigned char *str
;
1643 Lisp_Object iso_classes
;
1646 CHECK_STRING (string
);
1650 XSETINT (lim
, forwardp
? ZV
: BEGV
);
1652 CHECK_NUMBER_COERCE_MARKER (lim
);
1654 /* In any case, don't allow scan outside bounds of buffer. */
1655 if (XINT (lim
) > ZV
)
1656 XSETFASTINT (lim
, ZV
);
1657 if (XINT (lim
) < BEGV
)
1658 XSETFASTINT (lim
, BEGV
);
1660 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
1661 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
1662 string_multibyte
= SBYTES (string
) > SCHARS (string
);
1664 memset (fastmap
, 0, sizeof fastmap
);
1666 str
= SDATA (string
);
1667 size_byte
= SBYTES (string
);
1670 if (i_byte
< size_byte
1671 && SREF (string
, 0) == '^')
1673 negate
= 1; i_byte
++;
1676 /* Find the characters specified and set their elements of fastmap.
1677 Handle backslashes and ranges specially.
1679 If STRING contains non-ASCII characters, setup char_ranges for
1680 them and use fastmap only for their leading codes. */
1682 if (! string_multibyte
)
1684 bool string_has_eight_bit
= 0;
1686 /* At first setup fastmap. */
1687 while (i_byte
< size_byte
)
1689 if (handle_iso_classes
)
1691 const unsigned char *ch
= str
+ i_byte
;
1692 re_wctype_t cc
= re_wctype_parse (&ch
, size_byte
- i_byte
);
1694 error ("Invalid ISO C character class");
1697 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1707 if (i_byte
== size_byte
)
1712 /* Treat `-' as range character only if another character
1714 if (i_byte
+ 1 < size_byte
1715 && str
[i_byte
] == '-')
1719 /* Skip over the dash. */
1722 /* Get the end of the range. */
1725 && i_byte
< size_byte
)
1733 if (! ASCII_CHAR_P (c2
))
1734 string_has_eight_bit
= 1;
1740 if (! ASCII_CHAR_P (c
))
1741 string_has_eight_bit
= 1;
1745 /* If the current range is multibyte and STRING contains
1746 eight-bit chars, arrange fastmap and setup char_ranges for
1747 the corresponding multibyte chars. */
1748 if (multibyte
&& string_has_eight_bit
)
1751 char himap
[0200 + 1];
1752 memcpy (himap
, fastmap
+ 0200, 0200);
1754 memset (fastmap
+ 0200, 0, 0200);
1755 SAFE_NALLOCA (char_ranges
, 2, 128);
1758 while ((p1
= memchr (himap
+ i
, 1, 0200 - i
)))
1760 /* Deduce the next range C..C2 from the next clump of 1s
1761 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1762 order half of the old FASTMAP. */
1763 int c2
, leading_code
;
1765 c
= BYTE8_TO_CHAR (i
+ 0200);
1767 c2
= BYTE8_TO_CHAR (i
+ 0200 - 1);
1769 char_ranges
[n_char_ranges
++] = c
;
1770 char_ranges
[n_char_ranges
++] = c2
;
1771 leading_code
= CHAR_LEADING_CODE (c
);
1772 memset (fastmap
+ leading_code
, 1,
1773 CHAR_LEADING_CODE (c2
) - leading_code
+ 1);
1777 else /* STRING is multibyte */
1779 SAFE_NALLOCA (char_ranges
, 2, SCHARS (string
));
1781 while (i_byte
< size_byte
)
1783 int leading_code
= str
[i_byte
];
1785 if (handle_iso_classes
)
1787 const unsigned char *ch
= str
+ i_byte
;
1788 re_wctype_t cc
= re_wctype_parse (&ch
, size_byte
- i_byte
);
1790 error ("Invalid ISO C character class");
1793 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1799 if (leading_code
== '\\')
1801 if (++i_byte
== size_byte
)
1804 leading_code
= str
[i_byte
];
1806 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1810 /* Treat `-' as range character only if another character
1812 if (i_byte
+ 1 < size_byte
1813 && str
[i_byte
] == '-')
1815 int c2
, leading_code2
;
1817 /* Skip over the dash. */
1820 /* Get the end of the range. */
1821 leading_code2
= str
[i_byte
];
1822 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1826 && i_byte
< size_byte
)
1828 leading_code2
= str
[i_byte
];
1829 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1835 if (ASCII_CHAR_P (c
))
1837 while (c
<= c2
&& c
< 0x80)
1839 leading_code
= CHAR_LEADING_CODE (c
);
1841 if (! ASCII_CHAR_P (c
))
1843 int lim2
= leading_code2
+ 1;
1844 while (leading_code
< lim2
)
1845 fastmap
[leading_code
++] = 1;
1848 char_ranges
[n_char_ranges
++] = c
;
1849 char_ranges
[n_char_ranges
++] = c2
;
1855 if (ASCII_CHAR_P (c
))
1859 fastmap
[leading_code
] = 1;
1860 char_ranges
[n_char_ranges
++] = c
;
1861 char_ranges
[n_char_ranges
++] = c
;
1866 /* If the current range is unibyte and STRING contains non-ASCII
1867 chars, arrange fastmap for the corresponding unibyte
1870 if (! multibyte
&& n_char_ranges
> 0)
1872 memset (fastmap
+ 0200, 0, 0200);
1873 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1875 int c1
= char_ranges
[i
];
1876 int lim2
= char_ranges
[i
+ 1] + 1;
1878 for (; c1
< lim2
; c1
++)
1880 int b
= CHAR_TO_BYTE_SAFE (c1
);
1888 /* If ^ was the first character, complement the fastmap. */
1892 for (i
= 0; i
< sizeof fastmap
; i
++)
1896 for (i
= 0; i
< 0200; i
++)
1898 /* All non-ASCII chars possibly match. */
1899 for (; i
< sizeof fastmap
; i
++)
1905 ptrdiff_t start_point
= PT
;
1907 ptrdiff_t pos_byte
= PT_BYTE
;
1908 unsigned char *p
= PT_ADDR
, *endp
, *stop
;
1912 endp
= (XINT (lim
) == GPT
) ? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
1913 stop
= (pos
< GPT
&& GPT
< XINT (lim
)) ? GPT_ADDR
: endp
;
1917 endp
= CHAR_POS_ADDR (XINT (lim
));
1918 stop
= (pos
>= GPT
&& GPT
> XINT (lim
)) ? GAP_END_ADDR
: endp
;
1921 /* This code may look up syntax tables using functions that rely on the
1922 gl_state object. To make sure this object is not out of date,
1923 let's initialize it manually.
1924 We ignore syntax-table text-properties for now, since that's
1925 what we've done in the past. */
1926 SETUP_BUFFER_SYNTAX_TABLE ();
1941 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
1942 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
1952 if (! ASCII_CHAR_P (c
))
1954 /* As we are looking at a multibyte character, we
1955 must look up the character in the table
1956 CHAR_RANGES. If there's no data in the table,
1957 that character is not what we want to skip. */
1959 /* The following code do the right thing even if
1960 n_char_ranges is zero (i.e. no data in
1962 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1963 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
1965 if (!(negate
^ (i
< n_char_ranges
)))
1969 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
1983 if (!NILP (iso_classes
) && in_classes (*p
, iso_classes
))
1988 goto fwd_unibyte_ok
;
1994 p
++, pos
++, pos_byte
++;
2010 unsigned char *prev_p
= p
;
2013 while (stop
<= p
&& ! CHAR_HEAD_P (*p
));
2015 c
= STRING_CHAR (p
);
2017 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
2027 if (! ASCII_CHAR_P (c
))
2029 /* See the comment in the previous similar code. */
2030 for (i
= 0; i
< n_char_ranges
; i
+= 2)
2031 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
2033 if (!(negate
^ (i
< n_char_ranges
)))
2037 pos
--, pos_byte
-= prev_p
- p
;
2051 if (! NILP (iso_classes
) && in_classes (p
[-1], iso_classes
))
2056 goto back_unibyte_ok
;
2059 if (!fastmap
[p
[-1]])
2062 p
--, pos
--, pos_byte
--;
2067 SET_PT_BOTH (pos
, pos_byte
);
2070 return make_number (PT
- start_point
);
2076 skip_syntaxes (bool forwardp
, Lisp_Object string
, Lisp_Object lim
)
2079 unsigned char fastmap
[0400];
2081 ptrdiff_t i
, i_byte
;
2083 ptrdiff_t size_byte
;
2086 CHECK_STRING (string
);
2089 XSETINT (lim
, forwardp
? ZV
: BEGV
);
2091 CHECK_NUMBER_COERCE_MARKER (lim
);
2093 /* In any case, don't allow scan outside bounds of buffer. */
2094 if (XINT (lim
) > ZV
)
2095 XSETFASTINT (lim
, ZV
);
2096 if (XINT (lim
) < BEGV
)
2097 XSETFASTINT (lim
, BEGV
);
2099 if (forwardp
? (PT
>= XFASTINT (lim
)) : (PT
<= XFASTINT (lim
)))
2100 return make_number (0);
2102 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
2103 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
2105 memset (fastmap
, 0, sizeof fastmap
);
2107 if (SBYTES (string
) > SCHARS (string
))
2108 /* As this is very rare case (syntax spec is ASCII only), don't
2109 consider efficiency. */
2110 string
= string_make_unibyte (string
);
2112 str
= SDATA (string
);
2113 size_byte
= SBYTES (string
);
2116 if (i_byte
< size_byte
2117 && SREF (string
, 0) == '^')
2119 negate
= 1; i_byte
++;
2122 /* Find the syntaxes specified and set their elements of fastmap. */
2124 while (i_byte
< size_byte
)
2127 fastmap
[syntax_spec_code
[c
]] = 1;
2130 /* If ^ was the first character, complement the fastmap. */
2132 for (i
= 0; i
< sizeof fastmap
; i
++)
2136 ptrdiff_t start_point
= PT
;
2138 ptrdiff_t pos_byte
= PT_BYTE
;
2139 unsigned char *p
, *endp
, *stop
;
2141 SETUP_SYNTAX_TABLE (pos
, forwardp
? 1 : -1);
2147 p
= BYTE_POS_ADDR (pos_byte
);
2148 endp
= XINT (lim
) == GPT
? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
2149 stop
= pos
< GPT
&& GPT
< XINT (lim
) ? GPT_ADDR
: endp
;
2163 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
2166 if (! fastmap
[SYNTAX (c
)])
2168 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
2171 while (!parse_sexp_lookup_properties
2172 || pos
< gl_state
.e_property
);
2174 update_syntax_table_forward (pos
+ gl_state
.offset
,
2175 false, gl_state
.object
);
2180 p
= BYTE_POS_ADDR (pos_byte
);
2181 endp
= CHAR_POS_ADDR (XINT (lim
));
2182 stop
= pos
>= GPT
&& GPT
> XINT (lim
) ? GAP_END_ADDR
: endp
;
2195 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2197 unsigned char *prev_p
= p
;
2200 while (stop
<= p
&& ! CHAR_HEAD_P (*p
));
2202 c
= STRING_CHAR (p
);
2203 if (! fastmap
[SYNTAX (c
)])
2205 pos
--, pos_byte
-= prev_p
- p
;
2220 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2221 if (! fastmap
[SYNTAX (p
[-1])])
2223 p
--, pos
--, pos_byte
--;
2230 SET_PT_BOTH (pos
, pos_byte
);
2232 return make_number (PT
- start_point
);
2236 /* Return true if character C belongs to one of the ISO classes
2237 in the list ISO_CLASSES. Each class is represented by an
2238 integer which is its type according to re_wctype. */
2241 in_classes (int c
, Lisp_Object iso_classes
)
2243 bool fits_class
= 0;
2245 while (CONSP (iso_classes
))
2248 elt
= XCAR (iso_classes
);
2249 iso_classes
= XCDR (iso_classes
);
2251 if (re_iswctype (c
, XFASTINT (elt
)))
2258 /* Jump over a comment, assuming we are at the beginning of one.
2259 FROM is the current position.
2260 FROM_BYTE is the bytepos corresponding to FROM.
2261 Do not move past STOP (a charpos).
2262 The comment over which we have to jump is of style STYLE
2263 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2264 NESTING should be positive to indicate the nesting at the beginning
2265 for nested comments and should be zero or negative else.
2266 ST_COMMENT_STYLE cannot be nested.
2267 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2268 (or 0 If the search cannot start in the middle of a two-character).
2270 If successful, return true and store the charpos of the comment's
2271 end into *CHARPOS_PTR and the corresponding bytepos into
2272 *BYTEPOS_PTR. Else, return false and store the charpos STOP into
2273 *CHARPOS_PTR, the corresponding bytepos into *BYTEPOS_PTR and the
2274 current nesting (as defined for state->incomment) in
2275 *INCOMMENT_PTR. Should the last character scanned in an incomplete
2276 comment be a possible first character of a two character construct,
2277 we store its SYNTAX_WITH_FLAGS into *last_syntax_ptr. Otherwise,
2278 we store Smax into *last_syntax_ptr.
2280 The comment end is the last character of the comment rather than the
2281 character just after the comment.
2283 Global syntax data is assumed to initially be valid for FROM and
2284 remains valid for forward search starting at the returned position. */
2287 forw_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
2288 EMACS_INT nesting
, int style
, int prev_syntax
,
2289 ptrdiff_t *charpos_ptr
, ptrdiff_t *bytepos_ptr
,
2290 EMACS_INT
*incomment_ptr
, int *last_syntax_ptr
)
2292 unsigned short int quit_count
= 0;
2294 enum syntaxcode code
;
2295 int syntax
, other_syntax
;
2297 if (nesting
<= 0) nesting
= -1;
2299 /* Enter the loop in the middle so that we find
2300 a 2-char comment ender if we start in the middle of it. */
2301 syntax
= prev_syntax
;
2302 code
= syntax
& 0xff;
2303 if (syntax
!= 0 && from
< stop
) goto forw_incomment
;
2309 *incomment_ptr
= nesting
;
2310 *charpos_ptr
= from
;
2311 *bytepos_ptr
= from_byte
;
2313 (code
== Sescape
|| code
== Scharquote
2314 || SYNTAX_FLAGS_COMEND_FIRST (syntax
)
2316 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)))
2320 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2321 syntax
= SYNTAX_WITH_FLAGS (c
);
2322 code
= syntax
& 0xff;
2323 if (code
== Sendcomment
2324 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
2325 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ?
2326 (nesting
> 0 && --nesting
== 0) : nesting
< 0)
2327 && !(Vcomment_end_can_be_escaped
&& char_quoted (from
, from_byte
)))
2328 /* We have encountered a comment end of the same style
2329 as the comment sequence which began this comment
2332 if (code
== Scomment_fence
2333 && style
== ST_COMMENT_STYLE
)
2334 /* We have encountered a comment end of the same style
2335 as the comment sequence which began this comment
2340 && SYNTAX_FLAGS_COMMENT_NESTED (syntax
)
2341 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
)
2342 /* We have encountered a nested comment of the same style
2343 as the comment sequence which began this comment section. */
2345 INC_BOTH (from
, from_byte
);
2346 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2349 if (from
< stop
&& SYNTAX_FLAGS_COMEND_FIRST (syntax
)
2350 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2351 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2352 SYNTAX_FLAGS_COMEND_SECOND (other_syntax
))
2353 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, other_syntax
) == style
2354 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2355 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
))
2356 ? nesting
> 0 : nesting
< 0))
2358 syntax
= Smax
; /* So that "|#" (lisp) can not return
2359 the syntax of "#" in *last_syntax_ptr. */
2361 /* We have encountered a comment end of the same style
2362 as the comment sequence which began this comment section. */
2366 INC_BOTH (from
, from_byte
);
2367 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2372 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
2373 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2374 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2375 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
) == style
2376 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2377 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2378 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
)))
2379 /* We have encountered a nested comment of the same style
2380 as the comment sequence which began this comment section. */
2382 syntax
= Smax
; /* So that "#|#" isn't also a comment ender. */
2383 INC_BOTH (from
, from_byte
);
2384 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2388 rarely_quit (++quit_count
);
2390 *charpos_ptr
= from
;
2391 *bytepos_ptr
= from_byte
;
2392 *last_syntax_ptr
= Smax
; /* Any syntactic power the last byte had is
2397 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
2399 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2400 Stop scanning if we find something other than a comment or whitespace.
2401 Set point to where scanning stops.
2402 If COUNT comments are found as expected, with nothing except whitespace
2403 between them, return t; otherwise return nil. */)
2406 ptrdiff_t from
, from_byte
, stop
;
2408 enum syntaxcode code
;
2409 int comstyle
= 0; /* style of comment encountered */
2410 bool comnested
= 0; /* whether the comment is nestable or not */
2413 ptrdiff_t out_charpos
, out_bytepos
;
2416 unsigned short int quit_count
= 0;
2418 CHECK_NUMBER (count
);
2419 count1
= XINT (count
);
2420 stop
= count1
> 0 ? ZV
: BEGV
;
2423 from_byte
= PT_BYTE
;
2425 SETUP_SYNTAX_TABLE (from
, count1
);
2430 bool comstart_first
;
2431 int syntax
, other_syntax
;
2435 SET_PT_BOTH (from
, from_byte
);
2438 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2439 syntax
= SYNTAX_WITH_FLAGS (c
);
2441 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2442 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2443 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2444 INC_BOTH (from
, from_byte
);
2445 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2446 if (from
< stop
&& comstart_first
2447 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2448 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2449 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
)))
2451 /* We have encountered a comment start sequence and we
2452 are ignoring all text inside comments. We must record
2453 the comment style this sequence begins so that later,
2454 only a comment end of the same style actually ends
2455 the comment section. */
2457 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2458 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2459 INC_BOTH (from
, from_byte
);
2460 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2462 rarely_quit (++quit_count
);
2464 while (code
== Swhitespace
|| (code
== Sendcomment
&& c
== '\n'));
2466 if (code
== Scomment_fence
)
2467 comstyle
= ST_COMMENT_STYLE
;
2468 else if (code
!= Scomment
)
2470 DEC_BOTH (from
, from_byte
);
2471 SET_PT_BOTH (from
, from_byte
);
2474 /* We're at the start of a comment. */
2475 found
= forw_comment (from
, from_byte
, stop
, comnested
, comstyle
, 0,
2476 &out_charpos
, &out_bytepos
, &dummy
, &dummy2
);
2477 from
= out_charpos
; from_byte
= out_bytepos
;
2480 SET_PT_BOTH (from
, from_byte
);
2483 INC_BOTH (from
, from_byte
);
2484 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2485 /* We have skipped one comment. */
2495 SET_PT_BOTH (BEGV
, BEGV_BYTE
);
2499 DEC_BOTH (from
, from_byte
);
2500 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2501 bool quoted
= char_quoted (from
, from_byte
);
2502 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2503 int syntax
= SYNTAX_WITH_FLAGS (c
);
2506 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2507 if (code
== Sendcomment
)
2508 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2509 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
2510 && prev_char_comend_first (from
, from_byte
)
2511 && !char_quoted (from
- 1, dec_bytepos (from_byte
)))
2514 /* We must record the comment style encountered so that
2515 later, we can match only the proper comment begin
2516 sequence of the same style. */
2517 DEC_BOTH (from
, from_byte
);
2519 /* Calling char_quoted, above, set up global syntax position
2520 at the new value of FROM. */
2521 c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2522 other_syntax
= SYNTAX_WITH_FLAGS (c1
);
2523 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2524 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2527 if (code
== Scomment_fence
)
2529 /* Skip until first preceding unquoted comment_fence. */
2530 bool fence_found
= 0;
2531 ptrdiff_t ini
= from
, ini_byte
= from_byte
;
2535 DEC_BOTH (from
, from_byte
);
2536 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2537 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2538 if (SYNTAX (c
) == Scomment_fence
2539 && !char_quoted (from
, from_byte
))
2544 else if (from
== stop
)
2546 rarely_quit (++quit_count
);
2548 if (fence_found
== 0)
2550 from
= ini
; /* Set point to ini + 1. */
2551 from_byte
= ini_byte
;
2555 /* We have skipped one comment. */
2558 else if (code
== Sendcomment
)
2560 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2561 &out_charpos
, &out_bytepos
);
2565 /* This end-of-line is not an end-of-comment.
2566 Treat it like a whitespace.
2567 CC-mode (and maybe others) relies on this behavior. */
2571 /* Failure: we should go back to the end of this
2572 not-quite-endcomment. */
2573 if (SYNTAX (c
) != code
)
2574 /* It was a two-char Sendcomment. */
2575 INC_BOTH (from
, from_byte
);
2581 /* We have skipped one comment. */
2582 from
= out_charpos
, from_byte
= out_bytepos
;
2586 else if (code
!= Swhitespace
|| quoted
)
2589 INC_BOTH (from
, from_byte
);
2590 SET_PT_BOTH (from
, from_byte
);
2594 rarely_quit (++quit_count
);
2600 SET_PT_BOTH (from
, from_byte
);
2604 /* Return syntax code of character C if C is an ASCII character
2605 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2607 static enum syntaxcode
2608 syntax_multibyte (int c
, bool multibyte_symbol_p
)
2610 return ASCII_CHAR_P (c
) || !multibyte_symbol_p
? SYNTAX (c
) : Ssymbol
;
2614 scan_lists (EMACS_INT from
, EMACS_INT count
, EMACS_INT depth
, bool sexpflag
)
2617 ptrdiff_t stop
= count
> 0 ? ZV
: BEGV
;
2622 enum syntaxcode code
;
2623 EMACS_INT min_depth
= depth
; /* Err out if depth gets less than this. */
2624 int comstyle
= 0; /* Style of comment encountered. */
2625 bool comnested
= 0; /* Whether the comment is nestable or not. */
2627 EMACS_INT last_good
= from
;
2629 ptrdiff_t from_byte
;
2630 ptrdiff_t out_bytepos
, out_charpos
;
2633 bool multibyte_symbol_p
= sexpflag
&& multibyte_syntax_as_symbol
;
2634 unsigned short int quit_count
= 0;
2636 if (depth
> 0) min_depth
= 0;
2638 if (from
> ZV
) from
= ZV
;
2639 if (from
< BEGV
) from
= BEGV
;
2641 from_byte
= CHAR_TO_BYTE (from
);
2645 SETUP_SYNTAX_TABLE (from
, count
);
2650 rarely_quit (++quit_count
);
2651 bool comstart_first
, prefix
;
2652 int syntax
, other_syntax
;
2653 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2654 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2655 syntax
= SYNTAX_WITH_FLAGS (c
);
2656 code
= syntax_multibyte (c
, multibyte_symbol_p
);
2657 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2658 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2659 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2660 prefix
= SYNTAX_FLAGS_PREFIX (syntax
);
2661 if (depth
== min_depth
)
2663 INC_BOTH (from
, from_byte
);
2664 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2665 if (from
< stop
&& comstart_first
2666 && (c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2667 other_syntax
= SYNTAX_WITH_FLAGS (c
),
2668 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2669 && parse_sexp_ignore_comments
)
2671 /* We have encountered a comment start sequence and we
2672 are ignoring all text inside comments. We must record
2673 the comment style this sequence begins so that later,
2674 only a comment end of the same style actually ends
2675 the comment section. */
2677 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2678 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2679 INC_BOTH (from
, from_byte
);
2680 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2692 INC_BOTH (from
, from_byte
);
2693 /* Treat following character as a word constituent. */
2697 if (depth
|| !sexpflag
) break;
2698 /* This word counts as a sexp; return at end of it. */
2701 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2703 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2704 switch (syntax_multibyte (c
, multibyte_symbol_p
))
2708 INC_BOTH (from
, from_byte
);
2719 INC_BOTH (from
, from_byte
);
2720 rarely_quit (++quit_count
);
2724 case Scomment_fence
:
2725 comstyle
= ST_COMMENT_STYLE
;
2728 if (!parse_sexp_ignore_comments
) break;
2729 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2730 found
= forw_comment (from
, from_byte
, stop
,
2731 comnested
, comstyle
, 0,
2732 &out_charpos
, &out_bytepos
, &dummy
,
2734 from
= out_charpos
, from_byte
= out_bytepos
;
2741 INC_BOTH (from
, from_byte
);
2742 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2748 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (from_byte
))
2750 INC_BOTH (from
, from_byte
);
2760 if (!++depth
) goto done
;
2765 if (!--depth
) goto done
;
2766 if (depth
< min_depth
)
2767 xsignal3 (Qscan_error
,
2768 build_string ("Containing expression ends prematurely"),
2769 make_number (last_good
), make_number (from
));
2774 temp_pos
= dec_bytepos (from_byte
);
2775 stringterm
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2778 enum syntaxcode c_code
;
2781 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2782 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2783 c_code
= syntax_multibyte (c
, multibyte_symbol_p
);
2785 ? c
== stringterm
&& c_code
== Sstring
2786 : c_code
== Sstring_fence
)
2789 if (c_code
== Scharquote
|| c_code
== Sescape
)
2790 INC_BOTH (from
, from_byte
);
2791 INC_BOTH (from
, from_byte
);
2792 rarely_quit (++quit_count
);
2794 INC_BOTH (from
, from_byte
);
2795 if (!depth
&& sexpflag
) goto done
;
2798 /* Ignore whitespace, punctuation, quote, endcomment. */
2803 /* Reached end of buffer. Error if within object, return nil if between */
2809 /* End of object reached */
2818 rarely_quit (++quit_count
);
2819 DEC_BOTH (from
, from_byte
);
2820 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2821 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2822 int syntax
= SYNTAX_WITH_FLAGS (c
);
2823 code
= syntax_multibyte (c
, multibyte_symbol_p
);
2824 if (depth
== min_depth
)
2827 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2828 if (code
== Sendcomment
)
2829 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2830 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
2831 && prev_char_comend_first (from
, from_byte
)
2832 && parse_sexp_ignore_comments
)
2834 /* We must record the comment style encountered so that
2835 later, we can match only the proper comment begin
2836 sequence of the same style. */
2837 int c2
, other_syntax
;
2838 DEC_BOTH (from
, from_byte
);
2839 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2841 c2
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2842 other_syntax
= SYNTAX_WITH_FLAGS (c2
);
2843 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2844 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2847 /* Quoting turns anything except a comment-ender
2848 into a word character. Note that this cannot be true
2849 if we decremented FROM in the if-statement above. */
2850 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
2852 DEC_BOTH (from
, from_byte
);
2855 else if (SYNTAX_FLAGS_PREFIX (syntax
))
2864 if (depth
|| !sexpflag
) break;
2865 /* This word counts as a sexp; count object finished
2866 after passing it. */
2869 temp_pos
= from_byte
;
2870 if (! NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
2874 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2875 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2876 /* Don't allow comment-end to be quoted. */
2877 if (syntax_multibyte (c1
, multibyte_symbol_p
) == Sendcomment
)
2879 quoted
= char_quoted (from
- 1, temp_pos
);
2882 DEC_BOTH (from
, from_byte
);
2883 temp_pos
= dec_bytepos (temp_pos
);
2884 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2886 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2888 switch (syntax_multibyte (c1
, multibyte_symbol_p
))
2890 case Sword
: case Ssymbol
: case Squote
: break;
2891 default: goto done2
;
2893 DEC_BOTH (from
, from_byte
);
2894 rarely_quit (++quit_count
);
2903 temp_pos
= dec_bytepos (from_byte
);
2904 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2905 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (temp_pos
))
2906 DEC_BOTH (from
, from_byte
);
2916 if (!++depth
) goto done2
;
2921 if (!--depth
) goto done2
;
2922 if (depth
< min_depth
)
2923 xsignal3 (Qscan_error
,
2924 build_string ("Containing expression ends prematurely"),
2925 make_number (last_good
), make_number (from
));
2929 if (!parse_sexp_ignore_comments
)
2931 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2932 &out_charpos
, &out_bytepos
);
2933 /* FIXME: if !found, it really wasn't a comment-end.
2934 For single-char Sendcomment, we can't do much about it apart
2935 from skipping the char.
2936 For 2-char endcomments, we could try again, taking both
2937 chars as separate entities, but it's a lot of trouble
2938 for very little gain, so we don't bother either. -sm */
2940 from
= out_charpos
, from_byte
= out_bytepos
;
2943 case Scomment_fence
:
2949 DEC_BOTH (from
, from_byte
);
2950 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2951 if (!char_quoted (from
, from_byte
))
2953 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2954 if (syntax_multibyte (c
, multibyte_symbol_p
) == code
)
2957 rarely_quit (++quit_count
);
2959 if (code
== Sstring_fence
&& !depth
&& sexpflag
) goto done2
;
2963 stringterm
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2968 DEC_BOTH (from
, from_byte
);
2969 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2970 if (!char_quoted (from
, from_byte
))
2972 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2974 && (syntax_multibyte (c
, multibyte_symbol_p
)
2978 rarely_quit (++quit_count
);
2980 if (!depth
&& sexpflag
) goto done2
;
2983 /* Ignore whitespace, punctuation, quote, endcomment. */
2988 /* Reached start of buffer. Error if within object, return nil if between */
2999 XSETFASTINT (val
, from
);
3003 xsignal3 (Qscan_error
,
3004 build_string ("Unbalanced parentheses"),
3005 make_number (last_good
), make_number (from
));
3008 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
3009 doc
: /* Scan from character number FROM by COUNT lists.
3010 Scan forward if COUNT is positive, backward if COUNT is negative.
3011 Return the character number of the position thus found.
3013 A \"list", in this context, refers to a balanced parenthetical
3014 grouping, as determined by the syntax table.
3016 If DEPTH is nonzero, treat that as the nesting depth of the starting
3017 point (i.e. the starting point is DEPTH parentheses deep). This
3018 function scans over parentheses until the depth goes to zero COUNT
3019 times. Hence, positive DEPTH moves out that number of levels of
3020 parentheses, while negative DEPTH moves to a deeper level.
3022 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3024 If we reach the beginning or end of the accessible part of the buffer
3025 before we have scanned over COUNT lists, return nil if the depth at
3026 that point is zero, and signal a error if the depth is nonzero. */)
3027 (Lisp_Object from
, Lisp_Object count
, Lisp_Object depth
)
3029 CHECK_NUMBER (from
);
3030 CHECK_NUMBER (count
);
3031 CHECK_NUMBER (depth
);
3033 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
3036 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
3037 doc
: /* Scan from character number FROM by COUNT balanced expressions.
3038 If COUNT is negative, scan backwards.
3039 Returns the character number of the position thus found.
3041 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3043 If the beginning or end of (the accessible part of) the buffer is reached
3044 in the middle of a parenthetical grouping, an error is signaled.
3045 If the beginning or end is reached between groupings
3046 but before count is used up, nil is returned. */)
3047 (Lisp_Object from
, Lisp_Object count
)
3049 CHECK_NUMBER (from
);
3050 CHECK_NUMBER (count
);
3052 return scan_lists (XINT (from
), XINT (count
), 0, 1);
3055 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
3057 doc
: /* Move point backward over any number of chars with prefix syntax.
3058 This includes chars with expression prefix syntax class (\\=') and those with
3059 the prefix syntax flag (p). */)
3062 ptrdiff_t beg
= BEGV
;
3063 ptrdiff_t opoint
= PT
;
3064 ptrdiff_t opoint_byte
= PT_BYTE
;
3066 ptrdiff_t pos_byte
= PT_BYTE
;
3071 SET_PT_BOTH (opoint
, opoint_byte
);
3076 SETUP_SYNTAX_TABLE (pos
, -1);
3078 DEC_BOTH (pos
, pos_byte
);
3080 while (!char_quoted (pos
, pos_byte
)
3081 /* Previous statement updates syntax table. */
3082 && ((c
= FETCH_CHAR_AS_MULTIBYTE (pos_byte
), SYNTAX (c
) == Squote
)
3083 || syntax_prefix_flag_p (c
)))
3086 opoint_byte
= pos_byte
;
3090 DEC_BOTH (pos
, pos_byte
);
3094 SET_PT_BOTH (opoint
, opoint_byte
);
3100 /* If the character at FROM_BYTE is the second part of a 2-character
3101 comment opener based on PREV_FROM_SYNTAX, update STATE and return
3104 in_2char_comment_start (struct lisp_parse_state
*state
,
3105 int prev_from_syntax
,
3106 ptrdiff_t prev_from
,
3107 ptrdiff_t from_byte
)
3110 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax
)
3111 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
3112 syntax
= SYNTAX_WITH_FLAGS (c1
),
3113 SYNTAX_FLAGS_COMSTART_SECOND (syntax
)))
3115 /* Record the comment style we have entered so that only
3116 the comment-end sequence of the same style actually
3117 terminates the comment section. */
3119 = SYNTAX_FLAGS_COMMENT_STYLE (syntax
, prev_from_syntax
);
3120 bool comnested
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
)
3121 | SYNTAX_FLAGS_COMMENT_NESTED (syntax
));
3122 state
->incomment
= comnested
? 1 : -1;
3123 state
->comstr_start
= prev_from
;
3129 /* Parse forward from FROM / FROM_BYTE to END,
3130 assuming that FROM has state STATE,
3131 and return a description of the state of the parse at END.
3132 If STOPBEFORE, stop at the start of an atom.
3133 If COMMENTSTOP is 1, stop at the start of a comment.
3134 If COMMENTSTOP is -1, stop at the start or end of a comment,
3135 after the beginning of a string, or after the end of a string. */
3138 scan_sexps_forward (struct lisp_parse_state
*state
,
3139 ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t end
,
3140 EMACS_INT targetdepth
, bool stopbefore
,
3143 enum syntaxcode code
;
3144 struct level
{ ptrdiff_t last
, prev
; };
3145 struct level levelstart
[100];
3146 struct level
*curlevel
= levelstart
;
3147 struct level
*endlevel
= levelstart
+ 100;
3148 EMACS_INT depth
; /* Paren depth of current scanning location.
3149 level - levelstart equals this except
3150 when the depth becomes negative. */
3151 EMACS_INT mindepth
; /* Lowest DEPTH value seen. */
3152 bool start_quoted
= 0; /* True means starting after a char quote. */
3154 ptrdiff_t prev_from
; /* Keep one character before FROM. */
3155 ptrdiff_t prev_from_byte
;
3156 int prev_from_syntax
, prev_prev_from_syntax
;
3157 bool boundary_stop
= commentstop
== -1;
3160 ptrdiff_t out_bytepos
, out_charpos
;
3162 unsigned short int quit_count
= 0;
3165 prev_from_byte
= from_byte
;
3167 DEC_BOTH (prev_from
, prev_from_byte
);
3169 /* Use this macro instead of `from++'. */
3171 do { prev_from = from; \
3172 prev_from_byte = from_byte; \
3173 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3174 prev_prev_from_syntax = prev_from_syntax; \
3175 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3176 INC_BOTH (from, from_byte); \
3178 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3183 depth
= state
->depth
;
3184 start_quoted
= state
->quoted
;
3185 prev_prev_from_syntax
= Smax
;
3186 prev_from_syntax
= state
->prev_syntax
;
3188 tem
= state
->levelstarts
;
3189 while (!NILP (tem
)) /* >= second enclosing sexps. */
3191 Lisp_Object temhd
= Fcar (tem
);
3192 if (RANGED_INTEGERP (PTRDIFF_MIN
, temhd
, PTRDIFF_MAX
))
3193 curlevel
->last
= XINT (temhd
);
3194 if (++curlevel
== endlevel
)
3195 curlevel
--; /* error ("Nesting too deep for parser"); */
3196 curlevel
->prev
= -1;
3197 curlevel
->last
= -1;
3200 curlevel
->prev
= -1;
3201 curlevel
->last
= -1;
3206 SETUP_SYNTAX_TABLE (from
, 1);
3208 /* Enter the loop at a place appropriate for initial state. */
3210 if (state
->incomment
)
3211 goto startincomment
;
3212 if (state
->instring
>= 0)
3214 nofence
= state
->instring
!= ST_STRING_STYLE
;
3216 goto startquotedinstring
;
3219 else if (start_quoted
)
3221 else if ((from
< end
)
3222 && (in_2char_comment_start (state
, prev_from_syntax
,
3223 prev_from
, from_byte
)))
3226 prev_from_syntax
= Smax
; /* the syntax has already been "used up". */
3232 rarely_quit (++quit_count
);
3236 && (in_2char_comment_start (state
, prev_from_syntax
,
3237 prev_from
, from_byte
)))
3240 prev_from_syntax
= Smax
; /* the syntax has already been "used up". */
3244 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax
))
3246 code
= prev_from_syntax
& 0xff;
3251 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3252 curlevel
->last
= prev_from
;
3254 if (from
== end
) goto endquoted
;
3257 /* treat following character as a word constituent */
3260 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3261 curlevel
->last
= prev_from
;
3265 if (in_2char_comment_start (state
, prev_from_syntax
,
3266 prev_from
, from_byte
))
3269 prev_from_syntax
= Smax
; /* the syntax has already been "used up". */
3273 int symchar
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3274 switch (SYNTAX (symchar
))
3279 if (from
== end
) goto endquoted
;
3289 rarely_quit (++quit_count
);
3292 curlevel
->prev
= curlevel
->last
;
3295 case Scomment_fence
:
3296 /* Record the comment style we have entered so that only
3297 the comment-end sequence of the same style actually
3298 terminates the comment section. */
3299 state
->comstyle
= ST_COMMENT_STYLE
;
3300 state
->incomment
= -1;
3301 state
->comstr_start
= prev_from
;
3304 state
->comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax
, 0);
3305 state
->incomment
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
) ?
3307 state
->comstr_start
= prev_from
;
3309 if (commentstop
|| boundary_stop
) goto done
;
3311 /* The (from == BEGV) test was to enter the loop in the middle so
3312 that we find a 2-char comment ender even if we start in the
3313 middle of it. We don't want to do that if we're just at the
3314 beginning of the comment (think of (*) ... (*)). */
3315 found
= forw_comment (from
, from_byte
, end
,
3316 state
->incomment
, state
->comstyle
,
3317 from
== BEGV
? 0 : prev_from_syntax
,
3318 &out_charpos
, &out_bytepos
, &state
->incomment
,
3320 from
= out_charpos
; from_byte
= out_bytepos
;
3321 /* Beware! prev_from and friends (except prev_from_syntax)
3322 are invalid now. Luckily, the `done' doesn't use them
3323 and the INC_FROM sets them to a sane value without
3325 if (!found
) goto done
;
3327 state
->incomment
= 0;
3328 state
->comstyle
= 0; /* reset the comment style */
3329 prev_from_syntax
= Smax
; /* For the comment closer */
3330 if (boundary_stop
) goto done
;
3334 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3336 /* curlevel++->last ran into compiler bug on Apollo */
3337 curlevel
->last
= prev_from
;
3338 if (++curlevel
== endlevel
)
3339 curlevel
--; /* error ("Nesting too deep for parser"); */
3340 curlevel
->prev
= -1;
3341 curlevel
->last
= -1;
3342 if (targetdepth
== depth
) goto done
;
3347 if (depth
< mindepth
)
3349 if (curlevel
!= levelstart
)
3351 curlevel
->prev
= curlevel
->last
;
3352 if (targetdepth
== depth
) goto done
;
3357 state
->comstr_start
= from
- 1;
3358 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3359 curlevel
->last
= prev_from
;
3360 state
->instring
= (code
== Sstring
3361 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte
))
3363 if (boundary_stop
) goto done
;
3366 nofence
= state
->instring
!= ST_STRING_STYLE
;
3371 enum syntaxcode c_code
;
3373 if (from
>= end
) goto done
;
3374 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3375 c_code
= SYNTAX (c
);
3377 /* Check C_CODE here so that if the char has
3378 a syntax-table property which says it is NOT
3379 a string character, it does not end the string. */
3380 if (nofence
&& c
== state
->instring
&& c_code
== Sstring
)
3386 if (!nofence
) goto string_end
;
3392 startquotedinstring
:
3393 if (from
>= end
) goto endquoted
;
3400 rarely_quit (++quit_count
);
3404 state
->instring
= -1;
3405 curlevel
->prev
= curlevel
->last
;
3407 if (boundary_stop
) goto done
;
3411 /* FIXME: We should do something with it. */
3414 /* Ignore whitespace, punctuation, quote, endcomment. */
3420 stop
: /* Here if stopping before start of sexp. */
3421 from
= prev_from
; /* We have just fetched the char that starts it; */
3422 from_byte
= prev_from_byte
;
3423 prev_from_syntax
= prev_prev_from_syntax
;
3424 goto done
; /* but return the position before it. */
3429 state
->depth
= depth
;
3430 state
->mindepth
= mindepth
;
3431 state
->thislevelstart
= curlevel
->prev
;
3432 state
->prevlevelstart
3433 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
3434 state
->location
= from
;
3435 state
->location_byte
= from_byte
;
3436 state
->levelstarts
= Qnil
;
3437 while (curlevel
> levelstart
)
3438 state
->levelstarts
= Fcons (make_number ((--curlevel
)->last
),
3439 state
->levelstarts
);
3440 state
->prev_syntax
= (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax
)
3441 || state
->quoted
) ? prev_from_syntax
: Smax
;
3444 /* Convert a (lisp) parse state to the internal form used in
3445 scan_sexps_forward. */
3447 internalize_parse_state (Lisp_Object external
, struct lisp_parse_state
*state
)
3451 if (NILP (external
))
3454 state
->instring
= -1;
3455 state
->incomment
= 0;
3457 state
->comstyle
= 0; /* comment style a by default. */
3458 state
->comstr_start
= -1; /* no comment/string seen. */
3459 state
->levelstarts
= Qnil
;
3460 state
->prev_syntax
= Smax
;
3464 tem
= Fcar (external
);
3466 state
->depth
= XINT (tem
);
3470 external
= Fcdr (external
);
3471 external
= Fcdr (external
);
3472 external
= Fcdr (external
);
3473 tem
= Fcar (external
);
3474 /* Check whether we are inside string_fence-style string: */
3475 state
->instring
= (!NILP (tem
)
3476 ? (CHARACTERP (tem
) ? XFASTINT (tem
) : ST_STRING_STYLE
)
3479 external
= Fcdr (external
);
3480 tem
= Fcar (external
);
3481 state
->incomment
= (!NILP (tem
)
3482 ? (INTEGERP (tem
) ? XINT (tem
) : -1)
3485 external
= Fcdr (external
);
3486 tem
= Fcar (external
);
3487 state
->quoted
= !NILP (tem
);
3489 /* if the eighth element of the list is nil, we are in comment
3490 style a. If it is non-nil, we are in comment style b */
3491 external
= Fcdr (external
);
3492 external
= Fcdr (external
);
3493 tem
= Fcar (external
);
3494 state
->comstyle
= (NILP (tem
)
3496 : (RANGED_INTEGERP (0, tem
, ST_COMMENT_STYLE
)
3498 : ST_COMMENT_STYLE
));
3500 external
= Fcdr (external
);
3501 tem
= Fcar (external
);
3502 state
->comstr_start
=
3503 RANGED_INTEGERP (PTRDIFF_MIN
, tem
, PTRDIFF_MAX
) ? XINT (tem
) : -1;
3504 external
= Fcdr (external
);
3505 tem
= Fcar (external
);
3506 state
->levelstarts
= tem
;
3508 external
= Fcdr (external
);
3509 tem
= Fcar (external
);
3510 state
->prev_syntax
= NILP (tem
) ? Smax
: XINT (tem
);
3514 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
3515 doc
: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3516 Parsing stops at TO or when certain criteria are met;
3517 point is set to where parsing stops.
3518 If fifth arg OLDSTATE is omitted or nil,
3519 parsing assumes that FROM is the beginning of a function.
3521 Value is a list of elements describing final state of parsing:
3523 1. character address of start of innermost containing list; nil if none.
3524 2. character address of start of last complete sexp terminated.
3525 3. non-nil if inside a string.
3526 (it is the character that will terminate the string,
3527 or t if the string should be terminated by a generic string delimiter.)
3528 4. nil if outside a comment, t if inside a non-nestable comment,
3529 else an integer (the current comment nesting).
3530 5. t if following a quote character.
3531 6. the minimum paren-depth encountered during this scan.
3532 7. style of comment, if any.
3533 8. character address of start of comment or string; nil if not in one.
3534 9. List of positions of currently open parens, outermost first.
3535 10. When the last position scanned holds the first character of a
3536 (potential) two character construct, the syntax of that position,
3537 otherwise nil. That construct can be a two character comment
3538 delimiter or an Escaped or Char-quoted character.
3539 11..... Possible further internal information used by `parse-partial-sexp'.
3541 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3542 in parentheses becomes equal to TARGETDEPTH.
3543 Fourth arg STOPBEFORE non-nil means stop when we come to
3544 any character that starts a sexp.
3545 Fifth arg OLDSTATE is a list like what this function returns.
3546 It is used to initialize the state of the parse. Elements number 1, 2, 6
3548 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
3549 If it is the symbol `syntax-table', stop after the start of a comment or a
3550 string, or after end of a comment or a string. */)
3551 (Lisp_Object from
, Lisp_Object to
, Lisp_Object targetdepth
,
3552 Lisp_Object stopbefore
, Lisp_Object oldstate
, Lisp_Object commentstop
)
3554 struct lisp_parse_state state
;
3557 if (!NILP (targetdepth
))
3559 CHECK_NUMBER (targetdepth
);
3560 target
= XINT (targetdepth
);
3563 target
= TYPE_MINIMUM (EMACS_INT
); /* We won't reach this depth. */
3565 validate_region (&from
, &to
);
3566 internalize_parse_state (oldstate
, &state
);
3567 scan_sexps_forward (&state
, XINT (from
), CHAR_TO_BYTE (XINT (from
)),
3569 target
, !NILP (stopbefore
),
3571 ? 0 : (EQ (commentstop
, Qsyntax_table
) ? -1 : 1)));
3573 SET_PT_BOTH (state
.location
, state
.location_byte
);
3576 Fcons (make_number (state
.depth
),
3577 Fcons (state
.prevlevelstart
< 0
3578 ? Qnil
: make_number (state
.prevlevelstart
),
3579 Fcons (state
.thislevelstart
< 0
3580 ? Qnil
: make_number (state
.thislevelstart
),
3581 Fcons (state
.instring
>= 0
3582 ? (state
.instring
== ST_STRING_STYLE
3583 ? Qt
: make_number (state
.instring
)) : Qnil
,
3584 Fcons (state
.incomment
< 0 ? Qt
:
3585 (state
.incomment
== 0 ? Qnil
:
3586 make_number (state
.incomment
)),
3587 Fcons (state
.quoted
? Qt
: Qnil
,
3588 Fcons (make_number (state
.mindepth
),
3589 Fcons ((state
.comstyle
3590 ? (state
.comstyle
== ST_COMMENT_STYLE
3592 : make_number (state
.comstyle
))
3594 Fcons (((state
.incomment
3595 || (state
.instring
>= 0))
3596 ? make_number (state
.comstr_start
)
3598 Fcons (state
.levelstarts
,
3599 Fcons (state
.prev_syntax
== Smax
3601 : make_number (state
.prev_syntax
),
3606 init_syntax_once (void)
3611 /* This has to be done here, before we call Fmake_char_table. */
3612 DEFSYM (Qsyntax_table
, "syntax-table");
3614 /* Create objects which can be shared among syntax tables. */
3615 Vsyntax_code_object
= make_uninit_vector (Smax
);
3616 for (i
= 0; i
< Smax
; i
++)
3617 ASET (Vsyntax_code_object
, i
, Fcons (make_number (i
), Qnil
));
3619 /* Now we are ready to set up this property, so we can
3620 create syntax tables. */
3621 Fput (Qsyntax_table
, Qchar_table_extra_slots
, make_number (0));
3623 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3625 Vstandard_syntax_table
= Fmake_char_table (Qsyntax_table
, temp
);
3627 /* Control characters should not be whitespace. */
3628 temp
= AREF (Vsyntax_code_object
, Spunct
);
3629 for (i
= 0; i
<= ' ' - 1; i
++)
3630 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3631 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 0177, temp
);
3633 /* Except that a few really are whitespace. */
3634 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3635 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ' ', temp
);
3636 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\t', temp
);
3637 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\n', temp
);
3638 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 015, temp
);
3639 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 014, temp
);
3641 temp
= AREF (Vsyntax_code_object
, Sword
);
3642 for (i
= 'a'; i
<= 'z'; i
++)
3643 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3644 for (i
= 'A'; i
<= 'Z'; i
++)
3645 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3646 for (i
= '0'; i
<= '9'; i
++)
3647 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3649 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '$', temp
);
3650 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '%', temp
);
3652 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '(',
3653 Fcons (make_number (Sopen
), make_number (')')));
3654 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ')',
3655 Fcons (make_number (Sclose
), make_number ('(')));
3656 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '[',
3657 Fcons (make_number (Sopen
), make_number (']')));
3658 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ']',
3659 Fcons (make_number (Sclose
), make_number ('[')));
3660 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '{',
3661 Fcons (make_number (Sopen
), make_number ('}')));
3662 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '}',
3663 Fcons (make_number (Sclose
), make_number ('{')));
3664 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '"',
3665 Fcons (make_number (Sstring
), Qnil
));
3666 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\\',
3667 Fcons (make_number (Sescape
), Qnil
));
3669 temp
= AREF (Vsyntax_code_object
, Ssymbol
);
3670 for (i
= 0; i
< 10; i
++)
3672 c
= "_-+*/&|<>="[i
];
3673 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3676 temp
= AREF (Vsyntax_code_object
, Spunct
);
3677 for (i
= 0; i
< 12; i
++)
3679 c
= ".,;:?!#@~^'`"[i
];
3680 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3683 /* All multibyte characters have syntax `word' by default. */
3684 temp
= AREF (Vsyntax_code_object
, Sword
);
3685 char_table_set_range (Vstandard_syntax_table
, 0x80, MAX_CHAR
, temp
);
3689 syms_of_syntax (void)
3691 DEFSYM (Qsyntax_table_p
, "syntax-table-p");
3693 staticpro (&Vsyntax_code_object
);
3695 staticpro (&gl_state
.object
);
3696 staticpro (&gl_state
.global_code
);
3697 staticpro (&gl_state
.current_syntax_table
);
3698 staticpro (&gl_state
.old_prop
);
3700 /* Defined in regex.c. */
3701 staticpro (&re_match_object
);
3703 DEFSYM (Qscan_error
, "scan-error");
3704 Fput (Qscan_error
, Qerror_conditions
,
3705 listn (CONSTYPE_PURE
, 2, Qscan_error
, Qerror
));
3706 Fput (Qscan_error
, Qerror_message
,
3707 build_pure_c_string ("Scan error"));
3709 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments
,
3710 doc
: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3712 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties
,
3713 doc
: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3714 Otherwise, that text property is simply ignored.
3715 See the info node `(elisp)Syntax Properties' for a description of the
3716 `syntax-table' property. */);
3718 DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done
,
3719 doc
: /* Position up to which syntax-table properties have been set. */);
3720 syntax_propertize__done
= -1;
3721 DEFSYM (Qinternal__syntax_propertize
, "internal--syntax-propertize");
3722 Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3724 words_include_escapes
= 0;
3725 DEFVAR_BOOL ("words-include-escapes", words_include_escapes
,
3726 doc
: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3728 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol
,
3729 doc
: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3730 multibyte_syntax_as_symbol
= 0;
3732 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3733 open_paren_in_column_0_is_defun_start
,
3734 doc
: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3735 open_paren_in_column_0_is_defun_start
= 1;
3738 DEFVAR_LISP ("find-word-boundary-function-table",
3739 Vfind_word_boundary_function_table
,
3741 Char table of functions to search for the word boundary.
3742 Each function is called with two arguments; POS and LIMIT.
3743 POS and LIMIT are character positions in the current buffer.
3745 If POS is less than LIMIT, POS is at the first character of a word,
3746 and the return value of a function should be a position after the
3747 last character of that word.
3749 If POS is not less than LIMIT, POS is at the last character of a word,
3750 and the return value of a function should be a position at the first
3751 character of that word.
3753 In both cases, LIMIT bounds the search. */);
3754 Vfind_word_boundary_function_table
= Fmake_char_table (Qnil
, Qnil
);
3756 DEFVAR_BOOL ("comment-end-can-be-escaped", Vcomment_end_can_be_escaped
,
3757 doc
: /* Non-nil means an escaped ender inside a comment doesn't end the comment. */);
3758 Vcomment_end_can_be_escaped
= 0;
3759 DEFSYM (Qcomment_end_can_be_escaped
, "comment-end-can-be-escaped");
3760 Fmake_variable_buffer_local (Qcomment_end_can_be_escaped
);
3762 defsubr (&Ssyntax_table_p
);
3763 defsubr (&Ssyntax_table
);
3764 defsubr (&Sstandard_syntax_table
);
3765 defsubr (&Scopy_syntax_table
);
3766 defsubr (&Sset_syntax_table
);
3767 defsubr (&Schar_syntax
);
3768 defsubr (&Smatching_paren
);
3769 defsubr (&Sstring_to_syntax
);
3770 defsubr (&Smodify_syntax_entry
);
3771 defsubr (&Sinternal_describe_syntax_value
);
3773 defsubr (&Sforward_word
);
3775 defsubr (&Sskip_chars_forward
);
3776 defsubr (&Sskip_chars_backward
);
3777 defsubr (&Sskip_syntax_forward
);
3778 defsubr (&Sskip_syntax_backward
);
3780 defsubr (&Sforward_comment
);
3781 defsubr (&Sscan_lists
);
3782 defsubr (&Sscan_sexps
);
3783 defsubr (&Sbackward_prefix_chars
);
3784 defsubr (&Sparse_partial_sexp
);