1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
6 This file is part of GNU Emacs.
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28 #include "character.h"
32 /* Make syntax table lookup grant data in gl_state. */
33 #define SYNTAX_ENTRY_VIA_PROPERTY
36 #include "intervals.h"
38 /* We use these constants in place for comment-style and
39 string-ender-char to distinguish comments/strings started by
40 comment_fence and string_fence codes. */
42 #define ST_COMMENT_STYLE (256 + 1)
43 #define ST_STRING_STYLE (256 + 2)
46 Lisp_Object Qsyntax_table_p
, Qsyntax_table
, Qscan_error
;
48 int words_include_escapes
;
49 int parse_sexp_lookup_properties
;
51 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
52 int multibyte_syntax_as_symbol
;
54 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
55 if not compiled with GCC. No need to mark it, since it is used
56 only very temporarily. */
57 Lisp_Object syntax_temp
;
59 /* Non-zero means an open parenthesis in column 0 is always considered
60 to be the start of a defun. Zero means an open parenthesis in
61 column 0 has no special meaning. */
63 int open_paren_in_column_0_is_defun_start
;
65 /* This is the internal form of the parse state used in parse-partial-sexp. */
67 struct lisp_parse_state
69 int depth
; /* Depth at end of parsing. */
70 int instring
; /* -1 if not within string, else desired terminator. */
71 int incomment
; /* -1 if in unnestable comment else comment nesting */
72 int comstyle
; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
73 int quoted
; /* Nonzero if just after an escape char at end of parsing */
74 int mindepth
; /* Minimum depth seen while scanning. */
75 /* Char number of most recent start-of-expression at current level */
76 EMACS_INT thislevelstart
;
77 /* Char number of start of containing expression */
78 EMACS_INT prevlevelstart
;
79 EMACS_INT location
; /* Char number at which parsing stopped. */
80 EMACS_INT comstr_start
; /* Position of last comment/string starter. */
81 Lisp_Object levelstarts
; /* Char numbers of starts-of-expression
82 of levels (starting from outermost). */
85 /* These variables are a cache for finding the start of a defun.
86 find_start_pos is the place for which the defun start was found.
87 find_start_value is the defun start position found for it.
88 find_start_value_byte is the corresponding byte position.
89 find_start_buffer is the buffer it was found in.
90 find_start_begv is the BEGV value when it was found.
91 find_start_modiff is the value of MODIFF when it was found. */
93 static EMACS_INT find_start_pos
;
94 static EMACS_INT find_start_value
;
95 static EMACS_INT find_start_value_byte
;
96 static struct buffer
*find_start_buffer
;
97 static EMACS_INT find_start_begv
;
98 static int find_start_modiff
;
101 static Lisp_Object
skip_chars (int, Lisp_Object
, Lisp_Object
, int);
102 static Lisp_Object
skip_syntaxes (int, Lisp_Object
, Lisp_Object
);
103 static Lisp_Object
scan_lists (EMACS_INT
, EMACS_INT
, EMACS_INT
, int);
104 static void scan_sexps_forward (struct lisp_parse_state
*,
105 EMACS_INT
, EMACS_INT
, EMACS_INT
, int,
106 int, Lisp_Object
, int);
107 static int in_classes (int, Lisp_Object
);
110 struct gl_state_s gl_state
; /* Global state of syntax parser. */
112 INTERVAL
interval_of (int, Lisp_Object
);
113 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
114 to scan to property-change. */
116 /* Update gl_state to an appropriate interval which contains CHARPOS. The
117 sign of COUNT give the relative position of CHARPOS wrt the previously
118 valid interval. If INIT, only [be]_property fields of gl_state are
119 valid at start, the rest is filled basing on OBJECT.
121 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
122 direction than the intervals - or in an interval. We update the
123 current syntax-table basing on the property of this interval, and
124 update the interval to start further than CHARPOS - or be
125 NULL_INTERVAL. We also update lim_property to be the next value of
126 charpos to call this subroutine again - or be before/after the
127 start/end of OBJECT. */
130 update_syntax_table (int charpos
, int count
, int init
, Lisp_Object object
)
132 Lisp_Object tmp_table
;
133 int cnt
= 0, invalidate
= 1;
138 gl_state
.old_prop
= Qnil
;
139 gl_state
.start
= gl_state
.b_property
;
140 gl_state
.stop
= gl_state
.e_property
;
141 i
= interval_of (charpos
, object
);
142 gl_state
.backward_i
= gl_state
.forward_i
= i
;
144 if (NULL_INTERVAL_P (i
))
146 /* interval_of updates only ->position of the return value, so
147 update the parents manually to speed up update_interval. */
148 while (!NULL_PARENT (i
))
150 if (AM_RIGHT_CHILD (i
))
151 INTERVAL_PARENT (i
)->position
= i
->position
152 - LEFT_TOTAL_LENGTH (i
) + TOTAL_LENGTH (i
) /* right end */
153 - TOTAL_LENGTH (INTERVAL_PARENT (i
))
154 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i
));
156 INTERVAL_PARENT (i
)->position
= i
->position
- LEFT_TOTAL_LENGTH (i
)
158 i
= INTERVAL_PARENT (i
);
160 i
= gl_state
.forward_i
;
161 gl_state
.b_property
= i
->position
- gl_state
.offset
;
162 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
165 i
= count
> 0 ? gl_state
.forward_i
: gl_state
.backward_i
;
167 /* We are guaranteed to be called with CHARPOS either in i,
169 if (NULL_INTERVAL_P (i
))
170 error ("Error in syntax_table logic for to-the-end intervals");
171 else if (charpos
< i
->position
) /* Move left. */
174 error ("Error in syntax_table logic for intervals <-");
175 /* Update the interval. */
176 i
= update_interval (i
, charpos
);
177 if (INTERVAL_LAST_POS (i
) != gl_state
.b_property
)
180 gl_state
.forward_i
= i
;
181 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
184 else if (charpos
>= INTERVAL_LAST_POS (i
)) /* Move right. */
187 error ("Error in syntax_table logic for intervals ->");
188 /* Update the interval. */
189 i
= update_interval (i
, charpos
);
190 if (i
->position
!= gl_state
.e_property
)
193 gl_state
.backward_i
= i
;
194 gl_state
.b_property
= i
->position
- gl_state
.offset
;
199 tmp_table
= textget (i
->plist
, Qsyntax_table
);
202 invalidate
= !EQ (tmp_table
, gl_state
.old_prop
); /* Need to invalidate? */
204 if (invalidate
) /* Did not get to adjacent interval. */
205 { /* with the same table => */
206 /* invalidate the old range. */
209 gl_state
.backward_i
= i
;
210 gl_state
.b_property
= i
->position
- gl_state
.offset
;
214 gl_state
.forward_i
= i
;
215 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
219 if (!EQ (tmp_table
, gl_state
.old_prop
))
221 gl_state
.current_syntax_table
= tmp_table
;
222 gl_state
.old_prop
= tmp_table
;
223 if (EQ (Fsyntax_table_p (tmp_table
), Qt
))
225 gl_state
.use_global
= 0;
227 else if (CONSP (tmp_table
))
229 gl_state
.use_global
= 1;
230 gl_state
.global_code
= tmp_table
;
234 gl_state
.use_global
= 0;
235 gl_state
.current_syntax_table
= current_buffer
->syntax_table
;
239 while (!NULL_INTERVAL_P (i
))
241 if (cnt
&& !EQ (tmp_table
, textget (i
->plist
, Qsyntax_table
)))
245 gl_state
.e_property
= i
->position
- gl_state
.offset
;
246 gl_state
.forward_i
= i
;
251 = i
->position
+ LENGTH (i
) - gl_state
.offset
;
252 gl_state
.backward_i
= i
;
256 else if (cnt
== INTERVALS_AT_ONCE
)
261 = i
->position
+ LENGTH (i
) - gl_state
.offset
262 /* e_property at EOB is not set to ZV but to ZV+1, so that
263 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
264 having to check eob between the two. */
265 + (NULL_INTERVAL_P (next_interval (i
)) ? 1 : 0);
266 gl_state
.forward_i
= i
;
270 gl_state
.b_property
= i
->position
- gl_state
.offset
;
271 gl_state
.backward_i
= i
;
276 i
= count
> 0 ? next_interval (i
) : previous_interval (i
);
278 eassert (NULL_INTERVAL_P (i
)); /* This property goes to the end. */
280 gl_state
.e_property
= gl_state
.stop
;
282 gl_state
.b_property
= gl_state
.start
;
285 /* Returns TRUE if char at CHARPOS is quoted.
286 Global syntax-table data should be set up already to be good at CHARPOS
287 or after. On return global syntax data is good for lookup at CHARPOS. */
290 char_quoted (EMACS_INT charpos
, EMACS_INT bytepos
)
292 register enum syntaxcode code
;
293 register EMACS_INT beg
= BEGV
;
294 register int quoted
= 0;
295 EMACS_INT orig
= charpos
;
297 while (charpos
> beg
)
300 DEC_BOTH (charpos
, bytepos
);
302 UPDATE_SYNTAX_TABLE_BACKWARD (charpos
);
303 c
= FETCH_CHAR_AS_MULTIBYTE (bytepos
);
305 if (! (code
== Scharquote
|| code
== Sescape
))
311 UPDATE_SYNTAX_TABLE (orig
);
315 /* Return the bytepos one character after BYTEPOS.
316 We assume that BYTEPOS is not at the end of the buffer. */
319 inc_bytepos (EMACS_INT bytepos
)
321 if (NILP (current_buffer
->enable_multibyte_characters
))
328 /* Return the bytepos one character before BYTEPOS.
329 We assume that BYTEPOS is not at the start of the buffer. */
332 dec_bytepos (EMACS_INT bytepos
)
334 if (NILP (current_buffer
->enable_multibyte_characters
))
341 /* Return a defun-start position before POS and not too far before.
342 It should be the last one before POS, or nearly the last.
344 When open_paren_in_column_0_is_defun_start is nonzero,
345 only the beginning of the buffer is treated as a defun-start.
347 We record the information about where the scan started
348 and what its result was, so that another call in the same area
349 can return the same value very quickly.
351 There is no promise at which position the global syntax data is
352 valid on return from the subroutine, so the caller should explicitly
353 update the global data. */
356 find_defun_start (EMACS_INT pos
, EMACS_INT pos_byte
)
358 EMACS_INT opoint
= PT
, opoint_byte
= PT_BYTE
;
360 if (!open_paren_in_column_0_is_defun_start
)
362 find_start_value_byte
= BEGV_BYTE
;
366 /* Use previous finding, if it's valid and applies to this inquiry. */
367 if (current_buffer
== find_start_buffer
368 /* Reuse the defun-start even if POS is a little farther on.
369 POS might be in the next defun, but that's ok.
370 Our value may not be the best possible, but will still be usable. */
371 && pos
<= find_start_pos
+ 1000
372 && pos
>= find_start_value
373 && BEGV
== find_start_begv
374 && MODIFF
== find_start_modiff
)
375 return find_start_value
;
377 /* Back up to start of line. */
378 scan_newline (pos
, pos_byte
, BEGV
, BEGV_BYTE
, -1, 1);
380 /* We optimize syntax-table lookup for rare updates. Thus we accept
381 only those `^\s(' which are good in global _and_ text-property
383 SETUP_BUFFER_SYNTAX_TABLE ();
388 /* Open-paren at start of line means we may have found our
390 c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
391 if (SYNTAX (c
) == Sopen
)
393 SETUP_SYNTAX_TABLE (PT
+ 1, -1); /* Try again... */
394 c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
395 if (SYNTAX (c
) == Sopen
)
397 /* Now fallback to the default value. */
398 SETUP_BUFFER_SYNTAX_TABLE ();
400 /* Move to beg of previous line. */
401 scan_newline (PT
, PT_BYTE
, BEGV
, BEGV_BYTE
, -2, 1);
404 /* Record what we found, for the next try. */
405 find_start_value
= PT
;
406 find_start_value_byte
= PT_BYTE
;
407 find_start_buffer
= current_buffer
;
408 find_start_modiff
= MODIFF
;
409 find_start_begv
= BEGV
;
410 find_start_pos
= pos
;
412 TEMP_SET_PT_BOTH (opoint
, opoint_byte
);
414 return find_start_value
;
417 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
420 prev_char_comend_first (int pos
, int pos_byte
)
424 DEC_BOTH (pos
, pos_byte
);
425 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
426 c
= FETCH_CHAR (pos_byte
);
427 val
= SYNTAX_COMEND_FIRST (c
);
428 UPDATE_SYNTAX_TABLE_FORWARD (pos
+ 1);
432 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
435 * prev_char_comstart_first (pos, pos_byte)
440 * DEC_BOTH (pos, pos_byte);
441 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
442 * c = FETCH_CHAR (pos_byte);
443 * val = SYNTAX_COMSTART_FIRST (c);
444 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
448 /* Checks whether charpos FROM is at the end of a comment.
449 FROM_BYTE is the bytepos corresponding to FROM.
450 Do not move back before STOP.
452 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
455 If successful, store the charpos of the comment's beginning
456 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
458 Global syntax data remains valid for backward search starting at
459 the returned value (or at FROM, if the search was not successful). */
462 back_comment (EMACS_INT from
, EMACS_INT from_byte
, EMACS_INT stop
, int comnested
, int comstyle
, EMACS_INT
*charpos_ptr
, EMACS_INT
*bytepos_ptr
)
464 /* Look back, counting the parity of string-quotes,
465 and recording the comment-starters seen.
466 When we reach a safe place, assume that's not in a string;
467 then step the main scan to the earliest comment-starter seen
468 an even number of string quotes away from the safe place.
470 OFROM[I] is position of the earliest comment-starter seen
471 which is I+2X quotes from the comment-end.
472 PARITY is current parity of quotes from the comment end. */
473 int string_style
= -1; /* Presumed outside of any string. */
474 int string_lossage
= 0;
475 /* Not a real lossage: indicates that we have passed a matching comment
476 starter plus a non-matching comment-ender, meaning that any matching
477 comment-starter we might see later could be a false positive (hidden
478 inside another comment).
479 Test case: { a (* b } c (* d *) */
480 int comment_lossage
= 0;
481 EMACS_INT comment_end
= from
;
482 EMACS_INT comment_end_byte
= from_byte
;
483 EMACS_INT comstart_pos
= 0;
484 EMACS_INT comstart_byte
;
485 /* Place where the containing defun starts,
486 or 0 if we didn't come across it yet. */
487 EMACS_INT defun_start
= 0;
488 EMACS_INT defun_start_byte
= 0;
489 register enum syntaxcode code
;
490 int nesting
= 1; /* current comment nesting */
494 /* FIXME: A }} comment-ender style leads to incorrect behavior
495 in the case of {{ c }}} because we ignore the last two chars which are
496 assumed to be comment-enders although they aren't. */
498 /* At beginning of range to scan, we're outside of strings;
499 that determines quote parity to the comment-end. */
502 int temp_byte
, prev_syntax
;
503 int com2start
, com2end
;
505 /* Move back and examine a character. */
506 DEC_BOTH (from
, from_byte
);
507 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
509 prev_syntax
= syntax
;
510 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
511 syntax
= SYNTAX_WITH_FLAGS (c
);
514 /* Check for 2-char comment markers. */
515 com2start
= (SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
516 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax
)
517 && comstyle
== SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax
)
518 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
)
519 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
);
520 com2end
= (SYNTAX_FLAGS_COMEND_FIRST (syntax
)
521 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax
));
523 /* Nasty cases with overlapping 2-char comment markers:
524 - snmp-mode: -- c -- foo -- c --
532 /* If a 2-char comment sequence partly overlaps with another,
533 we don't try to be clever. */
534 if (from
> stop
&& (com2end
|| com2start
))
536 int next
= from
, next_byte
= from_byte
, next_c
, next_syntax
;
537 DEC_BOTH (next
, next_byte
);
538 UPDATE_SYNTAX_TABLE_BACKWARD (next
);
539 next_c
= FETCH_CHAR_AS_MULTIBYTE (next_byte
);
540 next_syntax
= SYNTAX_WITH_FLAGS (next_c
);
541 if (((com2start
|| comnested
)
542 && SYNTAX_FLAGS_COMEND_SECOND (syntax
)
543 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax
))
544 || ((com2end
|| comnested
)
545 && SYNTAX_FLAGS_COMSTART_SECOND (syntax
)
546 && comstyle
== SYNTAX_FLAGS_COMMENT_STYLE (syntax
)
547 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax
)))
549 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
552 if (com2start
&& comstart_pos
== 0)
553 /* We're looking at a comment starter. But it might be a comment
554 ender as well (see snmp-mode). The first time we see one, we
555 need to consider it as a comment starter,
556 and the subsequent times as a comment ender. */
559 /* Turn a 2-char comment sequences into the appropriate syntax. */
564 /* Ignore comment starters of a different style. */
565 else if (code
== Scomment
566 && (comstyle
!= SYNTAX_FLAGS_COMMENT_STYLE (syntax
)
567 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
) != comnested
))
570 /* Ignore escaped characters, except comment-enders. */
571 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
578 c
= (code
== Sstring_fence
? ST_STRING_STYLE
: ST_COMMENT_STYLE
);
580 /* Track parity of quotes. */
581 if (string_style
== -1)
582 /* Entering a string. */
584 else if (string_style
== c
)
585 /* Leaving the string. */
588 /* If we have two kinds of string delimiters.
589 There's no way to grok this scanning backwards. */
594 /* We've already checked that it is the relevant comstyle. */
595 if (string_style
!= -1 || comment_lossage
|| string_lossage
)
596 /* There are odd string quotes involved, so let's be careful.
597 Test case in Pascal: " { " a { " } */
602 /* Record best comment-starter so far. */
604 comstart_byte
= from_byte
;
606 else if (--nesting
<= 0)
607 /* nested comments have to be balanced, so we don't need to
608 keep looking for earlier ones. We use here the same (slightly
609 incorrect) reasoning as below: since it is followed by uniform
610 paired string quotes, this comment-start has to be outside of
611 strings, else the comment-end itself would be inside a string. */
616 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax
) == comstyle
617 && ((com2end
&& SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
))
618 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
)
619 /* This is the same style of comment ender as ours. */
624 /* Anything before that can't count because it would match
625 this comment-ender rather than ours. */
626 from
= stop
; /* Break out of the loop. */
628 else if (comstart_pos
!= 0 || c
!= '\n')
629 /* We're mixing comment styles here, so we'd better be careful.
630 The (comstart_pos != 0 || c != '\n') check is not quite correct
631 (we should just always set comment_lossage), but removing it
632 would imply that any multiline comment in C would go through
633 lossage, which seems overkill.
634 The failure should only happen in the rare cases such as
640 /* Assume a defun-start point is outside of strings. */
641 if (open_paren_in_column_0_is_defun_start
643 || (temp_byte
= dec_bytepos (from_byte
),
644 FETCH_CHAR (temp_byte
) == '\n')))
647 defun_start_byte
= from_byte
;
648 from
= stop
; /* Break out of the loop. */
657 if (comstart_pos
== 0)
660 from_byte
= comment_end_byte
;
661 UPDATE_SYNTAX_TABLE_FORWARD (comment_end
- 1);
663 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
664 or `done'), then we've found the beginning of the non-nested comment. */
665 else if (1) /* !comnested */
668 from_byte
= comstart_byte
;
669 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
673 struct lisp_parse_state state
;
675 /* We had two kinds of string delimiters mixed up
676 together. Decode this going forwards.
677 Scan fwd from a known safe place (beginning-of-defun)
678 to the one in question; this records where we
679 last passed a comment starter. */
680 /* If we did not already find the defun start, find it now. */
681 if (defun_start
== 0)
683 defun_start
= find_defun_start (comment_end
, comment_end_byte
);
684 defun_start_byte
= find_start_value_byte
;
688 scan_sexps_forward (&state
,
689 defun_start
, defun_start_byte
,
690 comment_end
, -10000, 0, Qnil
, 0);
691 defun_start
= comment_end
;
692 if (state
.incomment
== (comnested
? 1 : -1)
693 && state
.comstyle
== comstyle
)
694 from
= state
.comstr_start
;
699 /* If comment_end is inside some other comment, maybe ours
700 is nested, so we need to try again from within the
701 surrounding comment. Example: { a (* " *) */
703 /* FIXME: We should advance by one or two chars. */
704 defun_start
= state
.comstr_start
+ 2;
705 defun_start_byte
= CHAR_TO_BYTE (defun_start
);
708 } while (defun_start
< comment_end
);
710 from_byte
= CHAR_TO_BYTE (from
);
711 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
716 *bytepos_ptr
= from_byte
;
718 return (from
== comment_end
) ? -1 : from
;
721 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
722 doc
: /* Return t if OBJECT is a syntax table.
723 Currently, any char-table counts as a syntax table. */)
726 if (CHAR_TABLE_P (object
)
727 && EQ (XCHAR_TABLE (object
)->purpose
, Qsyntax_table
))
733 check_syntax_table (Lisp_Object obj
)
735 CHECK_TYPE (CHAR_TABLE_P (obj
) && EQ (XCHAR_TABLE (obj
)->purpose
, Qsyntax_table
),
736 Qsyntax_table_p
, obj
);
739 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
740 doc
: /* Return the current syntax table.
741 This is the one specified by the current buffer. */)
744 return current_buffer
->syntax_table
;
747 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
748 Sstandard_syntax_table
, 0, 0, 0,
749 doc
: /* Return the standard syntax table.
750 This is the one used for new buffers. */)
753 return Vstandard_syntax_table
;
756 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
757 doc
: /* Construct a new syntax table and return it.
758 It is a copy of the TABLE, which defaults to the standard syntax table. */)
764 check_syntax_table (table
);
766 table
= Vstandard_syntax_table
;
768 copy
= Fcopy_sequence (table
);
770 /* Only the standard syntax table should have a default element.
771 Other syntax tables should inherit from parents instead. */
772 XCHAR_TABLE (copy
)->defalt
= Qnil
;
774 /* Copied syntax tables should all have parents.
775 If we copied one with no parent, such as the standard syntax table,
776 use the standard syntax table as the copy's parent. */
777 if (NILP (XCHAR_TABLE (copy
)->parent
))
778 Fset_char_table_parent (copy
, Vstandard_syntax_table
);
782 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
783 doc
: /* Select a new syntax table for the current buffer.
784 One argument, a syntax table. */)
788 check_syntax_table (table
);
789 current_buffer
->syntax_table
= table
;
790 /* Indicate that this buffer now has a specified syntax table. */
791 idx
= PER_BUFFER_VAR_IDX (syntax_table
);
792 SET_PER_BUFFER_VALUE_P (current_buffer
, idx
, 1);
796 /* Convert a letter which signifies a syntax code
797 into the code it signifies.
798 This is used by modify-syntax-entry, and other things. */
800 unsigned char syntax_spec_code
[0400] =
801 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
802 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
803 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
804 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
805 (char) Swhitespace
, (char) Scomment_fence
, (char) Sstring
, 0377,
806 (char) Smath
, 0377, 0377, (char) Squote
,
807 (char) Sopen
, (char) Sclose
, 0377, 0377,
808 0377, (char) Swhitespace
, (char) Spunct
, (char) Scharquote
,
809 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
810 0377, 0377, 0377, 0377,
811 (char) Scomment
, 0377, (char) Sendcomment
, 0377,
812 (char) Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
813 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
814 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
815 0377, 0377, 0377, 0377, (char) Sescape
, 0377, 0377, (char) Ssymbol
,
816 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
817 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
818 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
819 0377, 0377, 0377, 0377, (char) Sstring_fence
, 0377, 0377, 0377
822 /* Indexed by syntax code, give the letter that describes it. */
824 char syntax_code_spec
[16] =
826 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
830 /* Indexed by syntax code, give the object (cons of syntax code and
831 nil) to be stored in syntax table. Since these objects can be
832 shared among syntax tables, we generate them in advance. By
833 sharing objects, the function `describe-syntax' can give a more
835 static Lisp_Object Vsyntax_code_object
;
838 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
839 doc
: /* Return the syntax code of CHARACTER, described by a character.
840 For example, if CHARACTER is a word constituent, the
841 character `w' (119) is returned.
842 The characters that correspond to various syntax codes
843 are listed in the documentation of `modify-syntax-entry'. */)
844 (Lisp_Object character
)
847 CHECK_CHARACTER (character
);
848 char_int
= XINT (character
);
849 SETUP_BUFFER_SYNTAX_TABLE ();
850 return make_number (syntax_code_spec
[(int) SYNTAX (char_int
)]);
853 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
854 doc
: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
855 (Lisp_Object character
)
858 CHECK_NUMBER (character
);
859 char_int
= XINT (character
);
860 SETUP_BUFFER_SYNTAX_TABLE ();
861 code
= SYNTAX (char_int
);
862 if (code
== Sopen
|| code
== Sclose
)
863 return SYNTAX_MATCH (char_int
);
867 DEFUN ("string-to-syntax", Fstring_to_syntax
, Sstring_to_syntax
, 1, 1, 0,
868 doc
: /* Convert a syntax specification STRING into syntax cell form.
869 STRING should be a string as it is allowed as argument of
870 `modify-syntax-entry'. Value is the equivalent cons cell
871 \(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
875 register const unsigned char *p
;
876 register enum syntaxcode code
;
880 CHECK_STRING (string
);
883 code
= (enum syntaxcode
) syntax_spec_code
[*p
++];
884 if (((int) code
& 0377) == 0377)
885 error ("Invalid syntax description letter: %c", p
[-1]);
887 if (code
== Sinherit
)
893 int character
= STRING_CHAR_AND_LENGTH (p
, len
);
894 XSETINT (match
, character
);
895 if (XFASTINT (match
) == ' ')
935 if (val
< XVECTOR (Vsyntax_code_object
)->size
&& NILP (match
))
936 return XVECTOR (Vsyntax_code_object
)->contents
[val
];
938 /* Since we can't use a shared object, let's make a new one. */
939 return Fcons (make_number (val
), match
);
942 /* I really don't know why this is interactive
943 help-form should at least be made useful whilst reading the second arg. */
944 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
945 "cSet syntax for character: \nsSet syntax for %s to: ",
946 doc
: /* Set syntax for character CHAR according to string NEWENTRY.
947 The syntax is changed only for table SYNTAX-TABLE, which defaults to
948 the current buffer's syntax table.
949 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
950 in the range MIN to MAX are changed.
951 The first character of NEWENTRY should be one of the following:
952 Space or - whitespace syntax. w word constituent.
953 _ symbol constituent. . punctuation.
954 ( open-parenthesis. ) close-parenthesis.
955 " string quote. \\ escape.
956 $ paired delimiter. ' expression quote or prefix operator.
957 < comment starter. > comment ender.
958 / character-quote. @ inherit from `standard-syntax-table'.
959 | generic string fence. ! generic comment fence.
961 Only single-character comment start and end sequences are represented thus.
962 Two-character sequences are represented as described below.
963 The second character of NEWENTRY is the matching parenthesis,
964 used only if the first character is `(' or `)'.
965 Any additional characters are flags.
966 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
967 1 means CHAR is the start of a two-char comment start sequence.
968 2 means CHAR is the second character of such a sequence.
969 3 means CHAR is the start of a two-char comment end sequence.
970 4 means CHAR is the second character of such a sequence.
972 There can be up to two orthogonal comment sequences. This is to support
973 language modes such as C++. By default, all comment sequences are of style
974 a, but you can set the comment sequence style to b (on the second character
975 of a comment-start, or the first character of a comment-end sequence) using
977 b means CHAR is part of comment sequence b.
978 n means CHAR is part of a nestable comment sequence.
980 p means CHAR is a prefix character for `backward-prefix-chars';
981 such characters are treated as whitespace when they occur
983 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
984 (Lisp_Object c
, Lisp_Object newentry
, Lisp_Object syntax_table
)
988 CHECK_CHARACTER_CAR (c
);
989 CHECK_CHARACTER_CDR (c
);
994 if (NILP (syntax_table
))
995 syntax_table
= current_buffer
->syntax_table
;
997 check_syntax_table (syntax_table
);
999 newentry
= Fstring_to_syntax (newentry
);
1001 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table
, c
, newentry
);
1003 SET_RAW_SYNTAX_ENTRY (syntax_table
, XINT (c
), newentry
);
1005 /* We clear the regexp cache, since character classes can now have
1006 different values from those in the compiled regexps.*/
1007 clear_regexp_cache ();
1012 /* Dump syntax table to buffer in human-readable format */
1014 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value
,
1015 Sinternal_describe_syntax_value
, 1, 1, 0,
1016 doc
: /* Insert a description of the internal syntax description SYNTAX at point. */)
1017 (Lisp_Object syntax
)
1019 register enum syntaxcode code
;
1020 char desc
, start1
, start2
, end1
, end2
, prefix
, comstyle
, comnested
;
1022 Lisp_Object first
, match_lisp
, value
= syntax
;
1026 insert_string ("default");
1030 if (CHAR_TABLE_P (value
))
1032 insert_string ("deeper char-table ...");
1038 insert_string ("invalid");
1042 first
= XCAR (value
);
1043 match_lisp
= XCDR (value
);
1045 if (!INTEGERP (first
) || !(NILP (match_lisp
) || INTEGERP (match_lisp
)))
1047 insert_string ("invalid");
1051 code
= (enum syntaxcode
) (XINT (first
) & 0377);
1052 start1
= (XINT (first
) >> 16) & 1;
1053 start2
= (XINT (first
) >> 17) & 1;
1054 end1
= (XINT (first
) >> 18) & 1;
1055 end2
= (XINT (first
) >> 19) & 1;
1056 prefix
= (XINT (first
) >> 20) & 1;
1057 comstyle
= (XINT (first
) >> 21) & 1;
1058 comnested
= (XINT (first
) >> 22) & 1;
1060 if ((int) code
< 0 || (int) code
>= (int) Smax
)
1062 insert_string ("invalid");
1065 desc
= syntax_code_spec
[(int) code
];
1067 str
[0] = desc
, str
[1] = 0;
1070 if (NILP (match_lisp
))
1073 insert_char (XINT (match_lisp
));
1092 insert_string ("\twhich means: ");
1094 switch (SWITCH_ENUM_CAST (code
))
1097 insert_string ("whitespace"); break;
1099 insert_string ("punctuation"); break;
1101 insert_string ("word"); break;
1103 insert_string ("symbol"); break;
1105 insert_string ("open"); break;
1107 insert_string ("close"); break;
1109 insert_string ("prefix"); break;
1111 insert_string ("string"); break;
1113 insert_string ("math"); break;
1115 insert_string ("escape"); break;
1117 insert_string ("charquote"); break;
1119 insert_string ("comment"); break;
1121 insert_string ("endcomment"); break;
1123 insert_string ("inherit"); break;
1124 case Scomment_fence
:
1125 insert_string ("comment fence"); break;
1127 insert_string ("string fence"); break;
1129 insert_string ("invalid");
1133 if (!NILP (match_lisp
))
1135 insert_string (", matches ");
1136 insert_char (XINT (match_lisp
));
1140 insert_string (",\n\t is the first character of a comment-start sequence");
1142 insert_string (",\n\t is the second character of a comment-start sequence");
1145 insert_string (",\n\t is the first character of a comment-end sequence");
1147 insert_string (",\n\t is the second character of a comment-end sequence");
1149 insert_string (" (comment style b)");
1151 insert_string (" (nestable)");
1154 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1159 int parse_sexp_ignore_comments
;
1161 /* Char-table of functions that find the next or previous word
1163 Lisp_Object Vfind_word_boundary_function_table
;
1165 /* Return the position across COUNT words from FROM.
1166 If that many words cannot be found before the end of the buffer, return 0.
1167 COUNT negative means scan backward and stop at word beginning. */
1170 scan_words (register int from
, register int count
)
1172 register int beg
= BEGV
;
1173 register int end
= ZV
;
1174 register int from_byte
= CHAR_TO_BYTE (from
);
1175 register enum syntaxcode code
;
1177 Lisp_Object func
, script
, pos
;
1182 SETUP_SYNTAX_TABLE (from
, count
);
1193 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1194 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1195 code
= SYNTAX (ch0
);
1196 INC_BOTH (from
, from_byte
);
1197 if (words_include_escapes
1198 && (code
== Sescape
|| code
== Scharquote
))
1203 /* Now CH0 is a character which begins a word and FROM is the
1204 position of the next character. */
1205 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch0
);
1206 if (! NILP (Ffboundp (func
)))
1208 pos
= call2 (func
, make_number (from
- 1), make_number (end
));
1209 if (INTEGERP (pos
) && XINT (pos
) > from
)
1212 from_byte
= CHAR_TO_BYTE (from
);
1217 script
= CHAR_TABLE_REF (Vchar_script_table
, ch0
);
1220 if (from
== end
) break;
1221 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1222 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1223 code
= SYNTAX (ch1
);
1225 && (! words_include_escapes
1226 || (code
!= Sescape
&& code
!= Scharquote
)))
1227 || word_boundary_p (ch0
, ch1
))
1229 INC_BOTH (from
, from_byte
);
1244 DEC_BOTH (from
, from_byte
);
1245 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1246 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1247 code
= SYNTAX (ch1
);
1248 if (words_include_escapes
1249 && (code
== Sescape
|| code
== Scharquote
))
1254 /* Now CH1 is a character which ends a word and FROM is the
1256 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch1
);
1257 if (! NILP (Ffboundp (func
)))
1259 pos
= call2 (func
, make_number (from
), make_number (beg
));
1260 if (INTEGERP (pos
) && XINT (pos
) < from
)
1263 from_byte
= CHAR_TO_BYTE (from
);
1268 script
= CHAR_TABLE_REF (Vchar_script_table
, ch1
);
1273 DEC_BOTH (from
, from_byte
);
1274 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1275 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1276 code
= SYNTAX (ch0
);
1278 && (! words_include_escapes
1279 || (code
!= Sescape
&& code
!= Scharquote
)))
1280 || word_boundary_p (ch0
, ch1
))
1282 INC_BOTH (from
, from_byte
);
1296 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 0, 1, "^p",
1297 doc
: /* Move point forward ARG words (backward if ARG is negative).
1299 If an edge of the buffer or a field boundary is reached, point is left there
1300 and the function returns nil. Field boundaries are not noticed if
1301 `inhibit-field-text-motion' is non-nil. */)
1308 XSETFASTINT (arg
, 1);
1312 val
= orig_val
= scan_words (PT
, XINT (arg
));
1314 val
= XINT (arg
) > 0 ? ZV
: BEGV
;
1316 /* Avoid jumping out of an input field. */
1317 tmp
= Fconstrain_to_field (make_number (val
), make_number (PT
),
1319 val
= XFASTINT (tmp
);
1322 return val
== orig_val
? Qt
: Qnil
;
1325 Lisp_Object
skip_chars (int, Lisp_Object
, Lisp_Object
, int);
1327 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
1328 doc
: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1329 STRING is like the inside of a `[...]' in a regular expression
1330 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1331 (but not at the end of a range; quoting is never needed there).
1332 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1333 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1334 Char classes, e.g. `[:alpha:]', are supported.
1336 Returns the distance traveled, either zero or positive. */)
1337 (Lisp_Object string
, Lisp_Object lim
)
1339 return skip_chars (1, string
, lim
, 1);
1342 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
1343 doc
: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1344 See `skip-chars-forward' for details.
1345 Returns the distance traveled, either zero or negative. */)
1346 (Lisp_Object string
, Lisp_Object lim
)
1348 return skip_chars (0, string
, lim
, 1);
1351 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
1352 doc
: /* Move point forward across chars in specified syntax classes.
1353 SYNTAX is a string of syntax code characters.
1354 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1355 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1356 This function returns the distance traveled, either zero or positive. */)
1357 (Lisp_Object syntax
, Lisp_Object lim
)
1359 return skip_syntaxes (1, syntax
, lim
);
1362 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
1363 doc
: /* Move point backward across chars in specified syntax classes.
1364 SYNTAX is a string of syntax code characters.
1365 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1366 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1367 This function returns the distance traveled, either zero or negative. */)
1368 (Lisp_Object syntax
, Lisp_Object lim
)
1370 return skip_syntaxes (0, syntax
, lim
);
1374 skip_chars (int forwardp
, Lisp_Object string
, Lisp_Object lim
, int handle_iso_classes
)
1376 register unsigned int c
;
1377 unsigned char fastmap
[0400];
1378 /* Store the ranges of non-ASCII characters. */
1380 int n_char_ranges
= 0;
1382 register int i
, i_byte
;
1383 /* Set to 1 if the current buffer is multibyte and the region
1384 contains non-ASCII chars. */
1386 /* Set to 1 if STRING is multibyte and it contains non-ASCII
1388 int string_multibyte
;
1390 const unsigned char *str
;
1392 Lisp_Object iso_classes
;
1394 CHECK_STRING (string
);
1398 XSETINT (lim
, forwardp
? ZV
: BEGV
);
1400 CHECK_NUMBER_COERCE_MARKER (lim
);
1402 /* In any case, don't allow scan outside bounds of buffer. */
1403 if (XINT (lim
) > ZV
)
1404 XSETFASTINT (lim
, ZV
);
1405 if (XINT (lim
) < BEGV
)
1406 XSETFASTINT (lim
, BEGV
);
1408 multibyte
= (!NILP (current_buffer
->enable_multibyte_characters
)
1409 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
1410 string_multibyte
= SBYTES (string
) > SCHARS (string
);
1412 memset (fastmap
, 0, sizeof fastmap
);
1414 str
= SDATA (string
);
1415 size_byte
= SBYTES (string
);
1418 if (i_byte
< size_byte
1419 && SREF (string
, 0) == '^')
1421 negate
= 1; i_byte
++;
1424 /* Find the characters specified and set their elements of fastmap.
1425 Handle backslashes and ranges specially.
1427 If STRING contains non-ASCII characters, setup char_ranges for
1428 them and use fastmap only for their leading codes. */
1430 if (! string_multibyte
)
1432 int string_has_eight_bit
= 0;
1434 /* At first setup fastmap. */
1435 while (i_byte
< size_byte
)
1439 if (handle_iso_classes
&& c
== '['
1440 && i_byte
< size_byte
1441 && str
[i_byte
] == ':')
1443 const unsigned char *class_beg
= str
+ i_byte
+ 1;
1444 const unsigned char *class_end
= class_beg
;
1445 const unsigned char *class_limit
= str
+ size_byte
- 2;
1446 /* Leave room for the null. */
1447 unsigned char class_name
[CHAR_CLASS_MAX_LENGTH
+ 1];
1450 if (class_limit
- class_beg
> CHAR_CLASS_MAX_LENGTH
)
1451 class_limit
= class_beg
+ CHAR_CLASS_MAX_LENGTH
;
1453 while (class_end
< class_limit
1454 && *class_end
>= 'a' && *class_end
<= 'z')
1457 if (class_end
== class_beg
1458 || *class_end
!= ':' || class_end
[1] != ']')
1459 goto not_a_class_name
;
1461 memcpy (class_name
, class_beg
, class_end
- class_beg
);
1462 class_name
[class_end
- class_beg
] = 0;
1464 cc
= re_wctype (class_name
);
1466 error ("Invalid ISO C character class");
1468 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1470 i_byte
= class_end
+ 2 - str
;
1477 if (i_byte
== size_byte
)
1482 /* Treat `-' as range character only if another character
1484 if (i_byte
+ 1 < size_byte
1485 && str
[i_byte
] == '-')
1489 /* Skip over the dash. */
1492 /* Get the end of the range. */
1495 && i_byte
< size_byte
)
1502 if (! ASCII_CHAR_P (c2
))
1503 string_has_eight_bit
= 1;
1509 if (! ASCII_CHAR_P (c
))
1510 string_has_eight_bit
= 1;
1514 /* If the current range is multibyte and STRING contains
1515 eight-bit chars, arrange fastmap and setup char_ranges for
1516 the corresponding multibyte chars. */
1517 if (multibyte
&& string_has_eight_bit
)
1519 unsigned char fastmap2
[0400];
1520 int range_start_byte
, range_start_char
;
1522 memcpy (fastmap
+ 0200, fastmap2
+ 0200, 0200);
1523 memset (fastmap
+ 0200, 0, 0200);
1524 /* We are sure that this loop stops. */
1525 for (i
= 0200; ! fastmap2
[i
]; i
++);
1526 c
= BYTE8_TO_CHAR (i
);
1527 fastmap
[CHAR_LEADING_CODE (c
)] = 1;
1528 range_start_byte
= i
;
1529 range_start_char
= c
;
1530 char_ranges
= (int *) alloca (sizeof (int) * 128 * 2);
1531 for (i
= 129; i
< 0400; i
++)
1533 c
= BYTE8_TO_CHAR (i
);
1534 fastmap
[CHAR_LEADING_CODE (c
)] = 1;
1535 if (i
- range_start_byte
!= c
- range_start_char
)
1537 char_ranges
[n_char_ranges
++] = range_start_char
;
1538 char_ranges
[n_char_ranges
++] = ((i
- 1 - range_start_byte
)
1539 + range_start_char
);
1540 range_start_byte
= i
;
1541 range_start_char
= c
;
1544 char_ranges
[n_char_ranges
++] = range_start_char
;
1545 char_ranges
[n_char_ranges
++] = ((i
- 1 - range_start_byte
)
1546 + range_start_char
);
1549 else /* STRING is multibyte */
1551 char_ranges
= (int *) alloca (sizeof (int) * SCHARS (string
) * 2);
1553 while (i_byte
< size_byte
)
1555 unsigned char leading_code
;
1557 leading_code
= str
[i_byte
];
1558 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1561 if (handle_iso_classes
&& c
== '['
1562 && i_byte
< size_byte
1563 && STRING_CHAR (str
+ i_byte
) == ':')
1565 const unsigned char *class_beg
= str
+ i_byte
+ 1;
1566 const unsigned char *class_end
= class_beg
;
1567 const unsigned char *class_limit
= str
+ size_byte
- 2;
1568 /* Leave room for the null. */
1569 unsigned char class_name
[CHAR_CLASS_MAX_LENGTH
+ 1];
1572 if (class_limit
- class_beg
> CHAR_CLASS_MAX_LENGTH
)
1573 class_limit
= class_beg
+ CHAR_CLASS_MAX_LENGTH
;
1575 while (class_end
< class_limit
1576 && *class_end
>= 'a' && *class_end
<= 'z')
1579 if (class_end
== class_beg
1580 || *class_end
!= ':' || class_end
[1] != ']')
1581 goto not_a_class_name_multibyte
;
1583 memcpy (class_name
, class_beg
, class_end
- class_beg
);
1584 class_name
[class_end
- class_beg
] = 0;
1586 cc
= re_wctype (class_name
);
1588 error ("Invalid ISO C character class");
1590 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1592 i_byte
= class_end
+ 2 - str
;
1596 not_a_class_name_multibyte
:
1599 if (i_byte
== size_byte
)
1602 leading_code
= str
[i_byte
];
1603 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1606 /* Treat `-' as range character only if another character
1608 if (i_byte
+ 1 < size_byte
1609 && str
[i_byte
] == '-')
1612 unsigned char leading_code2
;
1614 /* Skip over the dash. */
1617 /* Get the end of the range. */
1618 leading_code2
= str
[i_byte
];
1619 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1623 && i_byte
< size_byte
)
1625 leading_code2
= str
[i_byte
];
1626 c2
=STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1632 if (ASCII_CHAR_P (c
))
1634 while (c
<= c2
&& c
< 0x80)
1636 leading_code
= CHAR_LEADING_CODE (c
);
1638 if (! ASCII_CHAR_P (c
))
1640 while (leading_code
<= leading_code2
)
1641 fastmap
[leading_code
++] = 1;
1644 char_ranges
[n_char_ranges
++] = c
;
1645 char_ranges
[n_char_ranges
++] = c2
;
1651 if (ASCII_CHAR_P (c
))
1655 fastmap
[leading_code
] = 1;
1656 char_ranges
[n_char_ranges
++] = c
;
1657 char_ranges
[n_char_ranges
++] = c
;
1662 /* If the current range is unibyte and STRING contains non-ASCII
1663 chars, arrange fastmap for the corresponding unibyte
1666 if (! multibyte
&& n_char_ranges
> 0)
1668 memset (fastmap
+ 0200, 0, 0200);
1669 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1671 int c1
= char_ranges
[i
];
1672 int c2
= char_ranges
[i
+ 1];
1674 for (; c1
<= c2
; c1
++)
1676 int b
= CHAR_TO_BYTE_SAFE (c1
);
1684 /* If ^ was the first character, complement the fastmap. */
1688 for (i
= 0; i
< sizeof fastmap
; i
++)
1692 for (i
= 0; i
< 0200; i
++)
1694 /* All non-ASCII chars possibly match. */
1695 for (; i
< sizeof fastmap
; i
++)
1701 int start_point
= PT
;
1703 int pos_byte
= PT_BYTE
;
1704 unsigned char *p
= PT_ADDR
, *endp
, *stop
;
1708 endp
= (XINT (lim
) == GPT
) ? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
1709 stop
= (pos
< GPT
&& GPT
< XINT (lim
)) ? GPT_ADDR
: endp
;
1713 endp
= CHAR_POS_ADDR (XINT (lim
));
1714 stop
= (pos
>= GPT
&& GPT
> XINT (lim
)) ? GAP_END_ADDR
: endp
;
1718 /* This code may look up syntax tables using macros that rely on the
1719 gl_state object. To make sure this object is not out of date,
1720 let's initialize it manually.
1721 We ignore syntax-table text-properties for now, since that's
1722 what we've done in the past. */
1723 SETUP_BUFFER_SYNTAX_TABLE ();
1738 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
1739 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
1749 if (! ASCII_CHAR_P (c
))
1751 /* As we are looking at a multibyte character, we
1752 must look up the character in the table
1753 CHAR_RANGES. If there's no data in the table,
1754 that character is not what we want to skip. */
1756 /* The following code do the right thing even if
1757 n_char_ranges is zero (i.e. no data in
1759 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1760 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
1762 if (!(negate
^ (i
< n_char_ranges
)))
1766 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
1779 if (!NILP (iso_classes
) && in_classes (*p
, iso_classes
))
1784 goto fwd_unibyte_ok
;
1790 p
++, pos
++, pos_byte
++;
1798 unsigned char *prev_p
;
1808 while (--p
>= stop
&& ! CHAR_HEAD_P (*p
));
1809 c
= STRING_CHAR (p
);
1811 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
1821 if (! ASCII_CHAR_P (c
))
1823 /* See the comment in the previous similar code. */
1824 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1825 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
1827 if (!(negate
^ (i
< n_char_ranges
)))
1831 pos
--, pos_byte
-= prev_p
- p
;
1844 if (! NILP (iso_classes
) && in_classes (p
[-1], iso_classes
))
1849 goto back_unibyte_ok
;
1852 if (!fastmap
[p
[-1]])
1855 p
--, pos
--, pos_byte
--;
1859 SET_PT_BOTH (pos
, pos_byte
);
1862 return make_number (PT
- start_point
);
1868 skip_syntaxes (int forwardp
, Lisp_Object string
, Lisp_Object lim
)
1870 register unsigned int c
;
1871 unsigned char fastmap
[0400];
1873 register int i
, i_byte
;
1878 CHECK_STRING (string
);
1881 XSETINT (lim
, forwardp
? ZV
: BEGV
);
1883 CHECK_NUMBER_COERCE_MARKER (lim
);
1885 /* In any case, don't allow scan outside bounds of buffer. */
1886 if (XINT (lim
) > ZV
)
1887 XSETFASTINT (lim
, ZV
);
1888 if (XINT (lim
) < BEGV
)
1889 XSETFASTINT (lim
, BEGV
);
1891 if (forwardp
? (PT
>= XFASTINT (lim
)) : (PT
<= XFASTINT (lim
)))
1892 return make_number (0);
1894 multibyte
= (!NILP (current_buffer
->enable_multibyte_characters
)
1895 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
1897 memset (fastmap
, 0, sizeof fastmap
);
1899 if (SBYTES (string
) > SCHARS (string
))
1900 /* As this is very rare case (syntax spec is ASCII only), don't
1901 consider efficiency. */
1902 string
= string_make_unibyte (string
);
1904 str
= SDATA (string
);
1905 size_byte
= SBYTES (string
);
1908 if (i_byte
< size_byte
1909 && SREF (string
, 0) == '^')
1911 negate
= 1; i_byte
++;
1914 /* Find the syntaxes specified and set their elements of fastmap. */
1916 while (i_byte
< size_byte
)
1919 fastmap
[syntax_spec_code
[c
]] = 1;
1922 /* If ^ was the first character, complement the fastmap. */
1924 for (i
= 0; i
< sizeof fastmap
; i
++)
1928 int start_point
= PT
;
1930 int pos_byte
= PT_BYTE
;
1931 unsigned char *p
= PT_ADDR
, *endp
, *stop
;
1935 endp
= (XINT (lim
) == GPT
) ? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
1936 stop
= (pos
< GPT
&& GPT
< XINT (lim
)) ? GPT_ADDR
: endp
;
1940 endp
= CHAR_POS_ADDR (XINT (lim
));
1941 stop
= (pos
>= GPT
&& GPT
> XINT (lim
)) ? GAP_END_ADDR
: endp
;
1945 SETUP_SYNTAX_TABLE (pos
, forwardp
? 1 : -1);
1961 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
1962 if (! fastmap
[(int) SYNTAX (c
)])
1964 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
1965 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
1979 if (! fastmap
[(int) SYNTAX (*p
)])
1981 p
++, pos
++, pos_byte
++;
1982 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
1992 unsigned char *prev_p
;
2001 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2003 while (--p
>= stop
&& ! CHAR_HEAD_P (*p
));
2004 c
= STRING_CHAR (p
);
2005 if (! fastmap
[(int) SYNTAX (c
)])
2007 pos
--, pos_byte
-= prev_p
- p
;
2021 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2022 if (! fastmap
[(int) SYNTAX (p
[-1])])
2024 p
--, pos
--, pos_byte
--;
2029 SET_PT_BOTH (pos
, pos_byte
);
2032 return make_number (PT
- start_point
);
2036 /* Return 1 if character C belongs to one of the ISO classes
2037 in the list ISO_CLASSES. Each class is represented by an
2038 integer which is its type according to re_wctype. */
2041 in_classes (int c
, Lisp_Object iso_classes
)
2045 while (CONSP (iso_classes
))
2048 elt
= XCAR (iso_classes
);
2049 iso_classes
= XCDR (iso_classes
);
2051 if (re_iswctype (c
, XFASTINT (elt
)))
2058 /* Jump over a comment, assuming we are at the beginning of one.
2059 FROM is the current position.
2060 FROM_BYTE is the bytepos corresponding to FROM.
2061 Do not move past STOP (a charpos).
2062 The comment over which we have to jump is of style STYLE
2063 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
2064 NESTING should be positive to indicate the nesting at the beginning
2065 for nested comments and should be zero or negative else.
2066 ST_COMMENT_STYLE cannot be nested.
2067 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2068 (or 0 If the search cannot start in the middle of a two-character).
2070 If successful, return 1 and store the charpos of the comment's end
2071 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2072 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2073 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2074 (as defined for state.incomment) in *INCOMMENT_PTR.
2076 The comment end is the last character of the comment rather than the
2077 character just after the comment.
2079 Global syntax data is assumed to initially be valid for FROM and
2080 remains valid for forward search starting at the returned position. */
2083 forw_comment (EMACS_INT from
, EMACS_INT from_byte
, EMACS_INT stop
,
2084 int nesting
, int style
, int prev_syntax
,
2085 EMACS_INT
*charpos_ptr
, EMACS_INT
*bytepos_ptr
,
2089 register enum syntaxcode code
;
2090 register int syntax
;
2092 if (nesting
<= 0) nesting
= -1;
2094 /* Enter the loop in the middle so that we find
2095 a 2-char comment ender if we start in the middle of it. */
2096 syntax
= prev_syntax
;
2097 if (syntax
!= 0) goto forw_incomment
;
2103 *incomment_ptr
= nesting
;
2104 *charpos_ptr
= from
;
2105 *bytepos_ptr
= from_byte
;
2108 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2109 syntax
= SYNTAX_WITH_FLAGS (c
);
2110 code
= syntax
& 0xff;
2111 if (code
== Sendcomment
2112 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
) == style
2113 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ?
2114 (nesting
> 0 && --nesting
== 0) : nesting
< 0))
2115 /* we have encountered a comment end of the same style
2116 as the comment sequence which began this comment
2119 if (code
== Scomment_fence
2120 && style
== ST_COMMENT_STYLE
)
2121 /* we have encountered a comment end of the same style
2122 as the comment sequence which began this comment
2127 && SYNTAX_FLAGS_COMMENT_NESTED (syntax
)
2128 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
) == style
)
2129 /* we have encountered a nested comment of the same style
2130 as the comment sequence which began this comment section */
2132 INC_BOTH (from
, from_byte
);
2133 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2136 if (from
< stop
&& SYNTAX_FLAGS_COMEND_FIRST (syntax
)
2137 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
) == style
2138 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2139 SYNTAX_COMEND_SECOND (c1
))
2140 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2141 SYNTAX_COMMENT_NESTED (c1
)) ? nesting
> 0 : nesting
< 0))
2144 /* we have encountered a comment end of the same style
2145 as the comment sequence which began this comment
2150 INC_BOTH (from
, from_byte
);
2151 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2156 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
2157 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2158 SYNTAX_COMMENT_STYLE (c1
) == style
2159 && SYNTAX_COMSTART_SECOND (c1
))
2160 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2161 SYNTAX_COMMENT_NESTED (c1
)))
2162 /* we have encountered a nested comment of the same style
2163 as the comment sequence which began this comment
2166 INC_BOTH (from
, from_byte
);
2167 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2171 *charpos_ptr
= from
;
2172 *bytepos_ptr
= from_byte
;
2176 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
2178 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2179 Stop scanning if we find something other than a comment or whitespace.
2180 Set point to where scanning stops.
2181 If COUNT comments are found as expected, with nothing except whitespace
2182 between them, return t; otherwise return nil. */)
2185 register EMACS_INT from
;
2186 EMACS_INT from_byte
;
2187 register EMACS_INT stop
;
2189 register enum syntaxcode code
;
2190 int comstyle
= 0; /* style of comment encountered */
2191 int comnested
= 0; /* whether the comment is nestable or not */
2194 EMACS_INT out_charpos
, out_bytepos
;
2197 CHECK_NUMBER (count
);
2198 count1
= XINT (count
);
2199 stop
= count1
> 0 ? ZV
: BEGV
;
2205 from_byte
= PT_BYTE
;
2207 SETUP_SYNTAX_TABLE (from
, count1
);
2216 SET_PT_BOTH (from
, from_byte
);
2220 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2222 comstart_first
= SYNTAX_COMSTART_FIRST (c
);
2223 comnested
= SYNTAX_COMMENT_NESTED (c
);
2224 comstyle
= SYNTAX_COMMENT_STYLE (c
);
2225 INC_BOTH (from
, from_byte
);
2226 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2227 if (from
< stop
&& comstart_first
2228 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2229 SYNTAX_COMSTART_SECOND (c1
)))
2231 /* We have encountered a comment start sequence and we
2232 are ignoring all text inside comments. We must record
2233 the comment style this sequence begins so that later,
2234 only a comment end of the same style actually ends
2235 the comment section. */
2237 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
2238 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
2239 INC_BOTH (from
, from_byte
);
2240 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2243 while (code
== Swhitespace
|| (code
== Sendcomment
&& c
== '\n'));
2245 if (code
== Scomment_fence
)
2246 comstyle
= ST_COMMENT_STYLE
;
2247 else if (code
!= Scomment
)
2250 DEC_BOTH (from
, from_byte
);
2251 SET_PT_BOTH (from
, from_byte
);
2254 /* We're at the start of a comment. */
2255 found
= forw_comment (from
, from_byte
, stop
, comnested
, comstyle
, 0,
2256 &out_charpos
, &out_bytepos
, &dummy
);
2257 from
= out_charpos
; from_byte
= out_bytepos
;
2261 SET_PT_BOTH (from
, from_byte
);
2264 INC_BOTH (from
, from_byte
);
2265 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2266 /* We have skipped one comment. */
2278 SET_PT_BOTH (BEGV
, BEGV_BYTE
);
2283 DEC_BOTH (from
, from_byte
);
2284 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2285 quoted
= char_quoted (from
, from_byte
);
2286 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2289 comnested
= SYNTAX_COMMENT_NESTED (c
);
2290 if (code
== Sendcomment
)
2291 comstyle
= SYNTAX_COMMENT_STYLE (c
);
2292 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
2293 && prev_char_comend_first (from
, from_byte
)
2294 && !char_quoted (from
- 1, dec_bytepos (from_byte
)))
2296 /* We must record the comment style encountered so that
2297 later, we can match only the proper comment begin
2298 sequence of the same style. */
2299 DEC_BOTH (from
, from_byte
);
2301 /* Calling char_quoted, above, set up global syntax position
2302 at the new value of FROM. */
2303 c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2304 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
2305 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
2308 if (code
== Scomment_fence
)
2310 /* Skip until first preceding unquoted comment_fence. */
2311 int found
= 0, ini
= from
, ini_byte
= from_byte
;
2315 DEC_BOTH (from
, from_byte
);
2316 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2317 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2318 if (SYNTAX (c
) == Scomment_fence
2319 && !char_quoted (from
, from_byte
))
2324 else if (from
== stop
)
2329 from
= ini
; /* Set point to ini + 1. */
2330 from_byte
= ini_byte
;
2334 /* We have skipped one comment. */
2337 else if (code
== Sendcomment
)
2339 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2340 &out_charpos
, &out_bytepos
);
2344 /* This end-of-line is not an end-of-comment.
2345 Treat it like a whitespace.
2346 CC-mode (and maybe others) relies on this behavior. */
2350 /* Failure: we should go back to the end of this
2351 not-quite-endcomment. */
2352 if (SYNTAX(c
) != code
)
2353 /* It was a two-char Sendcomment. */
2354 INC_BOTH (from
, from_byte
);
2360 /* We have skipped one comment. */
2361 from
= out_charpos
, from_byte
= out_bytepos
;
2365 else if (code
!= Swhitespace
|| quoted
)
2369 INC_BOTH (from
, from_byte
);
2370 SET_PT_BOTH (from
, from_byte
);
2378 SET_PT_BOTH (from
, from_byte
);
2383 /* Return syntax code of character C if C is an ASCII character
2384 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
2386 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2387 ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
2388 ? SYNTAX (c) : Ssymbol)
2391 scan_lists (register EMACS_INT from
, EMACS_INT count
, EMACS_INT depth
, int sexpflag
)
2394 register EMACS_INT stop
= count
> 0 ? ZV
: BEGV
;
2399 register enum syntaxcode code
, temp_code
;
2400 int min_depth
= depth
; /* Err out if depth gets less than this. */
2401 int comstyle
= 0; /* style of comment encountered */
2402 int comnested
= 0; /* whether the comment is nestable or not */
2404 EMACS_INT last_good
= from
;
2406 EMACS_INT from_byte
;
2407 EMACS_INT out_bytepos
, out_charpos
;
2409 int multibyte_symbol_p
= sexpflag
&& multibyte_syntax_as_symbol
;
2411 if (depth
> 0) min_depth
= 0;
2413 if (from
> ZV
) from
= ZV
;
2414 if (from
< BEGV
) from
= BEGV
;
2416 from_byte
= CHAR_TO_BYTE (from
);
2421 SETUP_SYNTAX_TABLE (from
, count
);
2426 int comstart_first
, prefix
;
2427 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2428 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2429 code
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
2430 comstart_first
= SYNTAX_COMSTART_FIRST (c
);
2431 comnested
= SYNTAX_COMMENT_NESTED (c
);
2432 comstyle
= SYNTAX_COMMENT_STYLE (c
);
2433 prefix
= SYNTAX_PREFIX (c
);
2434 if (depth
== min_depth
)
2436 INC_BOTH (from
, from_byte
);
2437 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2438 if (from
< stop
&& comstart_first
2439 && (c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2440 SYNTAX_COMSTART_SECOND (c
))
2441 && parse_sexp_ignore_comments
)
2443 /* we have encountered a comment start sequence and we
2444 are ignoring all text inside comments. We must record
2445 the comment style this sequence begins so that later,
2446 only a comment end of the same style actually ends
2447 the comment section */
2449 c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2450 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
2451 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
2452 INC_BOTH (from
, from_byte
);
2453 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2459 switch (SWITCH_ENUM_CAST (code
))
2465 INC_BOTH (from
, from_byte
);
2466 /* treat following character as a word constituent */
2469 if (depth
|| !sexpflag
) break;
2470 /* This word counts as a sexp; return at end of it. */
2473 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2475 /* Some compilers can't handle this inside the switch. */
2476 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2477 temp
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
2482 INC_BOTH (from
, from_byte
);
2493 INC_BOTH (from
, from_byte
);
2497 case Scomment_fence
:
2498 comstyle
= ST_COMMENT_STYLE
;
2501 if (!parse_sexp_ignore_comments
) break;
2502 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2503 found
= forw_comment (from
, from_byte
, stop
,
2504 comnested
, comstyle
, 0,
2505 &out_charpos
, &out_bytepos
, &dummy
);
2506 from
= out_charpos
, from_byte
= out_bytepos
;
2513 INC_BOTH (from
, from_byte
);
2514 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2520 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (from_byte
))
2522 INC_BOTH (from
, from_byte
);
2532 if (!++depth
) goto done
;
2537 if (!--depth
) goto done
;
2538 if (depth
< min_depth
)
2539 xsignal3 (Qscan_error
,
2540 build_string ("Containing expression ends prematurely"),
2541 make_number (last_good
), make_number (from
));
2546 temp_pos
= dec_bytepos (from_byte
);
2547 stringterm
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2552 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2553 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2556 && SYNTAX_WITH_MULTIBYTE_CHECK (c
) == Sstring
)
2557 : SYNTAX_WITH_MULTIBYTE_CHECK (c
) == Sstring_fence
)
2560 /* Some compilers can't handle this inside the switch. */
2561 temp
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
2566 INC_BOTH (from
, from_byte
);
2568 INC_BOTH (from
, from_byte
);
2570 INC_BOTH (from
, from_byte
);
2571 if (!depth
&& sexpflag
) goto done
;
2574 /* Ignore whitespace, punctuation, quote, endcomment. */
2579 /* Reached end of buffer. Error if within object, return nil if between */
2586 /* End of object reached */
2595 DEC_BOTH (from
, from_byte
);
2596 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2597 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2598 code
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
2599 if (depth
== min_depth
)
2602 comnested
= SYNTAX_COMMENT_NESTED (c
);
2603 if (code
== Sendcomment
)
2604 comstyle
= SYNTAX_COMMENT_STYLE (c
);
2605 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
2606 && prev_char_comend_first (from
, from_byte
)
2607 && parse_sexp_ignore_comments
)
2609 /* We must record the comment style encountered so that
2610 later, we can match only the proper comment begin
2611 sequence of the same style. */
2612 DEC_BOTH (from
, from_byte
);
2613 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2615 c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2616 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
2617 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
2620 /* Quoting turns anything except a comment-ender
2621 into a word character. Note that this cannot be true
2622 if we decremented FROM in the if-statement above. */
2623 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
2625 DEC_BOTH (from
, from_byte
);
2628 else if (SYNTAX_PREFIX (c
))
2631 switch (SWITCH_ENUM_CAST (code
))
2637 if (depth
|| !sexpflag
) break;
2638 /* This word counts as a sexp; count object finished
2639 after passing it. */
2642 temp_pos
= from_byte
;
2643 if (! NILP (current_buffer
->enable_multibyte_characters
))
2647 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2648 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2649 temp_code
= SYNTAX_WITH_MULTIBYTE_CHECK (c1
);
2650 /* Don't allow comment-end to be quoted. */
2651 if (temp_code
== Sendcomment
)
2653 quoted
= char_quoted (from
- 1, temp_pos
);
2656 DEC_BOTH (from
, from_byte
);
2657 temp_pos
= dec_bytepos (temp_pos
);
2658 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2660 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2661 temp_code
= SYNTAX_WITH_MULTIBYTE_CHECK (c1
);
2662 if (! (quoted
|| temp_code
== Sword
2663 || temp_code
== Ssymbol
2664 || temp_code
== Squote
))
2666 DEC_BOTH (from
, from_byte
);
2673 temp_pos
= dec_bytepos (from_byte
);
2674 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2675 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (temp_pos
))
2676 DEC_BOTH (from
, from_byte
);
2685 if (!++depth
) goto done2
;
2690 if (!--depth
) goto done2
;
2691 if (depth
< min_depth
)
2692 xsignal3 (Qscan_error
,
2693 build_string ("Containing expression ends prematurely"),
2694 make_number (last_good
), make_number (from
));
2698 if (!parse_sexp_ignore_comments
)
2700 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2701 &out_charpos
, &out_bytepos
);
2702 /* FIXME: if found == -1, then it really wasn't a comment-end.
2703 For single-char Sendcomment, we can't do much about it apart
2704 from skipping the char.
2705 For 2-char endcomments, we could try again, taking both
2706 chars as separate entities, but it's a lot of trouble
2707 for very little gain, so we don't bother either. -sm */
2709 from
= out_charpos
, from_byte
= out_bytepos
;
2712 case Scomment_fence
:
2718 DEC_BOTH (from
, from_byte
);
2719 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2720 if (!char_quoted (from
, from_byte
)
2721 && (c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2722 SYNTAX_WITH_MULTIBYTE_CHECK (c
) == code
))
2725 if (code
== Sstring_fence
&& !depth
&& sexpflag
) goto done2
;
2729 stringterm
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2734 DEC_BOTH (from
, from_byte
);
2735 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2736 if (!char_quoted (from
, from_byte
)
2738 == (c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
)))
2739 && SYNTAX_WITH_MULTIBYTE_CHECK (c
) == Sstring
)
2742 if (!depth
&& sexpflag
) goto done2
;
2745 /* Ignore whitespace, punctuation, quote, endcomment. */
2750 /* Reached start of buffer. Error if within object, return nil if between */
2763 XSETFASTINT (val
, from
);
2767 xsignal3 (Qscan_error
,
2768 build_string ("Unbalanced parentheses"),
2769 make_number (last_good
), make_number (from
));
2772 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
2773 doc
: /* Scan from character number FROM by COUNT lists.
2774 Returns the character number of the position thus found.
2776 If DEPTH is nonzero, paren depth begins counting from that value,
2777 only places where the depth in parentheses becomes zero
2778 are candidates for stopping; COUNT such places are counted.
2779 Thus, a positive value for DEPTH means go out levels.
2781 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2783 If the beginning or end of (the accessible part of) the buffer is reached
2784 and the depth is wrong, an error is signaled.
2785 If the depth is right but the count is not used up, nil is returned. */)
2786 (Lisp_Object from
, Lisp_Object count
, Lisp_Object depth
)
2788 CHECK_NUMBER (from
);
2789 CHECK_NUMBER (count
);
2790 CHECK_NUMBER (depth
);
2792 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
2795 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
2796 doc
: /* Scan from character number FROM by COUNT balanced expressions.
2797 If COUNT is negative, scan backwards.
2798 Returns the character number of the position thus found.
2800 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2802 If the beginning or end of (the accessible part of) the buffer is reached
2803 in the middle of a parenthetical grouping, an error is signaled.
2804 If the beginning or end is reached between groupings
2805 but before count is used up, nil is returned. */)
2806 (Lisp_Object from
, Lisp_Object count
)
2808 CHECK_NUMBER (from
);
2809 CHECK_NUMBER (count
);
2811 return scan_lists (XINT (from
), XINT (count
), 0, 1);
2814 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
2816 doc
: /* Move point backward over any number of chars with prefix syntax.
2817 This includes chars with "quote" or "prefix" syntax (' or p). */)
2822 int opoint_byte
= PT_BYTE
;
2824 int pos_byte
= PT_BYTE
;
2829 SET_PT_BOTH (opoint
, opoint_byte
);
2834 SETUP_SYNTAX_TABLE (pos
, -1);
2836 DEC_BOTH (pos
, pos_byte
);
2838 while (!char_quoted (pos
, pos_byte
)
2839 /* Previous statement updates syntax table. */
2840 && ((c
= FETCH_CHAR_AS_MULTIBYTE (pos_byte
), SYNTAX (c
) == Squote
)
2841 || SYNTAX_PREFIX (c
)))
2844 opoint_byte
= pos_byte
;
2847 DEC_BOTH (pos
, pos_byte
);
2850 SET_PT_BOTH (opoint
, opoint_byte
);
2855 /* Parse forward from FROM / FROM_BYTE to END,
2856 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2857 and return a description of the state of the parse at END.
2858 If STOPBEFORE is nonzero, stop at the start of an atom.
2859 If COMMENTSTOP is 1, stop at the start of a comment.
2860 If COMMENTSTOP is -1, stop at the start or end of a comment,
2861 after the beginning of a string, or after the end of a string. */
2864 scan_sexps_forward (struct lisp_parse_state
*stateptr
,
2865 EMACS_INT from
, EMACS_INT from_byte
, EMACS_INT end
,
2866 int targetdepth
, int stopbefore
,
2867 Lisp_Object oldstate
, int commentstop
)
2869 struct lisp_parse_state state
;
2871 register enum syntaxcode code
;
2874 struct level
{ int last
, prev
; };
2875 struct level levelstart
[100];
2876 register struct level
*curlevel
= levelstart
;
2877 struct level
*endlevel
= levelstart
+ 100;
2878 register int depth
; /* Paren depth of current scanning location.
2879 level - levelstart equals this except
2880 when the depth becomes negative. */
2881 int mindepth
; /* Lowest DEPTH value seen. */
2882 int start_quoted
= 0; /* Nonzero means starting after a char quote */
2884 EMACS_INT prev_from
; /* Keep one character before FROM. */
2885 EMACS_INT prev_from_byte
;
2886 int prev_from_syntax
;
2887 int boundary_stop
= commentstop
== -1;
2890 EMACS_INT out_bytepos
, out_charpos
;
2894 prev_from_byte
= from_byte
;
2896 DEC_BOTH (prev_from
, prev_from_byte
);
2898 /* Use this macro instead of `from++'. */
2900 do { prev_from = from; \
2901 prev_from_byte = from_byte; \
2902 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
2903 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
2904 INC_BOTH (from, from_byte); \
2906 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2912 if (NILP (oldstate
))
2915 state
.instring
= -1;
2916 state
.incomment
= 0;
2917 state
.comstyle
= 0; /* comment style a by default. */
2918 state
.comstr_start
= -1; /* no comment/string seen. */
2922 tem
= Fcar (oldstate
);
2928 oldstate
= Fcdr (oldstate
);
2929 oldstate
= Fcdr (oldstate
);
2930 oldstate
= Fcdr (oldstate
);
2931 tem
= Fcar (oldstate
);
2932 /* Check whether we are inside string_fence-style string: */
2933 state
.instring
= (!NILP (tem
)
2934 ? (INTEGERP (tem
) ? XINT (tem
) : ST_STRING_STYLE
)
2937 oldstate
= Fcdr (oldstate
);
2938 tem
= Fcar (oldstate
);
2939 state
.incomment
= (!NILP (tem
)
2940 ? (INTEGERP (tem
) ? XINT (tem
) : -1)
2943 oldstate
= Fcdr (oldstate
);
2944 tem
= Fcar (oldstate
);
2945 start_quoted
= !NILP (tem
);
2947 /* if the eighth element of the list is nil, we are in comment
2948 style a. If it is non-nil, we are in comment style b */
2949 oldstate
= Fcdr (oldstate
);
2950 oldstate
= Fcdr (oldstate
);
2951 tem
= Fcar (oldstate
);
2952 state
.comstyle
= NILP (tem
) ? 0 : (EQ (tem
, Qsyntax_table
)
2953 ? ST_COMMENT_STYLE
: 1);
2955 oldstate
= Fcdr (oldstate
);
2956 tem
= Fcar (oldstate
);
2957 state
.comstr_start
= NILP (tem
) ? -1 : XINT (tem
) ;
2958 oldstate
= Fcdr (oldstate
);
2959 tem
= Fcar (oldstate
);
2960 while (!NILP (tem
)) /* >= second enclosing sexps. */
2962 /* curlevel++->last ran into compiler bug on Apollo */
2963 curlevel
->last
= XINT (Fcar (tem
));
2964 if (++curlevel
== endlevel
)
2965 curlevel
--; /* error ("Nesting too deep for parser"); */
2966 curlevel
->prev
= -1;
2967 curlevel
->last
= -1;
2974 curlevel
->prev
= -1;
2975 curlevel
->last
= -1;
2977 SETUP_SYNTAX_TABLE (prev_from
, 1);
2978 temp
= FETCH_CHAR (prev_from_byte
);
2979 prev_from_syntax
= SYNTAX_WITH_FLAGS (temp
);
2980 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2982 /* Enter the loop at a place appropriate for initial state. */
2984 if (state
.incomment
)
2985 goto startincomment
;
2986 if (state
.instring
>= 0)
2988 nofence
= state
.instring
!= ST_STRING_STYLE
;
2990 goto startquotedinstring
;
2993 else if (start_quoted
)
2999 code
= prev_from_syntax
& 0xff;
3002 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax
)
3003 && (c1
= FETCH_CHAR (from_byte
),
3004 SYNTAX_COMSTART_SECOND (c1
)))
3005 /* Duplicate code to avoid a complex if-expression
3006 which causes trouble for the SGI compiler. */
3008 /* Record the comment style we have entered so that only
3009 the comment-end sequence of the same style actually
3010 terminates the comment section. */
3011 state
.comstyle
= SYNTAX_COMMENT_STYLE (c1
);
3012 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
);
3013 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
3014 state
.incomment
= comnested
? 1 : -1;
3015 state
.comstr_start
= prev_from
;
3019 else if (code
== Scomment_fence
)
3021 /* Record the comment style we have entered so that only
3022 the comment-end sequence of the same style actually
3023 terminates the comment section. */
3024 state
.comstyle
= ST_COMMENT_STYLE
;
3025 state
.incomment
= -1;
3026 state
.comstr_start
= prev_from
;
3029 else if (code
== Scomment
)
3031 state
.comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax
);
3032 state
.incomment
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
) ?
3034 state
.comstr_start
= prev_from
;
3037 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax
))
3039 switch (SWITCH_ENUM_CAST (code
))
3043 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3044 curlevel
->last
= prev_from
;
3046 if (from
== end
) goto endquoted
;
3049 /* treat following character as a word constituent */
3052 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3053 curlevel
->last
= prev_from
;
3057 /* Some compilers can't handle this inside the switch. */
3058 temp
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3059 temp
= SYNTAX (temp
);
3065 if (from
== end
) goto endquoted
;
3077 curlevel
->prev
= curlevel
->last
;
3080 case Scomment_fence
: /* Can't happen because it's handled above. */
3082 if (commentstop
|| boundary_stop
) goto done
;
3084 /* The (from == BEGV) test was to enter the loop in the middle so
3085 that we find a 2-char comment ender even if we start in the
3086 middle of it. We don't want to do that if we're just at the
3087 beginning of the comment (think of (*) ... (*)). */
3088 found
= forw_comment (from
, from_byte
, end
,
3089 state
.incomment
, state
.comstyle
,
3090 (from
== BEGV
|| from
< state
.comstr_start
+ 3)
3091 ? 0 : prev_from_syntax
,
3092 &out_charpos
, &out_bytepos
, &state
.incomment
);
3093 from
= out_charpos
; from_byte
= out_bytepos
;
3094 /* Beware! prev_from and friends are invalid now.
3095 Luckily, the `done' doesn't use them and the INC_FROM
3096 sets them to a sane value without looking at them. */
3097 if (!found
) goto done
;
3099 state
.incomment
= 0;
3100 state
.comstyle
= 0; /* reset the comment style */
3101 if (boundary_stop
) goto done
;
3105 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3107 /* curlevel++->last ran into compiler bug on Apollo */
3108 curlevel
->last
= prev_from
;
3109 if (++curlevel
== endlevel
)
3110 curlevel
--; /* error ("Nesting too deep for parser"); */
3111 curlevel
->prev
= -1;
3112 curlevel
->last
= -1;
3113 if (targetdepth
== depth
) goto done
;
3118 if (depth
< mindepth
)
3120 if (curlevel
!= levelstart
)
3122 curlevel
->prev
= curlevel
->last
;
3123 if (targetdepth
== depth
) goto done
;
3128 state
.comstr_start
= from
- 1;
3129 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3130 curlevel
->last
= prev_from
;
3131 state
.instring
= (code
== Sstring
3132 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte
))
3134 if (boundary_stop
) goto done
;
3137 nofence
= state
.instring
!= ST_STRING_STYLE
;
3143 if (from
>= end
) goto done
;
3144 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3145 /* Some compilers can't handle this inside the switch. */
3148 /* Check TEMP here so that if the char has
3149 a syntax-table property which says it is NOT
3150 a string character, it does not end the string. */
3151 if (nofence
&& c
== state
.instring
&& temp
== Sstring
)
3157 if (!nofence
) goto string_end
;
3162 startquotedinstring
:
3163 if (from
>= end
) goto endquoted
;
3169 state
.instring
= -1;
3170 curlevel
->prev
= curlevel
->last
;
3172 if (boundary_stop
) goto done
;
3176 /* FIXME: We should do something with it. */
3179 /* Ignore whitespace, punctuation, quote, endcomment. */
3185 stop
: /* Here if stopping before start of sexp. */
3186 from
= prev_from
; /* We have just fetched the char that starts it; */
3187 goto done
; /* but return the position before it. */
3192 state
.depth
= depth
;
3193 state
.mindepth
= mindepth
;
3194 state
.thislevelstart
= curlevel
->prev
;
3195 state
.prevlevelstart
3196 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
3197 state
.location
= from
;
3198 state
.levelstarts
= Qnil
;
3199 while (--curlevel
>= levelstart
)
3200 state
.levelstarts
= Fcons (make_number (curlevel
->last
),
3207 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
3208 doc
: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3209 Parsing stops at TO or when certain criteria are met;
3210 point is set to where parsing stops.
3211 If fifth arg OLDSTATE is omitted or nil,
3212 parsing assumes that FROM is the beginning of a function.
3213 Value is a list of elements describing final state of parsing:
3215 1. character address of start of innermost containing list; nil if none.
3216 2. character address of start of last complete sexp terminated.
3217 3. non-nil if inside a string.
3218 (it is the character that will terminate the string,
3219 or t if the string should be terminated by a generic string delimiter.)
3220 4. nil if outside a comment, t if inside a non-nestable comment,
3221 else an integer (the current comment nesting).
3222 5. t if following a quote character.
3223 6. the minimum paren-depth encountered during this scan.
3224 7. t if in a comment of style b; symbol `syntax-table' if the comment
3225 should be terminated by a generic comment delimiter.
3226 8. character address of start of comment or string; nil if not in one.
3227 9. Intermediate data for continuation of parsing (subject to change).
3228 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3229 in parentheses becomes equal to TARGETDEPTH.
3230 Fourth arg STOPBEFORE non-nil means stop when come to
3231 any character that starts a sexp.
3232 Fifth arg OLDSTATE is a list like what this function returns.
3233 It is used to initialize the state of the parse. Elements number 1, 2, 6
3235 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3236 If it is symbol `syntax-table', stop after the start of a comment or a
3237 string, or after end of a comment or a string. */)
3238 (Lisp_Object from
, Lisp_Object to
, Lisp_Object targetdepth
, Lisp_Object stopbefore
, Lisp_Object oldstate
, Lisp_Object commentstop
)
3240 struct lisp_parse_state state
;
3243 if (!NILP (targetdepth
))
3245 CHECK_NUMBER (targetdepth
);
3246 target
= XINT (targetdepth
);
3249 target
= -100000; /* We won't reach this depth */
3251 validate_region (&from
, &to
);
3252 scan_sexps_forward (&state
, XINT (from
), CHAR_TO_BYTE (XINT (from
)),
3254 target
, !NILP (stopbefore
), oldstate
,
3256 ? 0 : (EQ (commentstop
, Qsyntax_table
) ? -1 : 1)));
3258 SET_PT (state
.location
);
3260 return Fcons (make_number (state
.depth
),
3261 Fcons (state
.prevlevelstart
< 0 ? Qnil
: make_number (state
.prevlevelstart
),
3262 Fcons (state
.thislevelstart
< 0 ? Qnil
: make_number (state
.thislevelstart
),
3263 Fcons (state
.instring
>= 0
3264 ? (state
.instring
== ST_STRING_STYLE
3265 ? Qt
: make_number (state
.instring
)) : Qnil
,
3266 Fcons (state
.incomment
< 0 ? Qt
:
3267 (state
.incomment
== 0 ? Qnil
:
3268 make_number (state
.incomment
)),
3269 Fcons (state
.quoted
? Qt
: Qnil
,
3270 Fcons (make_number (state
.mindepth
),
3271 Fcons ((state
.comstyle
3272 ? (state
.comstyle
== ST_COMMENT_STYLE
3273 ? Qsyntax_table
: Qt
) :
3275 Fcons (((state
.incomment
3276 || (state
.instring
>= 0))
3277 ? make_number (state
.comstr_start
)
3279 Fcons (state
.levelstarts
, Qnil
))))))))));
3283 init_syntax_once (void)
3288 /* This has to be done here, before we call Fmake_char_table. */
3289 Qsyntax_table
= intern_c_string ("syntax-table");
3290 staticpro (&Qsyntax_table
);
3292 /* Intern_C_String this now in case it isn't already done.
3293 Setting this variable twice is harmless.
3294 But don't staticpro it here--that is done in alloc.c. */
3295 Qchar_table_extra_slots
= intern_c_string ("char-table-extra-slots");
3297 /* Create objects which can be shared among syntax tables. */
3298 Vsyntax_code_object
= Fmake_vector (make_number (Smax
), Qnil
);
3299 for (i
= 0; i
< XVECTOR (Vsyntax_code_object
)->size
; i
++)
3300 XVECTOR (Vsyntax_code_object
)->contents
[i
]
3301 = Fcons (make_number (i
), Qnil
);
3303 /* Now we are ready to set up this property, so we can
3304 create syntax tables. */
3305 Fput (Qsyntax_table
, Qchar_table_extra_slots
, make_number (0));
3307 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Swhitespace
];
3309 Vstandard_syntax_table
= Fmake_char_table (Qsyntax_table
, temp
);
3311 /* Control characters should not be whitespace. */
3312 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Spunct
];
3313 for (i
= 0; i
<= ' ' - 1; i
++)
3314 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3315 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 0177, temp
);
3317 /* Except that a few really are whitespace. */
3318 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Swhitespace
];
3319 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ' ', temp
);
3320 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\t', temp
);
3321 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\n', temp
);
3322 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 015, temp
);
3323 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 014, temp
);
3325 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Sword
];
3326 for (i
= 'a'; i
<= 'z'; i
++)
3327 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3328 for (i
= 'A'; i
<= 'Z'; i
++)
3329 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3330 for (i
= '0'; i
<= '9'; i
++)
3331 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3333 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '$', temp
);
3334 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '%', temp
);
3336 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '(',
3337 Fcons (make_number (Sopen
), make_number (')')));
3338 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ')',
3339 Fcons (make_number (Sclose
), make_number ('(')));
3340 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '[',
3341 Fcons (make_number (Sopen
), make_number (']')));
3342 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ']',
3343 Fcons (make_number (Sclose
), make_number ('[')));
3344 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '{',
3345 Fcons (make_number (Sopen
), make_number ('}')));
3346 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '}',
3347 Fcons (make_number (Sclose
), make_number ('{')));
3348 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '"',
3349 Fcons (make_number ((int) Sstring
), Qnil
));
3350 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\\',
3351 Fcons (make_number ((int) Sescape
), Qnil
));
3353 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Ssymbol
];
3354 for (i
= 0; i
< 10; i
++)
3356 c
= "_-+*/&|<>="[i
];
3357 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3360 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Spunct
];
3361 for (i
= 0; i
< 12; i
++)
3363 c
= ".,;:?!#@~^'`"[i
];
3364 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3367 /* All multibyte characters have syntax `word' by default. */
3368 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Sword
];
3369 char_table_set_range (Vstandard_syntax_table
, 0x80, MAX_CHAR
, temp
);
3373 syms_of_syntax (void)
3375 Qsyntax_table_p
= intern_c_string ("syntax-table-p");
3376 staticpro (&Qsyntax_table_p
);
3378 staticpro (&Vsyntax_code_object
);
3380 staticpro (&gl_state
.object
);
3381 staticpro (&gl_state
.global_code
);
3382 staticpro (&gl_state
.current_syntax_table
);
3383 staticpro (&gl_state
.old_prop
);
3385 /* Defined in regex.c */
3386 staticpro (&re_match_object
);
3388 Qscan_error
= intern_c_string ("scan-error");
3389 staticpro (&Qscan_error
);
3390 Fput (Qscan_error
, Qerror_conditions
,
3391 pure_cons (Qscan_error
, pure_cons (Qerror
, Qnil
)));
3392 Fput (Qscan_error
, Qerror_message
,
3393 make_pure_c_string ("Scan error"));
3395 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments
,
3396 doc
: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3398 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties
,
3399 doc
: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3400 Otherwise, that text property is simply ignored.
3401 See the info node `(elisp)Syntax Properties' for a description of the
3402 `syntax-table' property. */);
3404 words_include_escapes
= 0;
3405 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes
,
3406 doc
: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3408 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol
,
3409 doc
: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3410 multibyte_syntax_as_symbol
= 0;
3412 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3413 &open_paren_in_column_0_is_defun_start
,
3414 doc
: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
3415 open_paren_in_column_0_is_defun_start
= 1;
3418 DEFVAR_LISP ("find-word-boundary-function-table",
3419 &Vfind_word_boundary_function_table
,
3421 Char table of functions to search for the word boundary.
3422 Each function is called with two arguments; POS and LIMIT.
3423 POS and LIMIT are character positions in the current buffer.
3425 If POS is less than LIMIT, POS is at the first character of a word,
3426 and the return value of a function is a position after the last
3427 character of that word.
3429 If POS is not less than LIMIT, POS is at the last character of a word,
3430 and the return value of a function is a position at the first
3431 character of that word.
3433 In both cases, LIMIT bounds the search. */);
3434 Vfind_word_boundary_function_table
= Fmake_char_table (Qnil
, Qnil
);
3436 defsubr (&Ssyntax_table_p
);
3437 defsubr (&Ssyntax_table
);
3438 defsubr (&Sstandard_syntax_table
);
3439 defsubr (&Scopy_syntax_table
);
3440 defsubr (&Sset_syntax_table
);
3441 defsubr (&Schar_syntax
);
3442 defsubr (&Smatching_paren
);
3443 defsubr (&Sstring_to_syntax
);
3444 defsubr (&Smodify_syntax_entry
);
3445 defsubr (&Sinternal_describe_syntax_value
);
3447 defsubr (&Sforward_word
);
3449 defsubr (&Sskip_chars_forward
);
3450 defsubr (&Sskip_chars_backward
);
3451 defsubr (&Sskip_syntax_forward
);
3452 defsubr (&Sskip_syntax_backward
);
3454 defsubr (&Sforward_comment
);
3455 defsubr (&Sscan_lists
);
3456 defsubr (&Sscan_sexps
);
3457 defsubr (&Sbackward_prefix_chars
);
3458 defsubr (&Sparse_partial_sexp
);
3461 /* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3462 (do not change this comment) */