Merge from trunk.
[emacs.git] / src / syntax.h
blob42d689cb96ce83a5e4fe4d2ffa45a2978eed2bfa
/* Declarations having to do with GNU Emacs syntax tables.

Copyright (C) 1985, 1993-1994, 1997-1998, 2001-2011
  Free Software Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
22 extern void update_syntax_table (EMACS_INT, EMACS_INT, int, Lisp_Object);
/* The standard syntax table is stored where it will automatically
   be used in all new buffers: in the syntax_table slot of
   buffer_defaults.  */
#define Vstandard_syntax_table BVAR (&buffer_defaults, syntax_table)
/* A syntax table is a chartable whose elements are cons cells
   (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR can be nil if the char
   is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS is a code, as follows.  The
   enumerators are numbered consecutively from 0, so Smax equals the
   number of meaningful codes.  */

enum syntaxcode
  {
    Swhitespace,    /* for a whitespace character */
    Spunct,         /* for random punctuation characters */
    Sword,          /* for a word constituent */
    Ssymbol,        /* symbol constituent but not word constituent */
    Sopen,          /* for a beginning delimiter */
    Sclose,         /* for an ending delimiter */
    Squote,         /* for a prefix character like Lisp ' */
    Sstring,        /* for a string-grouping character like Lisp " */
    Smath,          /* for delimiters like $ in TeX.  */
    Sescape,        /* for a character that begins a C-style escape */
    Scharquote,     /* for a character that quotes the following character */
    Scomment,       /* for a comment-starting character */
    Sendcomment,    /* for a comment-ending character */
    Sinherit,       /* use the standard syntax table for this character */
    Scomment_fence, /* Starts/ends comment which is delimited on the
                       other side by any char with the same syntaxcode.  */
    Sstring_fence,  /* Starts/ends string which is delimited on the
                       other side by any char with the same syntaxcode.  */
    Smax            /* Upper bound on codes that are meaningful */
  };
/* Set the syntax entry VAL for char C in table TABLE.
   Expands to a single CHAR_TABLE_SET call.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
  CHAR_TABLE_SET ((table), (c), (val))
/* Set the syntax entry VAL for char-range RANGE in table TABLE.
   RANGE is a cons (FROM . TO) specifying the range of characters.
   Expands to a single Fset_char_table_range call.  */

#define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val) \
  Fset_char_table_range ((table), (range), (val))
/* SYNTAX_ENTRY fetches the information from the entry for character C
   in syntax table TABLE, or from globally kept data (gl_state).
   Does inheritance.  */
/* CURRENT_SYNTAX_TABLE gives the syntax table valid for current
   position, it is either the buffer's syntax table, or syntax table
   found in text properties.  */

/* When SYNTAX_ENTRY_VIA_PROPERTY is defined, lookups consult
   gl_state: if gl_state.use_global is set the entry is
   gl_state.global_code, otherwise it is looked up in
   gl_state.current_syntax_table.  When it is not defined, lookups go
   straight to the current buffer's syntax table.  */
#ifdef SYNTAX_ENTRY_VIA_PROPERTY
# define SYNTAX_ENTRY(c) \
    (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
# define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
#else
# define SYNTAX_ENTRY SYNTAX_ENTRY_INT
# define CURRENT_SYNTAX_TABLE BVAR (current_buffer, syntax_table)
#endif
/* Look up the raw entry for character C in CURRENT_SYNTAX_TABLE.  */
#define SYNTAX_ENTRY_INT(c) CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, (c))
/* Extract the information from the entry for character C
   in the current syntax table.

   SYNTAX (c)            is the syntax code: the low 8 bits of the
                         entry's car, as an enum syntaxcode.
   SYNTAX_WITH_FLAGS (c) is the whole car (code plus flags).
   SYNTAX_MATCH (c)      is the entry's cdr (the matching char).

   When the entry is not a cons, the first two yield Swhitespace and
   SYNTAX_MATCH yields Qnil.  */

#ifdef __GNUC__
/* GNU C: use a statement expression so the temporary is local to
   each expansion.  */
#define SYNTAX(c) \
  ({ Lisp_Object _syntax_temp; \
     _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff) \
      : Swhitespace); })

#define SYNTAX_WITH_FLAGS(c) \
  ({ Lisp_Object _syntax_temp; \
     _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? XINT (XCAR (_syntax_temp)) \
      : (int) Swhitespace); })

#define SYNTAX_MATCH(c) \
  ({ Lisp_Object _syntax_temp; \
     _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? XCDR (_syntax_temp) \
      : Qnil); })
#else
/* Other compilers: all three macros share the single external
   temporary syntax_temp, assigned via the comma operator.  */
extern Lisp_Object syntax_temp;
#define SYNTAX(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff) \
    : Swhitespace))

#define SYNTAX_WITH_FLAGS(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? XINT (XCAR (syntax_temp)) \
    : (int) Swhitespace))

#define SYNTAX_MATCH(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? XCDR (syntax_temp) \
    : Qnil))
#endif
/* Whether the syntax of the character C has the prefix flag set.  */
extern int syntax_prefix_flag_p (int c);

/* This array, indexed by a character, contains the syntax code which that
   character signifies (as a char).  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.
   The size 0400 is octal, i.e. 256 entries.  */

extern unsigned char syntax_spec_code[0400];

/* Indexed by syntax code, give the letter that describes it.  */

extern char syntax_code_spec[16];
/* Convert the byte offset BYTEPOS into a character position,
   for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.

   The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
   These macros do nothing when parse_sexp_lookup_properties is 0,
   so we return 0 in that case, for speed.

   Otherwise the conversion depends on gl_state.object:
   - a string: string_byte_to_char on that string;
   - a buffer: BYTEPOS is rebased by that buffer's BUF_BEGV_BYTE,
     converted, and the result rebased by BUF_BEGV;
   - nil: the same, using the current buffer's BEGV_BYTE and BEGV;
   - anything else: BYTEPOS is returned unchanged.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
  (! parse_sexp_lookup_properties \
   ? 0 \
   : STRINGP (gl_state.object) \
   ? string_byte_to_char (gl_state.object, (bytepos)) \
   : BUFFERP (gl_state.object) \
   ? buf_bytepos_to_charpos (XBUFFER (gl_state.object), \
                             (bytepos) \
                             + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) \
                             - 1) \
     - BUF_BEGV (XBUFFER (gl_state.object)) + 1 \
   : NILP (gl_state.object) \
   ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1 \
   : (bytepos))
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position before CHARPOS.

   update_syntax_table is called only when
   parse_sexp_lookup_properties is nonzero and CHARPOS has reached
   gl_state.e_property.  The value is 1 if it was called, else 0.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) >= gl_state.e_property \
   ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                           gl_state.object), \
      1) \
   : 0)
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position after CHARPOS.

   update_syntax_table is called only when
   parse_sexp_lookup_properties is nonzero and CHARPOS is below
   gl_state.b_property.  The value is 1 if it was called, else 0.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) < gl_state.b_property \
   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                           gl_state.object), \
      1) \
   : 0)
/* Make syntax table good for CHARPOS.

   Tries the backward case first (CHARPOS below gl_state.b_property),
   then the forward case (CHARPOS at or past gl_state.e_property).
   The value is 1 if update_syntax_table was called, else 0.  */

#define UPDATE_SYNTAX_TABLE(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) < gl_state.b_property \
   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                           gl_state.object), \
      1) \
   : (parse_sexp_lookup_properties \
      && (charpos) >= gl_state.e_property \
      ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                              gl_state.object), \
         1) \
      : 0))
/* This macro sets up the buffer-global syntax table: it clears
   gl_state.use_global and points gl_state.current_syntax_table at
   the current buffer's syntax table.  */
#define SETUP_BUFFER_SYNTAX_TABLE() \
do \
  { \
    gl_state.use_global = 0; \
    gl_state.current_syntax_table = BVAR (current_buffer, syntax_table); \
  } while (0)
/* This macro should be called with FROM at the start of forward
   search, or after the last position of the backward search.  It
   makes sure that the first char is picked up with correct table, so
   one does not need to call UPDATE_SYNTAX_TABLE immediately after the
   call.
   Sign of COUNT gives the direction of the search.

   The valid range is initialized to the whole accessible portion
   [BEGV, ZV + 1) of the current buffer, with no object and a zero
   offset.  When parse_sexp_lookup_properties is nonzero,
   update_syntax_table is then called for FROM (forward) or FROM - 1
   (backward); the backward call is skipped when FROM is at BEGV.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT) \
do \
  { \
    SETUP_BUFFER_SYNTAX_TABLE (); \
    gl_state.b_property = BEGV; \
    gl_state.e_property = ZV + 1; \
    gl_state.object = Qnil; \
    gl_state.offset = 0; \
    if (parse_sexp_lookup_properties \
        && ((COUNT) > 0 || (FROM) > BEGV)) \
      update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT), \
                           1, Qnil); \
  } \
while (0)
/* Same as above, but in OBJECT.  If OBJECT is nil, use current buffer.
   If it is t (which is only used in fast_c_string_match_ignore_case),
   ignore properties altogether.

   This is meant for regex.c to use.  For buffers, regex.c passes arguments
   to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
   So if it is a buffer, we set the offset field to BEGV - 1.

   Any OBJECT that is not a buffer, nil or t is measured with SCHARS,
   i.e. it is treated as a string.  COUNT is parenthesized at every
   use so that an expression argument keeps its intended grouping.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
do \
  { \
    SETUP_BUFFER_SYNTAX_TABLE (); \
    gl_state.object = (OBJECT); \
    if (BUFFERP (gl_state.object)) \
      { \
        struct buffer *buf = XBUFFER (gl_state.object); \
        gl_state.b_property = 1; \
        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
        gl_state.offset = BUF_BEGV (buf) - 1; \
      } \
    else if (NILP (gl_state.object)) \
      { \
        gl_state.b_property = 1; \
        gl_state.e_property = ZV - BEGV + 1; \
        gl_state.offset = BEGV - 1; \
      } \
    else if (EQ (gl_state.object, Qt)) \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = MOST_POSITIVE_FIXNUM; \
        gl_state.offset = 0; \
      } \
    else \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = 1 + SCHARS (gl_state.object); \
        gl_state.offset = 0; \
      } \
    if (parse_sexp_lookup_properties) \
      update_syntax_table (((FROM) + gl_state.offset \
                            + ((COUNT) > 0 ? 0 : -1)), \
                           (COUNT), 1, gl_state.object); \
  } \
while (0)
277 struct gl_state_s
279 Lisp_Object object; /* The object we are scanning. */
280 EMACS_INT start; /* Where to stop. */
281 EMACS_INT stop; /* Where to stop. */
282 int use_global; /* Whether to use global_code
283 or c_s_t. */
284 Lisp_Object global_code; /* Syntax code of current char. */
285 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
286 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
287 EMACS_INT b_property; /* First index where c_s_t is valid. */
288 EMACS_INT e_property; /* First index where c_s_t is
289 not valid. */
290 INTERVAL forward_i; /* Where to start lookup on forward */
291 INTERVAL backward_i; /* or backward movement. The
292 data in c_s_t is valid
293 between these intervals,
294 and possibly at the
295 intervals too, depending
296 on: */
297 /* Offset for positions specified to UPDATE_SYNTAX_TABLE. */
298 EMACS_INT offset;
301 extern struct gl_state_s gl_state;
302 extern EMACS_INT scan_words (EMACS_INT, EMACS_INT);