Merge from trunk.
[emacs.git] / src / syntax.h
blob9eaf553f2e5bf3b53ec76976a1953cef7636459b
1 /* Declarations having to do with GNU Emacs syntax tables.
2 Copyright (C) 1985, 1993, 1994, 1997, 1998, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
/* Both names are only declared here; their definitions live outside
   this header.  The macros further down call update_syntax_table with,
   in order: a position, a direction/count (1, -1, or the caller's
   COUNT), a flag that is 0 when coming from the UPDATE_SYNTAX_TABLE...
   macros and 1 when coming from the SETUP_SYNTAX_TABLE... macros, and
   the object being scanned.  */
21 extern Lisp_Object Qsyntax_table_p;
22 extern void update_syntax_table (int, int, int, Lisp_Object);
24 /* The standard syntax table is stored where it will automatically
25 be used in all new buffers. */
/* The name is an alias, not a variable of its own: it expands to the
   syntax_table member of buffer_defaults (declared elsewhere).  */
26 #define Vstandard_syntax_table buffer_defaults.syntax_table
28 /* A syntax table is a chartable whose elements are cons cells
29 (CODE+FLAGS . MATCHING-CHAR). MATCHING-CHAR can be nil if the char
30 is not a kind of parenthesis.
32 The low 8 bits of CODE+FLAGS is a code, as follows: */
/* No enumerator is given an explicit value, so they number
   consecutively from Swhitespace = 0 through Sstring_fence = 15.  That
   makes Smax equal to 16, the number of real syntax codes.  */
34 enum syntaxcode
36 Swhitespace, /* for a whitespace character */
37 Spunct, /* for random punctuation characters */
38 Sword, /* for a word constituent */
39 Ssymbol, /* symbol constituent but not word constituent */
40 Sopen, /* for a beginning delimiter */
41 Sclose, /* for an ending delimiter */
42 Squote, /* for a prefix character like Lisp ' */
43 Sstring, /* for a string-grouping character like Lisp " */
44 Smath, /* for delimiters like $ in TeX. */
45 Sescape, /* for a character that begins a C-style escape */
46 Scharquote, /* for a character that quotes the following character */
47 Scomment, /* for a comment-starting character */
48 Sendcomment, /* for a comment-ending character */
49 Sinherit, /* use the standard syntax table for this character */
50 Scomment_fence, /* Starts/ends comment which is delimited on the
51 other side by any char with the same syntaxcode. */
52 Sstring_fence, /* Starts/ends string which is delimited on the
53 other side by any char with the same syntaxcode. */
54 Smax /* Upper bound on codes that are meaningful */
57 /* Set the syntax entry VAL for char C in table TABLE. */
/* Thin wrapper: the three arguments are handed to CHAR_TABLE_SET in the
   same order, and the macro's value is whatever CHAR_TABLE_SET yields.  */
59 #define SET_RAW_SYNTAX_ENTRY(table, c, val) \
60 CHAR_TABLE_SET ((table), c, (val))
62 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
63 RANGE is a cons (FROM . TO) specifying the range of characters. */
/* Thin wrapper: the three arguments are handed to
   Fset_char_table_range in the same order, and the macro's value is
   that function's return value.  */
65 #define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val) \
66 Fset_char_table_range ((table), (range), (val))
68 /* SYNTAX_ENTRY fetches the information from the entry for character C
69 in syntax table TABLE, or from globally kept data (gl_state).
70 Does inheritance. */
71 /* CURRENT_SYNTAX_TABLE gives the syntax table valid for current
72 position, it is either the buffer's syntax table, or syntax table
73 found in text properties. */
/* When SYNTAX_ENTRY_VIA_PROPERTY is defined, SYNTAX_ENTRY (c) yields
   gl_state.global_code if gl_state.use_global is nonzero, and otherwise
   looks C up in gl_state.current_syntax_table.  When it is not defined,
   the lookup always goes through current_buffer->syntax_table.

   SYNTAX_ENTRY_INT is defined after the macros that mention it; that is
   fine because a macro body is only expanded at the point of use.  */
75 #ifdef SYNTAX_ENTRY_VIA_PROPERTY
76 # define SYNTAX_ENTRY(c) \
77 (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
78 # define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
79 #else
80 # define SYNTAX_ENTRY SYNTAX_ENTRY_INT
81 # define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
82 #endif
/* The actual table lookup: CHAR_TABLE_REF on whichever table
   CURRENT_SYNTAX_TABLE names in this configuration.  */
84 #define SYNTAX_ENTRY_INT(c) CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, (c))
86 /* Extract the information from the entry for character C
87 in the current syntax table. */
/* SYNTAX (c) yields the syntax code (the low 8 bits of the entry's car)
   as an enum syntaxcode, SYNTAX_WITH_FLAGS (c) yields the whole car as
   an integer, and SYNTAX_MATCH (c) yields the entry's cdr.  When the
   entry is not a cons they yield Swhitespace, (int) Swhitespace and
   Qnil respectively.

   The GCC variants keep the fetched entry in a block-local temporary
   inside a statement expression.  The portable variants assign it to
   the single shared variable syntax_temp declared below, which every
   expansion overwrites.  */
89 #ifdef __GNUC__
90 #define SYNTAX(c) \
91 ({ Lisp_Object _syntax_temp; \
92 _syntax_temp = SYNTAX_ENTRY (c); \
93 (CONSP (_syntax_temp) \
94 ? (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff) \
95 : Swhitespace); })
97 #define SYNTAX_WITH_FLAGS(c) \
98 ({ Lisp_Object _syntax_temp; \
99 _syntax_temp = SYNTAX_ENTRY (c); \
100 (CONSP (_syntax_temp) \
101 ? XINT (XCAR (_syntax_temp)) \
102 : (int) Swhitespace); })
104 #define SYNTAX_MATCH(c) \
105 ({ Lisp_Object _syntax_temp; \
106 _syntax_temp = SYNTAX_ENTRY (c); \
107 (CONSP (_syntax_temp) \
108 ? XCDR (_syntax_temp) \
109 : Qnil); })
110 #else
111 extern Lisp_Object syntax_temp;
112 #define SYNTAX(c) \
113 (syntax_temp = SYNTAX_ENTRY ((c)), \
114 (CONSP (syntax_temp) \
115 ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff) \
116 : Swhitespace))
118 #define SYNTAX_WITH_FLAGS(c) \
119 (syntax_temp = SYNTAX_ENTRY ((c)), \
120 (CONSP (syntax_temp) \
121 ? XINT (XCAR (syntax_temp)) \
122 : (int) Swhitespace))
124 #define SYNTAX_MATCH(c) \
125 (syntax_temp = SYNTAX_ENTRY ((c)), \
126 (CONSP (syntax_temp) \
127 ? XCDR (syntax_temp) \
128 : Qnil))
129 #endif
131 /* Then there are seven single-bit flags that have the following meanings:
132 1. This character is the first of a two-character comment-start sequence.
133 2. This character is the second of a two-character comment-start sequence.
134 3. This character is the first of a two-character comment-end sequence.
135 4. This character is the second of a two-character comment-end sequence.
136 5. This character is a prefix, for backward-prefix-chars.
137 6. see below
138 7. This character is part of a nestable comment sequence.
139 Note that any two-character sequence whose first character has flag 1
140 and whose second character has flag 2 will be interpreted as a comment start.
142 bit 6 is used to discriminate between two different comment styles.
143 Languages such as C++ allow two orthogonal syntax start/end pairs
144 and bit 6 is used to determine whether a comment-end or Sendcomment
145 ends style a or b. Comment start sequences can start style a or b.
146 Style a is always the default. */
149 /* These macros extract a particular flag for a given character. */
/* Each one fetches SYNTAX_WITH_FLAGS (c) and isolates a single bit:
   flags 1 through 7 of the list above occupy bits 16 through 22, in
   that order.  The value is always 0 or 1.  */
151 #define SYNTAX_COMSTART_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 16) & 1)
153 #define SYNTAX_COMSTART_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 17) & 1)
155 #define SYNTAX_COMEND_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 18) & 1)
157 #define SYNTAX_COMEND_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 19) & 1)
159 #define SYNTAX_PREFIX(c) ((SYNTAX_WITH_FLAGS (c) >> 20) & 1)
161 #define SYNTAX_COMMENT_STYLE(c) ((SYNTAX_WITH_FLAGS (c) >> 21) & 1)
163 #define SYNTAX_COMMENT_NESTED(c) ((SYNTAX_WITH_FLAGS (c) >> 22) & 1)
165 /* These macros extract specific flags from an integer
166 that holds the syntax code and the flags. */
/* FLAGS is an already-fetched CODE+FLAGS integer (for instance the
   value of SYNTAX_WITH_FLAGS), so no table lookup happens here.  The
   bit positions, 16 through 22, are the same ones the per-character
   macros use, and the value is always 0 or 1.  */
168 #define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
170 #define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
172 #define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
174 #define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
176 #define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
178 #define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) >> 21) & 1)
180 #define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
182 /* This array, indexed by a character, contains the syntax code which that
183 character signifies (as a char). For example,
184 (enum syntaxcode) syntax_spec_code['w'] is Sword. */
/* 0400 is octal for 256, i.e. one slot per unsigned char value.  */
186 extern unsigned char syntax_spec_code[0400];
188 /* Indexed by syntax code, give the letter that describes it. */
/* The 16 slots match the 16 syntax codes that precede Smax in
   enum syntaxcode.  */
190 extern char syntax_code_spec[16];
192 /* Convert the byte offset BYTEPOS into a character position,
193 for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.
195 The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
196 These macros do nothing when parse_sexp_lookup_properties is 0,
197 so we return 0 in that case, for speed. */
/* After the parse_sexp_lookup_properties test, the branch taken depends
   on gl_state.object:
     - a string is converted with string_byte_to_char;
     - a buffer is converted with buf_bytepos_to_charpos, treating
       BYTEPOS as relative to that buffer's BUF_BEGV_BYTE and returning
       a position relative to its BUF_BEGV;
     - nil does the same for the current buffer via BYTE_TO_CHAR,
       BEGV_BYTE and BEGV;
     - anything else yields BYTEPOS unchanged.  */
199 #define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
200 (! parse_sexp_lookup_properties \
201 ? 0 \
202 : STRINGP (gl_state.object) \
203 ? string_byte_to_char (gl_state.object, (bytepos)) \
204 : BUFFERP (gl_state.object) \
205 ? buf_bytepos_to_charpos (XBUFFER (gl_state.object), \
206 (bytepos) + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1) - BUF_BEGV (XBUFFER (gl_state.object)) + 1 \
207 : NILP (gl_state.object) \
208 ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1 \
209 : (bytepos))
211 /* Make syntax table state (gl_state) good for CHARPOS, assuming it is
212 currently good for a position before CHARPOS. */
/* update_syntax_table is called (direction 1, at CHARPOS +
   gl_state.offset) only when parse_sexp_lookup_properties is nonzero
   and CHARPOS has reached gl_state.e_property.  The macro's value is 1
   if that call was made and 0 otherwise.  CHARPOS may be evaluated
   twice.  */
214 #define UPDATE_SYNTAX_TABLE_FORWARD(charpos) \
215 (parse_sexp_lookup_properties \
216 && (charpos) >= gl_state.e_property \
217 ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
218 gl_state.object), \
219 1) \
220 : 0)
222 /* Make syntax table state (gl_state) good for CHARPOS, assuming it is
223 currently good for a position after CHARPOS. */
/* update_syntax_table is called (direction -1, at CHARPOS +
   gl_state.offset) only when parse_sexp_lookup_properties is nonzero
   and CHARPOS lies below gl_state.b_property.  The macro's value is 1
   if that call was made and 0 otherwise.  CHARPOS may be evaluated
   twice.  */
225 #define UPDATE_SYNTAX_TABLE_BACKWARD(charpos) \
226 (parse_sexp_lookup_properties \
227 && (charpos) < gl_state.b_property \
228 ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
229 gl_state.object), \
230 1) \
231 : 0)
233 /* Make syntax table good for CHARPOS. */
/* Direction-agnostic form: the backward test (CHARPOS below
   gl_state.b_property) is tried first, then the forward test (CHARPOS
   at or beyond gl_state.e_property).  At most one update_syntax_table
   call is made; the value is 1 if one was made and 0 otherwise.
   CHARPOS may be evaluated up to three times.  */
235 #define UPDATE_SYNTAX_TABLE(charpos) \
236 (parse_sexp_lookup_properties \
237 && (charpos) < gl_state.b_property \
238 ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
239 gl_state.object), \
240 1) \
241 : (parse_sexp_lookup_properties \
242 && (charpos) >= gl_state.e_property \
243 ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0,\
244 gl_state.object), \
245 1) \
246 : 0))
248 /* This macro sets up the buffer-global syntax table. */
/* It clears gl_state.use_global and points
   gl_state.current_syntax_table at the current buffer's syntax table.
   It is a statement macro (do ... while (0)); the caller supplies the
   trailing semicolon.  */
249 #define SETUP_BUFFER_SYNTAX_TABLE() \
250 do \
252 gl_state.use_global = 0; \
253 gl_state.current_syntax_table = current_buffer->syntax_table; \
254 } while (0)
256 /* This macro should be called with FROM at the start of forward
257 search, or after the last position of the backward search. It
258 makes sure that the first char is picked up with correct table, so
259 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
260 call.
261 Sign of COUNT gives the direction of the search. */
/* Resets gl_state for the current buffer: object nil, offset 0, and a
   valid range of BEGV up to (but excluding) ZV + 1.  update_syntax_table
   is then called only when parse_sexp_lookup_properties is nonzero and
   either COUNT > 0 or FROM > BEGV; it is called at FROM when COUNT > 0
   and at FROM - 1 otherwise.  FROM and COUNT may each be evaluated more
   than once.  */
264 #define SETUP_SYNTAX_TABLE(FROM, COUNT) \
265 do \
267 SETUP_BUFFER_SYNTAX_TABLE (); \
268 gl_state.b_property = BEGV; \
269 gl_state.e_property = ZV + 1; \
270 gl_state.object = Qnil; \
271 gl_state.offset = 0; \
272 if (parse_sexp_lookup_properties) \
273 if ((COUNT) > 0 || (FROM) > BEGV) \
274 update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT),\
275 1, Qnil); \
277 while (0)
/* Like SETUP_SYNTAX_TABLE, but scan OBJECT rather than always the
   current buffer.  OBJECT selects how gl_state is initialised:
     - a buffer: b_property is 1, e_property is the size of that
       buffer's BEGV..ZV range plus 1, offset is its BEGV - 1;
     - nil: the same, using the current buffer's BEGV and ZV;
     - t: properties are ignored altogether; b_property is 0,
       e_property is the large constant 1500000000 and offset is 0;
     - anything else: b_property is 0, e_property is 1 + SCHARS (OBJECT)
       and offset is 0.

   This is meant for regex.c to use.  For buffers, regex.c passes
   arguments to the UPDATE_SYNTAX_TABLE macros which are relative to
   BEGV, so in the buffer and nil cases the offset field is set to
   BEGV - 1.

   FROM is the starting position and the sign of COUNT gives the
   direction of the search.  OBJECT is evaluated exactly once.  FROM
   (once) and COUNT (twice) are evaluated only when
   parse_sexp_lookup_properties is nonzero.  COUNT is parenthesized
   everywhere in the expansion so that an argument built from
   low-precedence operators, such as `forward ? 1 : -1', is compared
   against 0 as a whole.  */
#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
do \
  { \
    SETUP_BUFFER_SYNTAX_TABLE (); \
    gl_state.object = (OBJECT); \
    if (BUFFERP (gl_state.object)) \
      { \
        struct buffer *buf = XBUFFER (gl_state.object); \
        gl_state.b_property = 1; \
        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
        gl_state.offset = BUF_BEGV (buf) - 1; \
      } \
    else if (NILP (gl_state.object)) \
      { \
        gl_state.b_property = 1; \
        gl_state.e_property = ZV - BEGV + 1; \
        gl_state.offset = BEGV - 1; \
      } \
    else if (EQ (gl_state.object, Qt)) \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = 1500000000; \
        gl_state.offset = 0; \
      } \
    else \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = 1 + SCHARS (gl_state.object); \
        gl_state.offset = 0; \
      } \
    if (parse_sexp_lookup_properties) \
      update_syntax_table (((FROM) + gl_state.offset \
                            + ((COUNT) > 0 ? 0 : -1)), \
                           (COUNT), 1, gl_state.object); \
  } \
while (0)
/* Scanning state shared by the SYNTAX_ENTRY, UPDATE_SYNTAX_TABLE... and
   SETUP_SYNTAX_TABLE... macros.  In the field comments, "c_s_t"
   abbreviates current_syntax_table.  */
323 struct gl_state_s
325 Lisp_Object object; /* The object we are scanning. */
326 int start; /* Where to stop. */
/* NOTE(review): `start' carries the same comment as `stop'; possibly a
   copy-paste slip -- confirm the intended meaning against the code
   that sets these fields.  */
327 int stop; /* Where to stop. */
328 int use_global; /* Whether to use global_code
329 or c_s_t. */
330 Lisp_Object global_code; /* Syntax code of current char. */
331 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
332 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
333 int b_property; /* First index where c_s_t is valid. */
334 int e_property; /* First index where c_s_t is
335 not valid. */
336 INTERVAL forward_i; /* Where to start lookup on forward */
337 INTERVAL backward_i; /* or backward movement. The
338 data in c_s_t is valid
339 between these intervals,
340 and possibly at the
341 intervals too, depending
342 on: */
/* NOTE(review): the sentence above stops at "depending on:" with
   nothing after it in this listing.  */
343 /* Offset for positions specified to UPDATE_SYNTAX_TABLE. */
/* The UPDATE_SYNTAX_TABLE... macros add this to CHARPOS before calling
   update_syntax_table.  */
344 int offset;
/* The one shared instance of the scanning state that every macro in
   this header reads and writes.  */
347 extern struct gl_state_s gl_state;
/* When this is zero the UPDATE_SYNTAX_TABLE... macros evaluate to 0
   without calling update_syntax_table, and the SETUP_SYNTAX_TABLE...
   macros skip that call as well.  */
348 extern int parse_sexp_lookup_properties;
/* Declared for other files; no macro in this header calls the two
   functions below.  */
349 extern INTERVAL interval_of (int, Lisp_Object);
351 extern int scan_words (int, int);
353 /* arch-tag: 28833cca-cd73-4741-8c85-a3111166a0e0
354 (do not change this comment) */