Merge with git+ssh://pasky.or.cz/srv/git/elinks.git
[elinks.git] / src / dom / scanner.h
blob1f6a6f17e2c158295459bd0c903a214336f98b4f
1 #ifndef EL_DOM_SCANNER_H
2 #define EL_DOM_SCANNER_H
4 #include "dom/string.h"
5 #include "util/error.h"
7 /* Define if you want a talking scanner */
8 /* #define DEBUG_DOM_SCANNER */
10 /** DOM scanner token
12 * This struct describes one scanner state. There are two kinds of tokens: char
13 * and non-char tokens. Char tokens contains only one char and simply have
14 * their char value as type. They are tokens having special control meaning in
15 * the code, like ':', ';', '{', '}' and '*'. Non-char tokens have one or more
16 * chars and contain stuff like number or indentifier strings. */
17 struct dom_scanner_token {
18 /** The type the token. */
19 int type;
21 /** Some precedence value. */
22 int precedence;
24 /** The line number; used for error tokens. */
25 unsigned int lineno;
27 /** The start of the token string and the token length. */
28 struct dom_string string;
/** Skip the first character of a token
 *
 * Advances the token's string pointer by one and shrinks its length by one.
 * The @a token argument is evaluated twice, so it must not have side
 * effects. No check is made that the token is non-empty. */
#define skip_dom_scanner_token_char(token) \
	do { (token)->string.string++; (token)->string.length--; } while (0)
/** Compare the token string to a "static" string
 *
 * Evaluates to non-zero when the token string has exactly the length of
 * @a str and matches it ignoring case. @a str must be a string literal or a
 * char array, since its length is taken with sizeof(). */
#define dom_scanner_token_contains(token, str) \
	((token)->string.length == (sizeof(str) - 1) \
	 && !strncasecmp((token)->string.string, (str), sizeof(str) - 1))
41 struct dom_scan_table_info {
42 enum { DOM_SCAN_RANGE, DOM_SCAN_STRING, DOM_SCAN_END } type;
43 struct dom_string data;
44 int bits;
/** The number of entries in a scanner table. */
#define DOM_SCAN_TABLE_SIZE	256

/** Generic initializer for a struct dom_scan_table_info entry. */
#define DOM_SCAN_TABLE_INFO(type, data1, data2, bits) \
	{ (type), INIT_DOM_STRING((data1), (data2)), (bits) }

/** Initializer for a #DOM_SCAN_RANGE entry covering @a from to @a to. */
#define DOM_SCAN_TABLE_RANGE(from, to, bits) \
	DOM_SCAN_TABLE_INFO(DOM_SCAN_RANGE, from, to, bits)

/** Initializer for a #DOM_SCAN_STRING entry. @a str must be a string literal
 * or char array, since its length is taken with sizeof(). */
#define DOM_SCAN_TABLE_STRING(str, bits) \
	DOM_SCAN_TABLE_INFO(DOM_SCAN_STRING, str, sizeof(str) - 1, bits)

/** Initializer for the #DOM_SCAN_END entry terminating a table description. */
#define DOM_SCAN_TABLE_END \
	DOM_SCAN_TABLE_INFO(DOM_SCAN_END, NULL, 0, 0)
61 struct dom_scanner_string_mapping {
62 struct dom_string name;
63 int type;
64 int base_type;
/** Initializer for a struct dom_scanner_string_mapping entry. */
#define DOM_STRING_MAP(str, type, family) \
	{ STATIC_DOM_STRING(str), (type), (family) }

/** Initializer for the all-zero entry that ends a mapping table. */
#define DOM_STRING_MAP_END \
	{ INIT_DOM_STRING(NULL, 0), 0, 0 }
73 struct dom_scanner;
75 /** DOM scanner info
77 * Backend-specific information used during the actual scanning and
78 * by the front end to fill the token table on-demand, etc.
80 struct dom_scanner_info {
81 /** Table containing how to map strings to token types */
82 const struct dom_scanner_string_mapping *mappings;
84 /** Information for how to initialize the scanner table */
85 const struct dom_scan_table_info *scan_table_info;
87 /**
88 * Fills the scanner with tokens. Already scanned tokens which have not
89 * been requested remain and are moved to the start of the scanners
90 * token table. Returns the current token or NULL if there are none. */
91 struct dom_scanner_token *(*scan)(struct dom_scanner *scanner);
93 /**
94 * The scanner table. Contains bitmaps for the various characters
95 * groups. Idea sync'ed from mozilla browser. */
96 int scan_table[DOM_SCAN_TABLE_SIZE];
98 /** Has the scanner info been initialized? */
99 unsigned int initialized:1;
/** Initializes a DOM scanner
 *
 * See struct #dom_scanner for a description of the `int` flags.
 *
 * @param scanner	The scanner to initialize.
 * @param scanner_info	The backend-specific scanner information to use.
 * @param string	The string to be scanned.
 * @param state		Initial value of the scanner-internal state indicator. */
void init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
		      struct dom_string *string, int state, int count_lines, int complete,
		      int check_complete, int detect_error);
/** The number of tokens in the scanner's token table
 *
 * At best it should be big enough to contain properties with space separated
 * values and function calls with up to 3 variables like rgb(). At worst it
 * should be no less than 2 in order to be able to peek at the next token in
 * the scanner. */
#define DOM_SCANNER_TOKENS 10
118 /** DOM scanner
120 * Holds the current state of the scanner. */
121 struct dom_scanner {
122 /** The start of the scanned string. */
123 unsigned char *string;
124 /** The end of the scanned string. */
125 unsigned char *end;
126 /** The current position in the sstring being scanned.
128 * The position in the string where to scan next and the end of the
129 * string. If position is NULL it means that no more tokens can be
130 * retrieved from the string. */
131 unsigned char *position;
133 /** The current token.
135 * If the number of scanned tokens is less than
136 * #DOM_SCANNER_TOKENS it is because there are no more tokens in
137 * the string. */
138 struct dom_scanner_token *current;
139 /** The number of scanned tokens left in the table. */
140 int tokens;
142 /** The 'meta' scanner information. */
143 struct dom_scanner_info *info;
145 #ifdef DEBUG_SCANNER
146 /* Debug info about the caller. */
147 unsigned char *file;
148 int line;
149 #endif
151 /* The following two flags are used when parsing is incremental and
152 * the scanner must ensure that only tokens that are complete are
153 * generated. */
154 unsigned int check_complete:1; /**< Only generate complete tokens */
155 unsigned int incomplete:1; /**< The scanned string is incomplete */
157 unsigned int detect_errors:1; /**< Check for markup errors */
158 unsigned int found_error; /**< Did we already report this error? */
160 unsigned int count_lines:1; /**< Is line counting enbaled? */
161 unsigned int lineno; /**< Line # of the last scanned token */
163 /** Some state indicator only meaningful to the scanner internals */
164 int state;
166 /** Token table.
168 * The table contain already scanned tokens. It is maintained in
169 * order to optimize the scanning a bit and make it possible to look
170 * ahead at the next token. You should always use the accessors
171 * (defined below) for getting tokens from the scanner. */
172 struct dom_scanner_token table[DOM_SCANNER_TOKENS];
/** Check if there are more tokens
 *
 * True when the token count is positive and the current token pointer lies
 * before the end of the filled part of the token table. The @a scanner
 * argument is evaluated several times. */
#define dom_scanner_has_tokens(scanner) \
	((scanner)->tokens > 0 && (scanner)->current < (scanner)->table + (scanner)->tokens)
/* This macro checks if the current scanner state is valid. Meaning if the
 * scanner's table is full the last token skipping or
 * get_next_dom_scanner_token() call made it possible to get the type of the
 * next token. The argument is parenthesized so that expressions such as
 * `&scanner` can be passed; it is evaluated several times. */
#define check_dom_scanner(scanner) \
	((scanner)->tokens < DOM_SCANNER_TOKENS \
	 || (scanner)->current + 1 < (scanner)->table + (scanner)->tokens)
187 /* Scanner table accessors and mutators */
/** Check the type of the next token
 *
 * True when the scanner has tokens, a token follows the current one inside
 * the filled part of the token table, and that token has type
 * @a token_type. The @a scanner argument is evaluated several times. */
#define check_next_dom_scanner_token(scanner, token_type) \
	(dom_scanner_has_tokens(scanner) \
	 && ((scanner)->current + 1 < (scanner)->table + (scanner)->tokens) \
	 && (scanner)->current[1].type == (token_type))
195 /** Get the current token */
196 static inline struct dom_scanner_token *
197 get_dom_scanner_token(struct dom_scanner *scanner)
199 return dom_scanner_has_tokens(scanner) ? scanner->current : NULL;
202 /** Get the next token
204 * Getting the next token might cause a rescan so any token pointers that has
205 * been stored in a local variable might not be valid after the call. */
206 /* Do a scanning if we do not also have access to next token. */
207 static inline struct dom_scanner_token *
208 get_next_dom_scanner_token(struct dom_scanner *scanner)
210 return (dom_scanner_has_tokens(scanner)
211 && (++scanner->current + 1 >= scanner->table + scanner->tokens)
212 ? scanner->info->scan(scanner) : get_dom_scanner_token(scanner));
/** Skip the current token
 *
 * Alias for get_next_dom_scanner_token(); evaluates to its return value. */
#define skip_dom_scanner_token(scanner) get_next_dom_scanner_token(scanner)
/** Conditionally skip tokens
 *
 * Removes tokens from the scanner until it meets a token of the given type.
 * This token will then also be skipped.
 *
 * @param scanner	The scanner to remove tokens from.
 * @param skipto	The token type to skip to.
 * @param precedence	NOTE(review): presumably compared against the tokens'
 *			precedence value while skipping -- confirm against the
 *			implementation. */
struct dom_scanner_token *
skip_dom_scanner_tokens(struct dom_scanner *scanner, int skipto, int precedence);
/** Map a string to internal ID
 *
 * Looks up the string from @a ident to @a end in the scanner's string mapping
 * table.
 *
 * @param scanner	The scanner whose string mapping table is searched.
 * @param ident		The start of the string to look up.
 * @param end		The end of the string to look up.
 * @param base_type	NOTE(review): presumably restricts or defaults the
 *			lookup by the mappings' base type -- confirm against
 *			the implementation. */
int
map_dom_scanner_string(struct dom_scanner *scanner,
		       unsigned char *ident, unsigned char *end, int base_type);
#ifdef DEBUG_DOM_SCANNER
/** Dump the scanner; only declared when DEBUG_DOM_SCANNER is defined. */
void dump_dom_scanner(struct dom_scanner *scanner);
#endif
/* The begin_dom_token_scanning() and end_dom_token_scanning() functions
 * provide the basic setup and teardown for the rescan function made public
 * via the scan member of struct dom_scanner_info. */
241 /* Returns NULL if it is not necessary to try to scan for more tokens */
242 static inline struct dom_scanner_token *
243 begin_dom_token_scanning(struct dom_scanner *scanner)
245 struct dom_scanner_token *table = scanner->table;
246 struct dom_scanner_token *table_end = table + scanner->tokens;
247 int move_to_front = int_max(table_end - scanner->current, 0);
248 struct dom_scanner_token *current = move_to_front ? scanner->current : table;
249 size_t moved_size = 0;
251 assert(scanner->current);
253 /* Move any untouched tokens */
254 if (move_to_front) {
255 moved_size = move_to_front * sizeof(*table);
256 memmove(table, current, moved_size);
257 current = &table[move_to_front];
260 /* Clear all unused tokens */
261 memset(current, 0, sizeof(*table) * DOM_SCANNER_TOKENS - moved_size);
263 if (!scanner->position) {
264 scanner->tokens = move_to_front ? move_to_front : -1;
265 scanner->current = table;
266 assert(check_dom_scanner(scanner));
267 return NULL;
270 scanner->tokens = move_to_front;
272 return table;
275 /* Updates the @scanner struct after scanning has been done. The position
276 * _after_ the last valid token is taken as the @end argument. */
277 /* It is ok for @end to be < scanner->table since scanner->tokens will become
278 * <= 0 anyway. */
279 static inline struct dom_scanner_token *
280 end_dom_token_scanning(struct dom_scanner *scanner, struct dom_scanner_token *end)
282 assert(end <= scanner->table + DOM_SCANNER_TOKENS);
284 scanner->tokens = (end - scanner->table);
285 scanner->current = scanner->table;
286 if (scanner->position >= scanner->end)
287 scanner->position = NULL;
289 assert(check_dom_scanner(scanner));
291 return get_dom_scanner_token(scanner);
294 #endif