ELinks currently supports only GB2312 as a Chinese encoding, but GBK and
[elinks.git] / src / dom / scanner.c
blobc3cd1baa497e80610d2fd0e68b4a4cae77abf227
1 /* A pretty generic scanner */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <stdio.h>
8 #include <string.h>
10 #include "elinks.h"
12 #include "dom/scanner.h"
13 #include "dom/string.h"
14 #include "util/error.h"
17 int
18 map_dom_scanner_string(struct dom_scanner *scanner,
19 unsigned char *ident, unsigned char *end, int base_type)
21 const struct dom_scanner_string_mapping *mappings = scanner->info->mappings;
22 struct dom_string name = INIT_DOM_STRING(ident, end - ident);
24 for (; is_dom_string_set(&mappings->name); mappings++) {
25 if (mappings->base_type == base_type
26 && !dom_string_casecmp(&mappings->name, &name))
27 return mappings->type;
30 return base_type;
34 struct dom_scanner_token *
35 skip_dom_scanner_tokens(struct dom_scanner *scanner, int skipto, int precedence)
37 struct dom_scanner_token *token = get_dom_scanner_token(scanner);
39 /* Skip tokens while handling some basic precedens of special chars
40 * so we don't skip to long. */
41 while (token) {
42 if (token->type == skipto
43 || token->precedence > precedence)
44 break;
45 token = get_next_dom_scanner_token(scanner);
48 return (token && token->type == skipto)
49 ? get_next_dom_scanner_token(scanner) : NULL;
52 #ifdef DEBUG_SCANNER
53 void
54 dump_dom_scanner(struct dom_scanner *scanner)
56 unsigned char buffer[MAX_STR_LEN];
57 struct dom_scanner_token *token = scanner->current;
58 struct dom_scanner_token *table_end = scanner->table + scanner->tokens;
59 unsigned char *srcpos = token->string, *bufpos = buffer;
60 int src_lookahead = 50;
61 int token_lookahead = 4;
62 int srclen;
64 if (!dom_scanner_has_tokens(scanner)) return;
66 memset(buffer, 0, MAX_STR_LEN);
67 for (; token_lookahead > 0 && token < table_end; token++, token_lookahead--) {
68 int buflen = MAX_STR_LEN - (bufpos - buffer);
69 int added = snprintf(bufpos, buflen, "[%.*s] ", token->length, token->string);
71 bufpos += added;
74 if (scanner->tokens > token_lookahead) {
75 memcpy(bufpos, "... ", 4);
76 bufpos += 4;
79 srclen = strlen(srcpos);
80 int_upper_bound(&src_lookahead, srclen);
81 *bufpos++ = '[';
83 /* Compress the lookahead string */
84 for (; src_lookahead > 0; src_lookahead--, srcpos++, bufpos++) {
85 if (*srcpos == '\n' || *srcpos == '\r' || *srcpos == '\t') {
86 *bufpos++ = '\\';
87 *bufpos = *srcpos == '\n' ? 'n'
88 : (*srcpos == '\r' ? 'r' : 't');
89 } else {
90 *bufpos = *srcpos;
94 if (srclen > src_lookahead)
95 memcpy(bufpos, "...]", 4);
96 else
97 memcpy(bufpos, "]", 2);
99 errfile = scanner->file, errline = scanner->line;
100 elinks_wdebug("%s", buffer);
103 struct dom_scanner_token *
104 get_dom_scanner_token_debug(struct dom_scanner *scanner)
106 if (!dom_scanner_has_tokens(scanner)) return NULL;
108 dump_dom_scanner(scanner);
110 /* Make sure we do not return invalid tokens */
111 assert(!dom_scanner_has_tokens(scanner)
112 || scanner->current->type != 0);
114 return get_dom_scanner_token(scanner);
116 #endif
119 /* Initializers */
121 static inline void
122 init_dom_scanner_info(struct dom_scanner_info *scanner_info)
124 const struct dom_scan_table_info *info = scanner_info->scan_table_info;
125 int *scan_table = scanner_info->scan_table;
126 int i;
128 if (!info) return;
130 for (i = 0; info[i].type != DOM_SCAN_END; i++) {
131 const struct dom_string *data = &info[i].data;
133 if (info[i].type == DOM_SCAN_RANGE) {
134 int index = *data->string;
136 assert(index > 0);
137 assert(data->length < DOM_SCAN_TABLE_SIZE);
138 assert(index <= data->length);
140 for (; index <= data->length; index++)
141 scan_table[index] |= info[i].bits;
143 } else {
144 unsigned char *string = info[i].data.string;
145 int pos = info[i].data.length - 1;
147 assert(info[i].type == DOM_SCAN_STRING && pos >= 0);
149 for (; pos >= 0; pos--)
150 scan_table[string[pos]] |= info[i].bits;
155 void
156 init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
157 struct dom_string *string, int state, int count_lines, int complete,
158 int check_complete, int detect_errors)
160 if (!scanner_info->initialized) {
161 init_dom_scanner_info(scanner_info);
162 scanner_info->initialized = 1;
165 memset(scanner, 0, sizeof(*scanner));
167 scanner->string = string->string;
168 scanner->position = string->string;
169 scanner->end = string->string + string->length;
170 scanner->current = scanner->table;
171 scanner->info = scanner_info;
172 scanner->state = state;
173 scanner->count_lines = !!count_lines;
174 scanner->incomplete = !complete;
175 scanner->check_complete = !!check_complete;
176 scanner->detect_errors = !!detect_errors;
177 scanner->lineno = scanner->count_lines;
178 scanner->info->scan(scanner);