add_html_to_string: Also restore the '\0' terminator.
[elinks/kon.git] / src / util / scanner.c
blob3f7da6c083d4dab3c133f8696acad3526d19cd32
1 /* A pretty generic scanner */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <stdio.h>
8 #include <string.h>
10 #include "elinks.h"
12 #include "util/error.h"
13 #include "util/scanner.h"
14 #include "util/string.h"
17 int
18 map_scanner_string(struct scanner *scanner,
19 unsigned char *ident, unsigned char *end, int base_type)
21 const struct scanner_string_mapping *mappings = scanner->info->mappings;
22 int length = end - ident;
24 for (; mappings->name; mappings++) {
25 if (mappings->base_type == base_type
26 && !strlcasecmp(mappings->name, -1, ident, length))
27 return mappings->type;
30 return base_type;
34 struct scanner_token *
35 skip_scanner_tokens(struct scanner *scanner, int skipto, int precedence)
37 struct scanner_token *token = get_scanner_token(scanner);
39 /* Skip tokens while handling some basic precedens of special chars
40 * so we don't skip to long. */
41 while (token) {
42 if (token->type == skipto
43 || token->precedence > precedence)
44 break;
45 token = get_next_scanner_token(scanner);
48 return (token && token->type == skipto)
49 ? get_next_scanner_token(scanner) : NULL;
52 #ifdef DEBUG_SCANNER
53 void
54 dump_scanner(struct scanner *scanner)
56 unsigned char buffer[MAX_STR_LEN];
57 struct scanner_token *token = scanner->current;
58 struct scanner_token *table_end = scanner->table + scanner->tokens;
59 unsigned char *srcpos = token->string, *bufpos = buffer;
60 int src_lookahead = 50;
61 int token_lookahead = 4;
62 int srclen;
64 if (!scanner_has_tokens(scanner)) return;
66 memset(buffer, 0, MAX_STR_LEN);
67 for (; token_lookahead > 0 && token < table_end; token++, token_lookahead--) {
68 int buflen = MAX_STR_LEN - (bufpos - buffer);
69 int added = snprintf(bufpos, buflen, "[%.*s] ", token->length, token->string);
71 bufpos += added;
74 if (scanner->tokens > token_lookahead) {
75 memcpy(bufpos, "... ", 4);
76 bufpos += 4;
79 srclen = strlen(srcpos);
80 int_upper_bound(&src_lookahead, srclen);
81 *bufpos++ = '[';
83 /* Compress the lookahead string */
84 for (; src_lookahead > 0; src_lookahead--, srcpos++, bufpos++) {
85 if (*srcpos == '\n' || *srcpos == '\r' || *srcpos == '\t') {
86 *bufpos++ = '\\';
87 *bufpos = *srcpos == '\n' ? 'n'
88 : (*srcpos == '\r' ? 'r' : 't');
89 } else {
90 *bufpos = *srcpos;
94 if (srclen > src_lookahead)
95 memcpy(bufpos, "...]", 4);
96 else
97 memcpy(bufpos, "]", 2);
99 errfile = scanner->file, errline = scanner->line;
100 elinks_wdebug("%s", buffer);
103 struct scanner_token *
104 get_scanner_token_debug(struct scanner *scanner)
106 if (!scanner_has_tokens(scanner)) return NULL;
108 dump_scanner(scanner);
110 /* Make sure we do not return invalid tokens */
111 assert(!scanner_has_tokens(scanner)
112 || scanner->current->type != 0);
114 return get_scanner_token(scanner);
116 #endif
119 /* Initializers */
121 static inline void
122 init_scanner_info(struct scanner_info *scanner_info)
124 const struct scan_table_info *info = scanner_info->scan_table_info;
125 int *scan_table = scanner_info->scan_table;
126 int i;
128 if (!info) return;
130 for (i = 0; info[i].type != SCAN_END; i++) {
131 const union scan_table_data *data = &info[i].data;
133 if (info[i].type == SCAN_RANGE) {
134 int index = *data->range.start;
136 assert(index > 0);
137 assert(data->range.end < SCAN_TABLE_SIZE);
138 assert(index <= data->range.end);
140 for (; index <= data->range.end; index++)
141 scan_table[index] |= info[i].bits;
143 } else {
144 unsigned char *string = info[i].data.string.source;
145 int pos = info[i].data.string.length - 1;
147 assert(info[i].type == SCAN_STRING && pos >= 0);
149 for (; pos >= 0; pos--)
150 scan_table[string[pos]] |= info[i].bits;
155 void
156 init_scanner(struct scanner *scanner, struct scanner_info *scanner_info,
157 unsigned char *string, unsigned char *end)
159 if (!scanner_info->initialized) {
160 init_scanner_info(scanner_info);
161 scanner_info->initialized = 1;
164 memset(scanner, 0, sizeof(*scanner));
166 scanner->string = string;
167 scanner->position = string;
168 scanner->end = end ? end : string + strlen(string);
169 scanner->current = scanner->table;
170 scanner->info = scanner_info;
171 scanner->info->scan(scanner);