Consider using "=" and IS operators with even low-quality indexes in cases where...
[sqlite.git] / tool / mkkeywordhash.c
blob5386a36c4077b5f164c08899e320133ada8ce7b5
1 /*
2 ** Compile and run this standalone program in order to generate code that
3 ** implements a function that will translate alphabetic identifiers into
4 ** parser token codes.
5 */
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <assert.h>
12 ** A header comment placed at the beginning of generated code.
14 static const char zHdr[] =
15 "/***** This file contains automatically generated code ******\n"
16 "**\n"
17 "** The code in this file has been automatically generated by\n"
18 "**\n"
19 "** sqlite/tool/mkkeywordhash.c\n"
20 "**\n"
21 "** The code in this file implements a function that determines whether\n"
22 "** or not a given identifier is really an SQL keyword. The same thing\n"
23 "** might be implemented more directly using a hand-written hash table.\n"
24 "** But by using this automatically generated code, the size of the code\n"
25 "** is substantially reduced. This is important for embedded applications\n"
26 "** on platforms with limited memory.\n"
27 "*/\n"
31 ** All the keywords of the SQL language are stored in a hash
32 ** table composed of instances of the following structure.
34 typedef struct Keyword Keyword;
35 struct Keyword {
36 char *zName; /* The keyword name */
37 char *zTokenType; /* Token value for this keyword */
38 int mask; /* Code this keyword if non-zero */
39 int priority; /* Put higher priorities earlier in the hash chain */
40 int id; /* Unique ID for this record */
41 int hash; /* Hash on the keyword */
42 int offset; /* Offset to start of name string */
43 int len; /* Length of this keyword, not counting final \000 */
44 int prefix; /* Number of characters in prefix */
45 int longestSuffix; /* Longest suffix that is a prefix on another word */
46 int iNext; /* Index in aKeywordTable[] of next with same hash */
47 int substrId; /* Id to another keyword this keyword is embedded in */
48 int substrOffset; /* Offset into substrId for start of this keyword */
49 char zOrigName[20]; /* Original keyword name before processing */
53 ** Define masks used to determine which keywords are allowed
55 #if defined(SQLITE_OMIT_ALTERTABLE) || defined(SQLITE_OMIT_VIRTUALTABLE)
56 # define ALTER 0
57 #else
58 # define ALTER 0x00000001
59 #endif
60 #define ALWAYS 0x00000002
61 #ifdef SQLITE_OMIT_ANALYZE
62 # define ANALYZE 0
63 #else
64 # define ANALYZE 0x00000004
65 #endif
66 #ifdef SQLITE_OMIT_ATTACH
67 # define ATTACH 0
68 #else
69 # define ATTACH 0x00000008
70 #endif
71 #ifdef SQLITE_OMIT_AUTOINCREMENT
72 # define AUTOINCR 0
73 #else
74 # define AUTOINCR 0x00000010
75 #endif
76 #ifdef SQLITE_OMIT_CAST
77 # define CAST 0
78 #else
79 # define CAST 0x00000020
80 #endif
81 #ifdef SQLITE_OMIT_COMPOUND_SELECT
82 # define COMPOUND 0
83 #else
84 # define COMPOUND 0x00000040
85 #endif
86 #ifdef SQLITE_OMIT_CONFLICT_CLAUSE
87 # define CONFLICT 0
88 #else
89 # define CONFLICT 0x00000080
90 #endif
91 #ifdef SQLITE_OMIT_EXPLAIN
92 # define EXPLAIN 0
93 #else
94 # define EXPLAIN 0x00000100
95 #endif
96 #ifdef SQLITE_OMIT_FOREIGN_KEY
97 # define FKEY 0
98 #else
99 # define FKEY 0x00000200
100 #endif
101 #ifdef SQLITE_OMIT_PRAGMA
102 # define PRAGMA 0
103 #else
104 # define PRAGMA 0x00000400
105 #endif
106 #ifdef SQLITE_OMIT_REINDEX
107 # define REINDEX 0
108 #else
109 # define REINDEX 0x00000800
110 #endif
111 #ifdef SQLITE_OMIT_SUBQUERY
112 # define SUBQUERY 0
113 #else
114 # define SUBQUERY 0x00001000
115 #endif
116 #ifdef SQLITE_OMIT_TRIGGER
117 # define TRIGGER 0
118 #else
119 # define TRIGGER 0x00002000
120 #endif
121 #if defined(SQLITE_OMIT_AUTOVACUUM) && \
122 (defined(SQLITE_OMIT_VACUUM) || defined(SQLITE_OMIT_ATTACH))
123 # define VACUUM 0
124 #else
125 # define VACUUM 0x00004000
126 #endif
127 #ifdef SQLITE_OMIT_VIEW
128 # define VIEW 0
129 #else
130 # define VIEW 0x00008000
131 #endif
132 #ifdef SQLITE_OMIT_VIRTUALTABLE
133 # define VTAB 0
134 #else
135 # define VTAB 0x00010000
136 #endif
137 #ifdef SQLITE_OMIT_AUTOVACUUM
138 # define AUTOVACUUM 0
139 #else
140 # define AUTOVACUUM 0x00020000
141 #endif
142 #ifdef SQLITE_OMIT_CTE
143 # define CTE 0
144 #else
145 # define CTE 0x00040000
146 #endif
147 #ifdef SQLITE_OMIT_UPSERT
148 # define UPSERT 0
149 #else
150 # define UPSERT 0x00080000
151 #endif
152 #ifdef SQLITE_OMIT_WINDOWFUNC
153 # define WINDOWFUNC 0
154 #else
155 # define WINDOWFUNC 0x00100000
156 #endif
157 #ifdef SQLITE_OMIT_GENERATED_COLUMNS
158 # define GENCOL 0
159 #else
160 # define GENCOL 0x00200000
161 #endif
162 #ifdef SQLITE_OMIT_RETURNING
163 # define RETURNING 0
164 #else
165 # define RETURNING 0x00400000
166 #endif
170 ** These are the keywords
172 static Keyword aKeywordTable[] = {
173 { "ABORT", "TK_ABORT", CONFLICT|TRIGGER, 0 },
174 { "ACTION", "TK_ACTION", FKEY, 0 },
175 { "ADD", "TK_ADD", ALTER, 1 },
176 { "AFTER", "TK_AFTER", TRIGGER, 0 },
177 { "ALL", "TK_ALL", ALWAYS, 0 },
178 { "ALTER", "TK_ALTER", ALTER, 0 },
179 { "ALWAYS", "TK_ALWAYS", GENCOL, 0 },
180 { "ANALYZE", "TK_ANALYZE", ANALYZE, 0 },
181 { "AND", "TK_AND", ALWAYS, 10 },
182 { "AS", "TK_AS", ALWAYS, 10 },
183 { "ASC", "TK_ASC", ALWAYS, 0 },
184 { "ATTACH", "TK_ATTACH", ATTACH, 1 },
185 { "AUTOINCREMENT", "TK_AUTOINCR", AUTOINCR, 0 },
186 { "BEFORE", "TK_BEFORE", TRIGGER, 0 },
187 { "BEGIN", "TK_BEGIN", ALWAYS, 1 },
188 { "BETWEEN", "TK_BETWEEN", ALWAYS, 5 },
189 { "BY", "TK_BY", ALWAYS, 10 },
190 { "CASCADE", "TK_CASCADE", FKEY, 1 },
191 { "CASE", "TK_CASE", ALWAYS, 5 },
192 { "CAST", "TK_CAST", CAST, 5 },
193 { "CHECK", "TK_CHECK", ALWAYS, 1 },
194 { "COLLATE", "TK_COLLATE", ALWAYS, 1 },
195 { "COLUMN", "TK_COLUMNKW", ALTER, 1 },
196 { "COMMIT", "TK_COMMIT", ALWAYS, 1 },
197 { "CONFLICT", "TK_CONFLICT", CONFLICT, 0 },
198 { "CONSTRAINT", "TK_CONSTRAINT", ALWAYS, 1 },
199 { "CREATE", "TK_CREATE", ALWAYS, 2 },
200 { "CROSS", "TK_JOIN_KW", ALWAYS, 3 },
201 { "CURRENT", "TK_CURRENT", WINDOWFUNC, 1 },
202 { "CURRENT_DATE", "TK_CTIME_KW", ALWAYS, 1 },
203 { "CURRENT_TIME", "TK_CTIME_KW", ALWAYS, 1 },
204 { "CURRENT_TIMESTAMP","TK_CTIME_KW", ALWAYS, 1 },
205 { "DATABASE", "TK_DATABASE", ATTACH, 0 },
206 { "DEFAULT", "TK_DEFAULT", ALWAYS, 1 },
207 { "DEFERRED", "TK_DEFERRED", ALWAYS, 1 },
208 { "DEFERRABLE", "TK_DEFERRABLE", FKEY, 1 },
209 { "DELETE", "TK_DELETE", ALWAYS, 10 },
210 { "DESC", "TK_DESC", ALWAYS, 3 },
211 { "DETACH", "TK_DETACH", ATTACH, 0 },
212 { "DISTINCT", "TK_DISTINCT", ALWAYS, 5 },
213 { "DO", "TK_DO", UPSERT, 2 },
214 { "DROP", "TK_DROP", ALWAYS, 1 },
215 { "END", "TK_END", ALWAYS, 1 },
216 { "EACH", "TK_EACH", TRIGGER, 1 },
217 { "ELSE", "TK_ELSE", ALWAYS, 2 },
218 { "ESCAPE", "TK_ESCAPE", ALWAYS, 4 },
219 { "EXCEPT", "TK_EXCEPT", COMPOUND, 4 },
220 { "EXCLUSIVE", "TK_EXCLUSIVE", ALWAYS, 1 },
221 { "EXCLUDE", "TK_EXCLUDE", WINDOWFUNC, 1 },
222 { "EXISTS", "TK_EXISTS", ALWAYS, 4 },
223 { "EXPLAIN", "TK_EXPLAIN", EXPLAIN, 1 },
224 { "FAIL", "TK_FAIL", CONFLICT|TRIGGER, 1 },
225 { "FILTER", "TK_FILTER", WINDOWFUNC, 4 },
226 { "FIRST", "TK_FIRST", ALWAYS, 4 },
227 { "FOLLOWING", "TK_FOLLOWING", WINDOWFUNC, 4 },
228 { "FOR", "TK_FOR", TRIGGER, 2 },
229 { "FOREIGN", "TK_FOREIGN", FKEY, 1 },
230 { "FROM", "TK_FROM", ALWAYS, 10 },
231 { "FULL", "TK_JOIN_KW", ALWAYS, 3 },
232 { "GENERATED", "TK_GENERATED", ALWAYS, 1 },
233 { "GLOB", "TK_LIKE_KW", ALWAYS, 3 },
234 { "GROUP", "TK_GROUP", ALWAYS, 5 },
235 { "GROUPS", "TK_GROUPS", WINDOWFUNC, 2 },
236 { "HAVING", "TK_HAVING", ALWAYS, 5 },
237 { "IF", "TK_IF", ALWAYS, 2 },
238 { "IGNORE", "TK_IGNORE", CONFLICT|TRIGGER, 1 },
239 { "IMMEDIATE", "TK_IMMEDIATE", ALWAYS, 1 },
240 { "IN", "TK_IN", ALWAYS, 10 },
241 { "INDEX", "TK_INDEX", ALWAYS, 1 },
242 { "INDEXED", "TK_INDEXED", ALWAYS, 0 },
243 { "INITIALLY", "TK_INITIALLY", FKEY, 1 },
244 { "INNER", "TK_JOIN_KW", ALWAYS, 1 },
245 { "INSERT", "TK_INSERT", ALWAYS, 10 },
246 { "INSTEAD", "TK_INSTEAD", TRIGGER, 1 },
247 { "INTERSECT", "TK_INTERSECT", COMPOUND, 5 },
248 { "INTO", "TK_INTO", ALWAYS, 10 },
249 { "IS", "TK_IS", ALWAYS, 5 },
250 { "ISNULL", "TK_ISNULL", ALWAYS, 5 },
251 { "JOIN", "TK_JOIN", ALWAYS, 5 },
252 { "KEY", "TK_KEY", ALWAYS, 1 },
253 { "LAST", "TK_LAST", ALWAYS, 4 },
254 { "LEFT", "TK_JOIN_KW", ALWAYS, 5 },
255 { "LIKE", "TK_LIKE_KW", ALWAYS, 5 },
256 { "LIMIT", "TK_LIMIT", ALWAYS, 3 },
257 { "MATCH", "TK_MATCH", ALWAYS, 2 },
258 { "MATERIALIZED", "TK_MATERIALIZED", CTE, 12 },
259 { "NATURAL", "TK_JOIN_KW", ALWAYS, 3 },
260 { "NO", "TK_NO", FKEY|WINDOWFUNC, 2 },
261 { "NOT", "TK_NOT", ALWAYS, 10 },
262 { "NOTHING", "TK_NOTHING", UPSERT, 1 },
263 { "NOTNULL", "TK_NOTNULL", ALWAYS, 3 },
264 { "NULL", "TK_NULL", ALWAYS, 10 },
265 { "NULLS", "TK_NULLS", ALWAYS, 3 },
266 { "OF", "TK_OF", ALWAYS, 3 },
267 { "OFFSET", "TK_OFFSET", ALWAYS, 1 },
268 { "ON", "TK_ON", ALWAYS, 1 },
269 { "OR", "TK_OR", ALWAYS, 9 },
270 { "ORDER", "TK_ORDER", ALWAYS, 10 },
271 { "OTHERS", "TK_OTHERS", WINDOWFUNC, 3 },
272 { "OUTER", "TK_JOIN_KW", ALWAYS, 5 },
273 { "OVER", "TK_OVER", WINDOWFUNC, 3 },
274 { "PARTITION", "TK_PARTITION", WINDOWFUNC, 3 },
275 { "PLAN", "TK_PLAN", EXPLAIN, 0 },
276 { "PRAGMA", "TK_PRAGMA", PRAGMA, 0 },
277 { "PRECEDING", "TK_PRECEDING", WINDOWFUNC, 3 },
278 { "PRIMARY", "TK_PRIMARY", ALWAYS, 1 },
279 { "QUERY", "TK_QUERY", EXPLAIN, 0 },
280 { "RAISE", "TK_RAISE", TRIGGER, 1 },
281 { "RANGE", "TK_RANGE", WINDOWFUNC, 3 },
282 { "RECURSIVE", "TK_RECURSIVE", CTE, 3 },
283 { "REFERENCES", "TK_REFERENCES", FKEY, 1 },
284 { "REGEXP", "TK_LIKE_KW", ALWAYS, 3 },
285 { "REINDEX", "TK_REINDEX", REINDEX, 1 },
286 { "RELEASE", "TK_RELEASE", ALWAYS, 1 },
287 { "RENAME", "TK_RENAME", ALTER, 1 },
288 { "REPLACE", "TK_REPLACE", CONFLICT, 10 },
289 { "RESTRICT", "TK_RESTRICT", FKEY, 1 },
290 { "RETURNING", "TK_RETURNING", RETURNING, 10 },
291 { "RIGHT", "TK_JOIN_KW", ALWAYS, 0 },
292 { "ROLLBACK", "TK_ROLLBACK", ALWAYS, 1 },
293 { "ROW", "TK_ROW", TRIGGER, 1 },
294 { "ROWS", "TK_ROWS", ALWAYS, 1 },
295 { "SAVEPOINT", "TK_SAVEPOINT", ALWAYS, 1 },
296 { "SELECT", "TK_SELECT", ALWAYS, 10 },
297 { "SET", "TK_SET", ALWAYS, 10 },
298 { "TABLE", "TK_TABLE", ALWAYS, 1 },
299 { "TEMP", "TK_TEMP", ALWAYS, 1 },
300 { "TEMPORARY", "TK_TEMP", ALWAYS, 1 },
301 { "THEN", "TK_THEN", ALWAYS, 3 },
302 { "TIES", "TK_TIES", WINDOWFUNC, 3 },
303 { "TO", "TK_TO", ALWAYS, 3 },
304 { "TRANSACTION", "TK_TRANSACTION", ALWAYS, 1 },
305 { "TRIGGER", "TK_TRIGGER", TRIGGER, 1 },
306 { "UNBOUNDED", "TK_UNBOUNDED", WINDOWFUNC, 3 },
307 { "UNION", "TK_UNION", COMPOUND, 3 },
308 { "UNIQUE", "TK_UNIQUE", ALWAYS, 1 },
309 { "UPDATE", "TK_UPDATE", ALWAYS, 10 },
310 { "USING", "TK_USING", ALWAYS, 8 },
311 { "VACUUM", "TK_VACUUM", VACUUM, 1 },
312 { "VALUES", "TK_VALUES", ALWAYS, 10 },
313 { "VIEW", "TK_VIEW", VIEW, 1 },
314 { "VIRTUAL", "TK_VIRTUAL", VTAB, 1 },
315 { "WHEN", "TK_WHEN", ALWAYS, 1 },
316 { "WHERE", "TK_WHERE", ALWAYS, 10 },
317 { "WINDOW", "TK_WINDOW", WINDOWFUNC, 3 },
318 { "WITH", "TK_WITH", CTE, 4 },
319 { "WITHOUT", "TK_WITHOUT", ALWAYS, 1 },
322 /* Number of keywords */
323 static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0]));
325 /* Map all alphabetic characters into lower-case for hashing. This is
326 ** only valid for alphabetics. In particular it does not work for '_'
327 ** and so the hash cannot be on a keyword position that might be an '_'.
329 #define charMap(X) (0x20|(X))
332 ** Comparision function for two Keyword records
334 static int keywordCompare1(const void *a, const void *b){
335 const Keyword *pA = (Keyword*)a;
336 const Keyword *pB = (Keyword*)b;
337 int n = pA->len - pB->len;
338 if( n==0 ){
339 n = strcmp(pA->zName, pB->zName);
341 assert( n!=0 );
342 return n;
344 static int keywordCompare2(const void *a, const void *b){
345 const Keyword *pA = (Keyword*)a;
346 const Keyword *pB = (Keyword*)b;
347 int n = pB->longestSuffix - pA->longestSuffix;
348 if( n==0 ){
349 n = strcmp(pA->zName, pB->zName);
351 assert( n!=0 );
352 return n;
354 static int keywordCompare3(const void *a, const void *b){
355 const Keyword *pA = (Keyword*)a;
356 const Keyword *pB = (Keyword*)b;
357 int n = pA->offset - pB->offset;
358 if( n==0 ) n = pB->id - pA->id;
359 assert( n!=0 );
360 return n;
364 ** Return a KeywordTable entry with the given id
366 static Keyword *findById(int id){
367 int i;
368 for(i=0; i<nKeyword; i++){
369 if( aKeywordTable[i].id==id ) break;
371 return &aKeywordTable[i];
375 ** If aKeyword[*pFrom-1].iNext has a higher priority that aKeyword[*pFrom-1]
376 ** itself, then swap them.
378 static void reorder(int *pFrom){
379 int i = *pFrom - 1;
380 int j;
381 if( i<0 ) return;
382 j = aKeywordTable[i].iNext;
383 if( j==0 ) return;
384 j--;
385 if( aKeywordTable[i].priority >= aKeywordTable[j].priority ) return;
386 aKeywordTable[i].iNext = aKeywordTable[j].iNext;
387 aKeywordTable[j].iNext = i+1;
388 *pFrom = j+1;
389 reorder(&aKeywordTable[i].iNext);
392 /* Parameter to the hash function
394 #define HASH_OP ^
395 #define HASH_CC '^'
396 #define HASH_C0 4
397 #define HASH_C1 3
398 #define HASH_C2 1
401 ** This routine does the work. The generated code is printed on standard
402 ** output.
404 int main(int argc, char **argv){
405 int i, j, k, h;
406 int bestSize, bestCount;
407 int count;
408 int nChar;
409 int totalLen = 0;
410 int aKWHash[1000]; /* 1000 is much bigger than nKeyword */
411 char zKWText[2000];
413 /* Remove entries from the list of keywords that have mask==0 */
414 for(i=j=0; i<nKeyword; i++){
415 if( aKeywordTable[i].mask==0 ) continue;
416 if( j<i ){
417 aKeywordTable[j] = aKeywordTable[i];
419 j++;
421 nKeyword = j;
423 /* Fill in the lengths of strings and hashes for all entries. */
424 for(i=0; i<nKeyword; i++){
425 Keyword *p = &aKeywordTable[i];
426 p->len = (int)strlen(p->zName);
427 assert( p->len<sizeof(p->zOrigName) );
428 memcpy(p->zOrigName, p->zName, p->len+1);
429 totalLen += p->len;
430 p->hash = (charMap(p->zName[0])*HASH_C0) HASH_OP
431 (charMap(p->zName[p->len-1])*HASH_C1) HASH_OP
432 (p->len*HASH_C2);
433 p->id = i+1;
436 /* Sort the table from shortest to longest keyword */
437 qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1);
439 /* Look for short keywords embedded in longer keywords */
440 for(i=nKeyword-2; i>=0; i--){
441 Keyword *p = &aKeywordTable[i];
442 for(j=nKeyword-1; j>i && p->substrId==0; j--){
443 Keyword *pOther = &aKeywordTable[j];
444 if( pOther->substrId ) continue;
445 if( pOther->len<=p->len ) continue;
446 for(k=0; k<=pOther->len-p->len; k++){
447 if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){
448 p->substrId = pOther->id;
449 p->substrOffset = k;
450 break;
456 /* Compute the longestSuffix value for every word */
457 for(i=0; i<nKeyword; i++){
458 Keyword *p = &aKeywordTable[i];
459 if( p->substrId ) continue;
460 for(j=0; j<nKeyword; j++){
461 Keyword *pOther;
462 if( j==i ) continue;
463 pOther = &aKeywordTable[j];
464 if( pOther->substrId ) continue;
465 for(k=p->longestSuffix+1; k<p->len && k<pOther->len; k++){
466 if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
467 p->longestSuffix = k;
473 /* Sort the table into reverse order by length */
474 qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare2);
476 /* Fill in the offset for all entries */
477 nChar = 0;
478 for(i=0; i<nKeyword; i++){
479 Keyword *p = &aKeywordTable[i];
480 if( p->offset>0 || p->substrId ) continue;
481 p->offset = nChar;
482 nChar += p->len;
483 for(k=p->len-1; k>=1; k--){
484 for(j=i+1; j<nKeyword; j++){
485 Keyword *pOther = &aKeywordTable[j];
486 if( pOther->offset>0 || pOther->substrId ) continue;
487 if( pOther->len<=k ) continue;
488 if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
489 p = pOther;
490 p->offset = nChar - k;
491 nChar = p->offset + p->len;
492 p->zName += k;
493 p->len -= k;
494 p->prefix = k;
495 j = i;
496 k = p->len;
501 for(i=0; i<nKeyword; i++){
502 Keyword *p = &aKeywordTable[i];
503 if( p->substrId ){
504 p->offset = findById(p->substrId)->offset + p->substrOffset;
508 /* Sort the table by offset */
509 qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3);
511 /* Figure out how big to make the hash table in order to minimize the
512 ** number of collisions */
513 bestSize = nKeyword;
514 bestCount = nKeyword*nKeyword;
515 for(i=nKeyword/2; i<=2*nKeyword; i++){
516 if( i<=0 ) continue;
517 for(j=0; j<i; j++) aKWHash[j] = 0;
518 for(j=0; j<nKeyword; j++){
519 h = aKeywordTable[j].hash % i;
520 aKWHash[h] *= 2;
521 aKWHash[h]++;
523 for(j=count=0; j<i; j++) count += aKWHash[j];
524 if( count<bestCount ){
525 bestCount = count;
526 bestSize = i;
530 /* Compute the hash */
531 for(i=0; i<bestSize; i++) aKWHash[i] = 0;
532 for(i=0; i<nKeyword; i++){
533 h = aKeywordTable[i].hash % bestSize;
534 aKeywordTable[i].iNext = aKWHash[h];
535 aKWHash[h] = i+1;
536 reorder(&aKWHash[h]);
539 /* Begin generating code */
540 printf("%s", zHdr);
541 printf("/* Hash score: %d */\n", bestCount);
542 printf("/* zKWText[] encodes %d bytes of keyword text in %d bytes */\n",
543 totalLen + nKeyword, nChar+1 );
544 for(i=j=k=0; i<nKeyword; i++){
545 Keyword *p = &aKeywordTable[i];
546 if( p->substrId ) continue;
547 memcpy(&zKWText[k], p->zName, p->len);
548 k += p->len;
549 if( j+p->len>70 ){
550 printf("%*s */\n", 74-j, "");
551 j = 0;
553 if( j==0 ){
554 printf("/* ");
555 j = 8;
557 printf("%s", p->zName);
558 j += p->len;
560 if( j>0 ){
561 printf("%*s */\n", 74-j, "");
563 printf("static const char zKWText[%d] = {\n", nChar);
564 zKWText[nChar] = 0;
565 for(i=j=0; i<k; i++){
566 if( j==0 ){
567 printf(" ");
569 if( zKWText[i]==0 ){
570 printf("0");
571 }else{
572 printf("'%c',", zKWText[i]);
574 j += 4;
575 if( j>68 ){
576 printf("\n");
577 j = 0;
580 if( j>0 ) printf("\n");
581 printf("};\n");
583 printf("/* aKWHash[i] is the hash value for the i-th keyword */\n");
584 printf("static const unsigned char aKWHash[%d] = {\n", bestSize);
585 for(i=j=0; i<bestSize; i++){
586 if( j==0 ) printf(" ");
587 printf(" %3d,", aKWHash[i]);
588 j++;
589 if( j>12 ){
590 printf("\n");
591 j = 0;
594 printf("%s};\n", j==0 ? "" : "\n");
596 printf("/* aKWNext[] forms the hash collision chain. If aKWHash[i]==0\n");
597 printf("** then the i-th keyword has no more hash collisions. Otherwise,\n");
598 printf("** the next keyword with the same hash is aKWHash[i]-1. */\n");
599 printf("static const unsigned char aKWNext[%d] = {0,\n", nKeyword+1);
600 for(i=j=0; i<nKeyword; i++){
601 if( j==0 ) printf(" ");
602 printf(" %3d,", aKeywordTable[i].iNext);
603 j++;
604 if( j>12 ){
605 printf("\n");
606 j = 0;
609 printf("%s};\n", j==0 ? "" : "\n");
611 printf("/* aKWLen[i] is the length (in bytes) of the i-th keyword */\n");
612 printf("static const unsigned char aKWLen[%d] = {0,\n", nKeyword+1);
613 for(i=j=0; i<nKeyword; i++){
614 if( j==0 ) printf(" ");
615 printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix);
616 j++;
617 if( j>12 ){
618 printf("\n");
619 j = 0;
622 printf("%s};\n", j==0 ? "" : "\n");
624 printf("/* aKWOffset[i] is the index into zKWText[] of the start of\n");
625 printf("** the text for the i-th keyword. */\n");
626 printf("static const unsigned short int aKWOffset[%d] = {0,\n", nKeyword+1);
627 for(i=j=0; i<nKeyword; i++){
628 if( j==0 ) printf(" ");
629 printf(" %3d,", aKeywordTable[i].offset);
630 j++;
631 if( j>12 ){
632 printf("\n");
633 j = 0;
636 printf("%s};\n", j==0 ? "" : "\n");
638 printf("/* aKWCode[i] is the parser symbol code for the i-th keyword */\n");
639 printf("static const unsigned char aKWCode[%d] = {0,\n", nKeyword+1);
640 for(i=j=0; i<nKeyword; i++){
641 char *zToken = aKeywordTable[i].zTokenType;
642 if( j==0 ) printf(" ");
643 printf("%s,%*s", zToken, (int)(14-strlen(zToken)), "");
644 j++;
645 if( j>=5 ){
646 printf("\n");
647 j = 0;
650 printf("%s};\n", j==0 ? "" : "\n");
651 printf("/* Hash table decoded:\n");
652 for(i=0; i<bestSize; i++){
653 j = aKWHash[i];
654 printf("** %3d:", i);
655 while( j ){
656 printf(" %s", aKeywordTable[j-1].zOrigName);
657 j = aKeywordTable[j-1].iNext;
659 printf("\n");
661 printf("*/\n");
662 printf("/* Check to see if z[0..n-1] is a keyword. If it is, write the\n");
663 printf("** parser symbol code for that keyword into *pType. Always\n");
664 printf("** return the integer n (the length of the token). */\n");
665 printf("static int keywordCode(const char *z, int n, int *pType){\n");
666 printf(" int i, j;\n");
667 printf(" const char *zKW;\n");
668 printf(" assert( n>=2 );\n");
669 printf(" i = ((charMap(z[0])*%d) %c", HASH_C0, HASH_CC);
670 printf(" (charMap(z[n-1])*%d) %c", HASH_C1, HASH_CC);
671 printf(" n*%d) %% %d;\n", HASH_C2, bestSize);
672 printf(" for(i=(int)aKWHash[i]; i>0; i=aKWNext[i]){\n");
673 printf(" if( aKWLen[i]!=n ) continue;\n");
674 printf(" zKW = &zKWText[aKWOffset[i]];\n");
675 printf("#ifdef SQLITE_ASCII\n");
676 printf(" if( (z[0]&~0x20)!=zKW[0] ) continue;\n");
677 printf(" if( (z[1]&~0x20)!=zKW[1] ) continue;\n");
678 printf(" j = 2;\n");
679 printf(" while( j<n && (z[j]&~0x20)==zKW[j] ){ j++; }\n");
680 printf("#endif\n");
681 printf("#ifdef SQLITE_EBCDIC\n");
682 printf(" if( toupper(z[0])!=zKW[0] ) continue;\n");
683 printf(" if( toupper(z[1])!=zKW[1] ) continue;\n");
684 printf(" j = 2;\n");
685 printf(" while( j<n && toupper(z[j])==zKW[j] ){ j++; }\n");
686 printf("#endif\n");
687 printf(" if( j<n ) continue;\n");
688 for(i=0; i<nKeyword; i++){
689 printf(" testcase( i==%d ); /* %s */\n",
690 i+1, aKeywordTable[i].zOrigName);
692 printf(" *pType = aKWCode[i];\n");
693 printf(" break;\n");
694 printf(" }\n");
695 printf(" return n;\n");
696 printf("}\n");
697 printf("int sqlite3KeywordCode(const unsigned char *z, int n){\n");
698 printf(" int id = TK_ID;\n");
699 printf(" if( n>=2 ) keywordCode((char*)z, n, &id);\n");
700 printf(" return id;\n");
701 printf("}\n");
702 printf("#define SQLITE_N_KEYWORD %d\n", nKeyword);
703 printf("int sqlite3_keyword_name(int i,const char **pzName,int *pnName){\n");
704 printf(" if( i<0 || i>=SQLITE_N_KEYWORD ) return SQLITE_ERROR;\n");
705 printf(" i++;\n");
706 printf(" *pzName = zKWText + aKWOffset[i];\n");
707 printf(" *pnName = aKWLen[i];\n");
708 printf(" return SQLITE_OK;\n");
709 printf("}\n");
710 printf("int sqlite3_keyword_count(void){ return SQLITE_N_KEYWORD; }\n");
711 printf("int sqlite3_keyword_check(const char *zName, int nName){\n");
712 printf(" return TK_ID!=sqlite3KeywordCode((const u8*)zName, nName);\n");
713 printf("}\n");
715 return 0;