2 ** Compile and run this standalone program in order to generate code that
3 ** implements a function that will translate alphabetic identifiers into
12 ** A header comment placed at the beginning of generated code.
14 static const char zHdr
[] =
15 "/***** This file contains automatically generated code ******\n"
17 "** The code in this file has been automatically generated by\n"
19 "** sqlite/tool/mkkeywordhash.c\n"
21 "** The code in this file implements a function that determines whether\n"
22 "** or not a given identifier is really an SQL keyword. The same thing\n"
23 "** might be implemented more directly using a hand-written hash table.\n"
24 "** But by using this automatically generated code, the size of the code\n"
25 "** is substantially reduced. This is important for embedded applications\n"
26 "** on platforms with limited memory.\n"
31 ** All the keywords of the SQL language are stored in a hash
32 ** table composed of instances of the following structure.
34 typedef struct Keyword Keyword
;
36 char *zName
; /* The keyword name */
37 char *zTokenType
; /* Token value for this keyword */
38 int mask
; /* Code this keyword if non-zero */
39 int id
; /* Unique ID for this record */
40 int hash
; /* Hash on the keyword */
41 int offset
; /* Offset to start of name string */
42 int len
; /* Length of this keyword, not counting final \000 */
43 int prefix
; /* Number of characters in prefix */
44 int longestSuffix
; /* Longest suffix that is a prefix on another word */
45 int iNext
; /* Index in aKeywordTable[] of next with same hash */
46 int substrId
; /* Id to another keyword this keyword is embedded in */
47 int substrOffset
; /* Offset into substrId for start of this keyword */
48 char zOrigName
[20]; /* Original keyword name before processing */
52 ** Define masks used to determine which keywords are allowed
54 #ifdef SQLITE_OMIT_ALTERTABLE
57 # define ALTER 0x00000001
59 #define ALWAYS 0x00000002
60 #ifdef SQLITE_OMIT_ANALYZE
63 # define ANALYZE 0x00000004
65 #ifdef SQLITE_OMIT_ATTACH
68 # define ATTACH 0x00000008
70 #ifdef SQLITE_OMIT_AUTOINCREMENT
73 # define AUTOINCR 0x00000010
75 #ifdef SQLITE_OMIT_CAST
78 # define CAST 0x00000020
80 #ifdef SQLITE_OMIT_COMPOUND_SELECT
83 # define COMPOUND 0x00000040
85 #ifdef SQLITE_OMIT_CONFLICT_CLAUSE
88 # define CONFLICT 0x00000080
90 #ifdef SQLITE_OMIT_EXPLAIN
93 # define EXPLAIN 0x00000100
95 #ifdef SQLITE_OMIT_FOREIGN_KEY
98 # define FKEY 0x00000200
100 #ifdef SQLITE_OMIT_PRAGMA
103 # define PRAGMA 0x00000400
105 #ifdef SQLITE_OMIT_REINDEX
108 # define REINDEX 0x00000800
110 #ifdef SQLITE_OMIT_SUBQUERY
113 # define SUBQUERY 0x00001000
115 #ifdef SQLITE_OMIT_TRIGGER
118 # define TRIGGER 0x00002000
120 #if defined(SQLITE_OMIT_AUTOVACUUM) && \
121 (defined(SQLITE_OMIT_VACUUM) || defined(SQLITE_OMIT_ATTACH))
124 # define VACUUM 0x00004000
126 #ifdef SQLITE_OMIT_VIEW
129 # define VIEW 0x00008000
131 #ifdef SQLITE_OMIT_VIRTUALTABLE
134 # define VTAB 0x00010000
136 #ifdef SQLITE_OMIT_AUTOVACUUM
137 # define AUTOVACUUM 0
139 # define AUTOVACUUM 0x00020000
141 #ifdef SQLITE_OMIT_CTE
144 # define CTE 0x00040000
148 ** These are the keywords
150 static Keyword aKeywordTable
[] = {
151 { "ABORT", "TK_ABORT", CONFLICT
|TRIGGER
},
152 { "ACTION", "TK_ACTION", FKEY
},
153 { "ADD", "TK_ADD", ALTER
},
154 { "AFTER", "TK_AFTER", TRIGGER
},
155 { "ALL", "TK_ALL", ALWAYS
},
156 { "ALTER", "TK_ALTER", ALTER
},
157 { "ANALYZE", "TK_ANALYZE", ANALYZE
},
158 { "AND", "TK_AND", ALWAYS
},
159 { "AS", "TK_AS", ALWAYS
},
160 { "ASC", "TK_ASC", ALWAYS
},
161 { "ATTACH", "TK_ATTACH", ATTACH
},
162 { "AUTOINCREMENT", "TK_AUTOINCR", AUTOINCR
},
163 { "BEFORE", "TK_BEFORE", TRIGGER
},
164 { "BEGIN", "TK_BEGIN", ALWAYS
},
165 { "BETWEEN", "TK_BETWEEN", ALWAYS
},
166 { "BY", "TK_BY", ALWAYS
},
167 { "CASCADE", "TK_CASCADE", FKEY
},
168 { "CASE", "TK_CASE", ALWAYS
},
169 { "CAST", "TK_CAST", CAST
},
170 { "CHECK", "TK_CHECK", ALWAYS
},
171 { "COLLATE", "TK_COLLATE", ALWAYS
},
172 { "COLUMN", "TK_COLUMNKW", ALTER
},
173 { "COMMIT", "TK_COMMIT", ALWAYS
},
174 { "CONFLICT", "TK_CONFLICT", CONFLICT
},
175 { "CONSTRAINT", "TK_CONSTRAINT", ALWAYS
},
176 { "CREATE", "TK_CREATE", ALWAYS
},
177 { "CROSS", "TK_JOIN_KW", ALWAYS
},
178 { "CURRENT_DATE", "TK_CTIME_KW", ALWAYS
},
179 { "CURRENT_TIME", "TK_CTIME_KW", ALWAYS
},
180 { "CURRENT_TIMESTAMP","TK_CTIME_KW", ALWAYS
},
181 { "DATABASE", "TK_DATABASE", ATTACH
},
182 { "DEFAULT", "TK_DEFAULT", ALWAYS
},
183 { "DEFERRED", "TK_DEFERRED", ALWAYS
},
184 { "DEFERRABLE", "TK_DEFERRABLE", FKEY
},
185 { "DELETE", "TK_DELETE", ALWAYS
},
186 { "DESC", "TK_DESC", ALWAYS
},
187 { "DETACH", "TK_DETACH", ATTACH
},
188 { "DISTINCT", "TK_DISTINCT", ALWAYS
},
189 { "DROP", "TK_DROP", ALWAYS
},
190 { "END", "TK_END", ALWAYS
},
191 { "EACH", "TK_EACH", TRIGGER
},
192 { "ELSE", "TK_ELSE", ALWAYS
},
193 { "ESCAPE", "TK_ESCAPE", ALWAYS
},
194 { "EXCEPT", "TK_EXCEPT", COMPOUND
},
195 { "EXCLUSIVE", "TK_EXCLUSIVE", ALWAYS
},
196 { "EXISTS", "TK_EXISTS", ALWAYS
},
197 { "EXPLAIN", "TK_EXPLAIN", EXPLAIN
},
198 { "FAIL", "TK_FAIL", CONFLICT
|TRIGGER
},
199 { "FOR", "TK_FOR", TRIGGER
},
200 { "FOREIGN", "TK_FOREIGN", FKEY
},
201 { "FROM", "TK_FROM", ALWAYS
},
202 { "FULL", "TK_JOIN_KW", ALWAYS
},
203 { "GLOB", "TK_LIKE_KW", ALWAYS
},
204 { "GROUP", "TK_GROUP", ALWAYS
},
205 { "HAVING", "TK_HAVING", ALWAYS
},
206 { "IF", "TK_IF", ALWAYS
},
207 { "IGNORE", "TK_IGNORE", CONFLICT
|TRIGGER
},
208 { "IMMEDIATE", "TK_IMMEDIATE", ALWAYS
},
209 { "IN", "TK_IN", ALWAYS
},
210 { "INDEX", "TK_INDEX", ALWAYS
},
211 { "INDEXED", "TK_INDEXED", ALWAYS
},
212 { "INITIALLY", "TK_INITIALLY", FKEY
},
213 { "INNER", "TK_JOIN_KW", ALWAYS
},
214 { "INSERT", "TK_INSERT", ALWAYS
},
215 { "INSTEAD", "TK_INSTEAD", TRIGGER
},
216 { "INTERSECT", "TK_INTERSECT", COMPOUND
},
217 { "INTO", "TK_INTO", ALWAYS
},
218 { "IS", "TK_IS", ALWAYS
},
219 { "ISNULL", "TK_ISNULL", ALWAYS
},
220 { "JOIN", "TK_JOIN", ALWAYS
},
221 { "KEY", "TK_KEY", ALWAYS
},
222 { "LEFT", "TK_JOIN_KW", ALWAYS
},
223 { "LIKE", "TK_LIKE_KW", ALWAYS
},
224 { "LIMIT", "TK_LIMIT", ALWAYS
},
225 { "MATCH", "TK_MATCH", ALWAYS
},
226 { "NATURAL", "TK_JOIN_KW", ALWAYS
},
227 { "NO", "TK_NO", FKEY
},
228 { "NOT", "TK_NOT", ALWAYS
},
229 { "NOTNULL", "TK_NOTNULL", ALWAYS
},
230 { "NULL", "TK_NULL", ALWAYS
},
231 { "OF", "TK_OF", ALWAYS
},
232 { "OFFSET", "TK_OFFSET", ALWAYS
},
233 { "ON", "TK_ON", ALWAYS
},
234 { "OR", "TK_OR", ALWAYS
},
235 { "ORDER", "TK_ORDER", ALWAYS
},
236 { "OUTER", "TK_JOIN_KW", ALWAYS
},
237 { "PLAN", "TK_PLAN", EXPLAIN
},
238 { "PRAGMA", "TK_PRAGMA", PRAGMA
},
239 { "PRIMARY", "TK_PRIMARY", ALWAYS
},
240 { "QUERY", "TK_QUERY", EXPLAIN
},
241 { "RAISE", "TK_RAISE", TRIGGER
},
242 { "RECURSIVE", "TK_RECURSIVE", CTE
},
243 { "REFERENCES", "TK_REFERENCES", FKEY
},
244 { "REGEXP", "TK_LIKE_KW", ALWAYS
},
245 { "REINDEX", "TK_REINDEX", REINDEX
},
246 { "RELEASE", "TK_RELEASE", ALWAYS
},
247 { "RENAME", "TK_RENAME", ALTER
},
248 { "REPLACE", "TK_REPLACE", CONFLICT
},
249 { "RESTRICT", "TK_RESTRICT", FKEY
},
250 { "RIGHT", "TK_JOIN_KW", ALWAYS
},
251 { "ROLLBACK", "TK_ROLLBACK", ALWAYS
},
252 { "ROW", "TK_ROW", TRIGGER
},
253 { "SAVEPOINT", "TK_SAVEPOINT", ALWAYS
},
254 { "SELECT", "TK_SELECT", ALWAYS
},
255 { "SET", "TK_SET", ALWAYS
},
256 { "TABLE", "TK_TABLE", ALWAYS
},
257 { "TEMP", "TK_TEMP", ALWAYS
},
258 { "TEMPORARY", "TK_TEMP", ALWAYS
},
259 { "THEN", "TK_THEN", ALWAYS
},
260 { "TO", "TK_TO", ALWAYS
},
261 { "TRANSACTION", "TK_TRANSACTION", ALWAYS
},
262 { "TRIGGER", "TK_TRIGGER", TRIGGER
},
263 { "UNION", "TK_UNION", COMPOUND
},
264 { "UNIQUE", "TK_UNIQUE", ALWAYS
},
265 { "UPDATE", "TK_UPDATE", ALWAYS
},
266 { "USING", "TK_USING", ALWAYS
},
267 { "VACUUM", "TK_VACUUM", VACUUM
},
268 { "VALUES", "TK_VALUES", ALWAYS
},
269 { "VIEW", "TK_VIEW", VIEW
},
270 { "VIRTUAL", "TK_VIRTUAL", VTAB
},
271 { "WITH", "TK_WITH", CTE
},
272 { "WITHOUT", "TK_WITHOUT", ALWAYS
},
273 { "WHEN", "TK_WHEN", ALWAYS
},
274 { "WHERE", "TK_WHERE", ALWAYS
},
277 /* Number of keywords */
278 static int nKeyword
= (sizeof(aKeywordTable
)/sizeof(aKeywordTable
[0]));
280 /* Map all alphabetic characters into lower-case for hashing. This is
281 ** only valid for alphabetics. In particular it does not work for '_'
282 ** and so the hash cannot be on a keyword position that might be an '_'.
284 #define charMap(X) (0x20|(X))
287 ** Comparison function for two Keyword records
289 static int keywordCompare1(const void *a
, const void *b
){
290 const Keyword
*pA
= (Keyword
*)a
;
291 const Keyword
*pB
= (Keyword
*)b
;
292 int n
= pA
->len
- pB
->len
;
294 n
= strcmp(pA
->zName
, pB
->zName
);
299 static int keywordCompare2(const void *a
, const void *b
){
300 const Keyword
*pA
= (Keyword
*)a
;
301 const Keyword
*pB
= (Keyword
*)b
;
302 int n
= pB
->longestSuffix
- pA
->longestSuffix
;
304 n
= strcmp(pA
->zName
, pB
->zName
);
309 static int keywordCompare3(const void *a
, const void *b
){
310 const Keyword
*pA
= (Keyword
*)a
;
311 const Keyword
*pB
= (Keyword
*)b
;
312 int n
= pA
->offset
- pB
->offset
;
313 if( n
==0 ) n
= pB
->id
- pA
->id
;
319 ** Return a KeywordTable entry with the given id
321 static Keyword
*findById(int id
){
323 for(i
=0; i
<nKeyword
; i
++){
324 if( aKeywordTable
[i
].id
==id
) break;
326 return &aKeywordTable
[i
];
330 ** This routine does the work. The generated code is printed on standard
333 int main(int argc
, char **argv
){
335 int bestSize
, bestCount
;
339 int aKWHash
[1000]; /* 1000 is much bigger than nKeyword */
342 /* Remove entries from the list of keywords that have mask==0 */
343 for(i
=j
=0; i
<nKeyword
; i
++){
344 if( aKeywordTable
[i
].mask
==0 ) continue;
346 aKeywordTable
[j
] = aKeywordTable
[i
];
352 /* Fill in the lengths of strings and hashes for all entries. */
353 for(i
=0; i
<nKeyword
; i
++){
354 Keyword
*p
= &aKeywordTable
[i
];
355 p
->len
= (int)strlen(p
->zName
);
356 assert( p
->len
<sizeof(p
->zOrigName
) );
357 memcpy(p
->zOrigName
, p
->zName
, p
->len
+1);
359 p
->hash
= (charMap(p
->zName
[0])*4) ^
360 (charMap(p
->zName
[p
->len
-1])*3) ^ (p
->len
*1);
364 /* Sort the table from shortest to longest keyword */
365 qsort(aKeywordTable
, nKeyword
, sizeof(aKeywordTable
[0]), keywordCompare1
);
367 /* Look for short keywords embedded in longer keywords */
368 for(i
=nKeyword
-2; i
>=0; i
--){
369 Keyword
*p
= &aKeywordTable
[i
];
370 for(j
=nKeyword
-1; j
>i
&& p
->substrId
==0; j
--){
371 Keyword
*pOther
= &aKeywordTable
[j
];
372 if( pOther
->substrId
) continue;
373 if( pOther
->len
<=p
->len
) continue;
374 for(k
=0; k
<=pOther
->len
-p
->len
; k
++){
375 if( memcmp(p
->zName
, &pOther
->zName
[k
], p
->len
)==0 ){
376 p
->substrId
= pOther
->id
;
384 /* Compute the longestSuffix value for every word */
385 for(i
=0; i
<nKeyword
; i
++){
386 Keyword
*p
= &aKeywordTable
[i
];
387 if( p
->substrId
) continue;
388 for(j
=0; j
<nKeyword
; j
++){
391 pOther
= &aKeywordTable
[j
];
392 if( pOther
->substrId
) continue;
393 for(k
=p
->longestSuffix
+1; k
<p
->len
&& k
<pOther
->len
; k
++){
394 if( memcmp(&p
->zName
[p
->len
-k
], pOther
->zName
, k
)==0 ){
395 p
->longestSuffix
= k
;
401 /* Sort the table in order of decreasing longestSuffix */
402 qsort(aKeywordTable
, nKeyword
, sizeof(aKeywordTable
[0]), keywordCompare2
);
404 /* Fill in the offset for all entries */
406 for(i
=0; i
<nKeyword
; i
++){
407 Keyword
*p
= &aKeywordTable
[i
];
408 if( p
->offset
>0 || p
->substrId
) continue;
411 for(k
=p
->len
-1; k
>=1; k
--){
412 for(j
=i
+1; j
<nKeyword
; j
++){
413 Keyword
*pOther
= &aKeywordTable
[j
];
414 if( pOther
->offset
>0 || pOther
->substrId
) continue;
415 if( pOther
->len
<=k
) continue;
416 if( memcmp(&p
->zName
[p
->len
-k
], pOther
->zName
, k
)==0 ){
418 p
->offset
= nChar
- k
;
419 nChar
= p
->offset
+ p
->len
;
429 for(i
=0; i
<nKeyword
; i
++){
430 Keyword
*p
= &aKeywordTable
[i
];
432 p
->offset
= findById(p
->substrId
)->offset
+ p
->substrOffset
;
436 /* Sort the table by offset */
437 qsort(aKeywordTable
, nKeyword
, sizeof(aKeywordTable
[0]), keywordCompare3
);
439 /* Figure out how big to make the hash table in order to minimize the
440 ** number of collisions */
442 bestCount
= nKeyword
*nKeyword
;
443 for(i
=nKeyword
/2; i
<=2*nKeyword
; i
++){
444 for(j
=0; j
<i
; j
++) aKWHash
[j
] = 0;
445 for(j
=0; j
<nKeyword
; j
++){
446 h
= aKeywordTable
[j
].hash
% i
;
450 for(j
=count
=0; j
<i
; j
++) count
+= aKWHash
[j
];
451 if( count
<bestCount
){
457 /* Compute the hash */
458 for(i
=0; i
<bestSize
; i
++) aKWHash
[i
] = 0;
459 for(i
=0; i
<nKeyword
; i
++){
460 h
= aKeywordTable
[i
].hash
% bestSize
;
461 aKeywordTable
[i
].iNext
= aKWHash
[h
];
465 /* Begin generating code */
467 printf("/* Hash score: %d */\n", bestCount
);
468 printf("/* zKWText[] encodes %d bytes of keyword text in %d bytes */\n",
469 totalLen
+ nKeyword
, nChar
+1 );
470 for(i
=j
=k
=0; i
<nKeyword
; i
++){
471 Keyword
*p
= &aKeywordTable
[i
];
472 if( p
->substrId
) continue;
473 memcpy(&zKWText
[k
], p
->zName
, p
->len
);
476 printf("%*s */\n", 74-j
, "");
483 printf("%s", p
->zName
);
487 printf("%*s */\n", 74-j
, "");
489 printf("static const char zKWText[%d] = {\n", nChar
);
491 for(i
=j
=0; i
<k
; i
++){
498 printf("'%c',", zKWText
[i
]);
506 if( j
>0 ) printf("\n");
509 printf("/* aKWHash[i] is the hash value for the i-th keyword */\n");
510 printf("static const unsigned char aKWHash[%d] = {\n", bestSize
);
511 for(i
=j
=0; i
<bestSize
; i
++){
512 if( j
==0 ) printf(" ");
513 printf(" %3d,", aKWHash
[i
]);
520 printf("%s};\n", j
==0 ? "" : "\n");
522 printf("/* aKWNext[] forms the hash collision chain. If aKWHash[i]==0\n");
523 printf("** then the i-th keyword has no more hash collisions. Otherwise,\n");
524 printf("** the next keyword with the same hash is aKWHash[i]-1. */\n");
525 printf("static const unsigned char aKWNext[%d] = {\n", nKeyword
);
526 for(i
=j
=0; i
<nKeyword
; i
++){
527 if( j
==0 ) printf(" ");
528 printf(" %3d,", aKeywordTable
[i
].iNext
);
535 printf("%s};\n", j
==0 ? "" : "\n");
537 printf("/* aKWLen[i] is the length (in bytes) of the i-th keyword */\n");
538 printf("static const unsigned char aKWLen[%d] = {\n", nKeyword
);
539 for(i
=j
=0; i
<nKeyword
; i
++){
540 if( j
==0 ) printf(" ");
541 printf(" %3d,", aKeywordTable
[i
].len
+aKeywordTable
[i
].prefix
);
548 printf("%s};\n", j
==0 ? "" : "\n");
550 printf("/* aKWOffset[i] is the index into zKWText[] of the start of\n");
551 printf("** the text for the i-th keyword. */\n");
552 printf("static const unsigned short int aKWOffset[%d] = {\n", nKeyword
);
553 for(i
=j
=0; i
<nKeyword
; i
++){
554 if( j
==0 ) printf(" ");
555 printf(" %3d,", aKeywordTable
[i
].offset
);
562 printf("%s};\n", j
==0 ? "" : "\n");
564 printf("/* aKWCode[i] is the parser symbol code for the i-th keyword */\n");
565 printf("static const unsigned char aKWCode[%d] = {\n", nKeyword
);
566 for(i
=j
=0; i
<nKeyword
; i
++){
567 char *zToken
= aKeywordTable
[i
].zTokenType
;
568 if( j
==0 ) printf(" ");
569 printf("%s,%*s", zToken
, (int)(14-strlen(zToken
)), "");
576 printf("%s};\n", j
==0 ? "" : "\n");
577 printf("/* Check to see if z[0..n-1] is a keyword. If it is, write the\n");
578 printf("** parser symbol code for that keyword into *pType. Always\n");
579 printf("** return the integer n (the length of the token). */\n");
580 printf("static int keywordCode(const char *z, int n, int *pType){\n");
581 printf(" int i, j;\n");
582 printf(" const char *zKW;\n");
583 printf(" if( n>=2 ){\n");
584 printf(" i = ((charMap(z[0])*4) ^ (charMap(z[n-1])*3) ^ n) %% %d;\n",
586 printf(" for(i=((int)aKWHash[i])-1; i>=0; i=((int)aKWNext[i])-1){\n");
587 printf(" if( aKWLen[i]!=n ) continue;\n");
589 printf(" zKW = &zKWText[aKWOffset[i]];\n");
590 printf("#ifdef SQLITE_ASCII\n");
591 printf(" while( j<n && (z[j]&~0x20)==zKW[j] ){ j++; }\n");
593 printf("#ifdef SQLITE_EBCDIC\n");
594 printf(" while( j<n && toupper(z[j])==zKW[j] ){ j++; }\n");
596 printf(" if( j<n ) continue;\n");
597 for(i
=0; i
<nKeyword
; i
++){
598 printf(" testcase( i==%d ); /* %s */\n",
599 i
, aKeywordTable
[i
].zOrigName
);
601 printf(" *pType = aKWCode[i];\n");
605 printf(" return n;\n");
607 printf("int sqlite3KeywordCode(const unsigned char *z, int n){\n");
608 printf(" int id = TK_ID;\n");
609 printf(" keywordCode((char*)z, n, &id);\n");
610 printf(" return id;\n");
612 printf("#define SQLITE_N_KEYWORD %d\n", nKeyword
);