2 /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
3 file Copyright.txt or https://cmake.org/licensing for details. */
6 This file must be translated to C and modified to build everywhere.
8 Run flex >= 2.6 like this:
10 flex --nounistd -DFLEXINT_H --noline -ocmListFileLexer.c cmListFileLexer.in.l
12 Modify cmListFileLexer.c:
13 - remove trailing whitespace: sed -i 's/\s*$//' cmListFileLexer.c
14 - remove blank lines at end of file: sed -i '${/^$/d;}' cmListFileLexer.c
15 - #include "cmStandardLexer.h" at the top: sed -i '1i#include "cmStandardLexer.h"' cmListFileLexer.c
19 /* IWYU pragma: no_forward_declare yyguts_t */
22 #include "cmsys/Encoding.h"
25 /* Set up the proper cmListFileLexer_yylex declaration: the extra data
   carried by the scanner is a cmListFileLexer*, and the generated scan
   function also receives the lexer as an explicit second argument. */
26 #define YY_EXTRA_TYPE cmListFileLexer*
27 #define YY_DECL int cmListFileLexer_yylex (yyscan_t yyscanner, cmListFileLexer* lexer)
29 #include "cmListFileLexer.h"
31 /*--------------------------------------------------------------------------*/
/* Lexer instance state.
   token           - the token handed back by cmListFileLexer_Scan; it is
                     filled in by cmListFileLexerSetToken and
                     cmListFileLexerAppend.
   string_position - read cursor into the in-memory input; it is set to the
                     start of string_buffer by cmListFileLexer_SetString and
                     advanced by cmListFileLexerInput. */
32 struct cmListFileLexer_s
34 cmListFileLexer_Token token;
43 char* string_position;
/* Forward declarations of the file-local helpers, so that the scanner
   rules and the YY_INPUT macro can refer to them before their
   definitions. */
48 static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
50 static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
52 static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
54 static void cmListFileLexerInit(cmListFileLexer* lexer);
55 static void cmListFileLexerDestroy(cmListFileLexer* lexer);
57 /* Replace the lexer input function.  The scanner refills its buffer by
   calling cmListFileLexerInput on the lexer registered as the scanner
   extra data; `result` receives the value returned by that call. */
59 #define YY_INPUT(buf, result, max_size) \
60 do { result = cmListFileLexerInput(cmListFileLexer_yyget_extra(yyscanner), buf, max_size); } while (0)
62 /*--------------------------------------------------------------------------*/
/* Generated scanner symbols carry the cmListFileLexer_yy prefix.

   Named patterns used by the rules:
   MAKEVAR  - a dollar sign, an opening paren, zero or more letters, digits
              or underscores, and a closing paren.
   UNQUOTED - either one character that is not a space, NUL, tab, CR, LF,
              paren, hash, backslash, double quote, opening square bracket
              or equals sign; or a backslash followed by any character
              other than NUL or LF.
   LEGACY   - a MAKEVAR, an UNQUOTED element, or a double-quoted run of
              MAKEVAR, UNQUOTED, space, tab, opening square bracket or
              equals sign. */
65 %option prefix="cmListFileLexer_yy"
76 MAKEVAR \$\([A-Za-z0-9_]*\)
77 UNQUOTED ([^ \0\t\r\n\(\)#\\\"[=]|\\[^\0\n])
78 LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t[=])*\"
83 lexer->token.type = cmListFileLexer_Token_Newline;
  /* The token text is the matched input itself. */
84 cmListFileLexerSetToken(lexer, yytext, yyleng);
92 const char* bracket = yytext;
  /* A match that begins with a hash character is flagged as a comment. */
93 lexer->comment = yytext[0] == '#';
95 lexer->token.type = cmListFileLexer_Token_CommentBracket;
98 lexer->token.type = cmListFileLexer_Token_ArgumentBracket;
  /* Start the bracket token with empty text. */
100 cmListFileLexerSetToken(lexer, "", 0);
  /* Record the distance from `bracket` to the first opening square bracket
     found after its first character.  The closing-bracket rule compares
     its match length against this value. */
101 lexer->bracket = strchr(bracket+1, '[') - bracket;
  /* The match may end with a newline; that case is handled separately. */
102 if (yytext[yyleng-1] == '\n') {
106 lexer->column += yyleng;
112 lexer->column += yyleng;
117 lexer->column += yyleng;
121 lexer->token.type = cmListFileLexer_Token_ParenLeft;
  /* Token text is the matched input; the column then moves past it. */
122 cmListFileLexerSetToken(lexer, yytext, yyleng);
123 lexer->column += yyleng;
128 lexer->token.type = cmListFileLexer_Token_ParenRight;
  /* Same handling as the left paren. */
129 cmListFileLexerSetToken(lexer, yytext, yyleng);
130 lexer->column += yyleng;
134 [A-Za-z_][A-Za-z0-9_]* {
  /* Identifier: a letter or underscore followed by any number of letters,
     digits or underscores.  The matched text becomes the token text and
     the column advances by the match length. */
135 lexer->token.type = cmListFileLexer_Token_Identifier;
136 cmListFileLexerSetToken(lexer, yytext, yyleng);
137 lexer->column += yyleng;
142 /* Handle ]]====]=======]*/
143 cmListFileLexerAppend(lexer, yytext, yyleng);
144 lexer->column += yyleng;
  /* Compare the match length with lexer->bracket, the value recorded when
     the bracket was opened. */
145 if (yyleng == lexer->bracket) {
151 lexer->column += yyleng;
152 /* Erase the partial bracket from the token. */
  /* The token is shortened by lexer->bracket characters and terminated
     again at the new length. */
153 lexer->token.length -= lexer->bracket;
154 lexer->token.text[lexer->token.length] = 0;
159 <BRACKET>([^]\0\n])+ {
  /* A run of characters other than a closing square bracket, NUL or
     newline is appended to the token unchanged. */
160 cmListFileLexerAppend(lexer, yytext, yyleng);
161 lexer->column += yyleng;
164 <BRACKET,BRACKETEND>\n {
  /* A newline inside the bracket is kept in the token text. */
165 cmListFileLexerAppend(lexer, yytext, yyleng);
171 <BRACKET,BRACKETEND>[^\0\n] {
  /* Any other single character except NUL and newline is appended. */
172 cmListFileLexerAppend(lexer, yytext, yyleng);
173 lexer->column += yyleng;
177 <BRACKET,BRACKETEND><<EOF>> {
  /* The input ended while still inside a bracket. */
178 lexer->token.type = cmListFileLexer_Token_BadBracket;
183 ({UNQUOTED}|=|\[=*{UNQUOTED})({UNQUOTED}|[[=])* {
  /* Unquoted argument.  It starts with an UNQUOTED element, an equals
     sign, or an opening square bracket followed by optional equals signs
     and an UNQUOTED element; it continues with UNQUOTED elements, opening
     square brackets or equals signs. */
184 lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
185 cmListFileLexerSetToken(lexer, yytext, yyleng);
186 lexer->column += yyleng;
190 ({MAKEVAR}|{UNQUOTED}|=|\[=*{LEGACY})({LEGACY}|[[=])* {
  /* Same token type as the rule above, but this pattern additionally
     admits MAKEVAR and LEGACY elements. */
191 lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
192 cmListFileLexerSetToken(lexer, yytext, yyleng);
193 lexer->column += yyleng;
198 lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
199 cmListFileLexerSetToken(lexer, yytext, yyleng);
200 lexer->column += yyleng;
205 lexer->token.type = cmListFileLexer_Token_ArgumentQuoted;
  /* Start the quoted argument with empty text; the STRING-state rules
     append the content to it.  The column still advances by the length
     of the text matched here. */
206 cmListFileLexerSetToken(lexer, "", 0);
207 lexer->column += yyleng;
211 <STRING>([^\\\0\n\"]|\\[^\0\n])+ {
212 cmListFileLexerAppend(lexer, yytext, yyleng);
213 lexer->column += yyleng;
217 /* Continuation: text is not part of string */
223 cmListFileLexerAppend(lexer, yytext, yyleng);
229 lexer->column += yyleng;
235 cmListFileLexerAppend(lexer, yytext, yyleng);
236 lexer->column += yyleng;
240 lexer->token.type = cmListFileLexer_Token_BadString;
246 lexer->token.type = cmListFileLexer_Token_Space;
  /* Token text is the matched input; the column then moves past it. */
247 cmListFileLexerSetToken(lexer, yytext, yyleng);
248 lexer->column += yyleng;
253 lexer->token.type = cmListFileLexer_Token_BadCharacter;
  /* The offending input is kept as the token text. */
254 cmListFileLexerSetToken(lexer, yytext, yyleng);
255 lexer->column += yyleng;
260 lexer->token.type = cmListFileLexer_Token_None;
  /* A null text pointer makes cmListFileLexerSetToken free any existing
     token buffer. */
261 cmListFileLexerSetToken(lexer, 0, 0);
267 /*--------------------------------------------------------------------------*/
/* Make `text` the current token text and stamp the token with the current
   line and column of the lexer.  Callers in this file pass either the
   scanner match with its length, an empty string with length 0, or a null
   pointer with length 0.  A null `text` causes any existing token buffer
   to be freed. */
268 static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
271 /* Set the token line and column number. */
272 lexer->token.line = lexer->line;
273 lexer->token.column = lexer->column;
275 /* Use the same buffer if possible. */
276 if (lexer->token.text) {
/* Reuse requires room for the text plus its terminator, hence the strict
   comparison against the buffer size. */
277 if (text && length < lexer->size) {
/* strcpy copies through the terminator, so `length` is expected to equal
   the string length of `text`. */
278 strcpy(lexer->token.text, text);
279 lexer->token.length = length;
/* The existing buffer cannot be reused: release it. */
282 free(lexer->token.text);
283 lexer->token.text = 0;
287 /* Need to extend the buffer. */
/* NOTE(review): the strdup result is stored without a visible null
   check -- confirm how allocation failure is meant to be handled. */
289 lexer->token.text = strdup(text);
290 lexer->token.length = length;
/* The recorded size includes the terminator. */
291 lexer->size = length + 1;
293 lexer->token.length = 0;
297 /*--------------------------------------------------------------------------*/
/* Append `length` characters of `text` to the current token text, growing
   the token buffer when the result (plus terminator) does not fit. */
298 static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
304 /* If the appended text will fit in the buffer, do not reallocate. */
/* Required size: existing text, appended text, and one terminator. */
305 newSize = lexer->token.length + length + 1;
306 if (lexer->token.text && newSize <= lexer->size) {
/* Write directly after the existing text; strcpy also writes the
   terminator. */
307 strcpy(lexer->token.text + lexer->token.length, text);
308 lexer->token.length += length;
312 /* We need to extend the buffer. */
/* NOTE(review): the malloc result is used by the copies below without a
   visible null check -- confirm how allocation failure is meant to be
   handled. */
313 temp = malloc(newSize);
314 if (lexer->token.text) {
/* Carry the existing text over, then release the old buffer. */
315 memcpy(temp, lexer->token.text, lexer->token.length);
316 free(lexer->token.text);
/* Place the new text after the old text and terminate the result. */
318 memcpy(temp + lexer->token.length, text, length);
319 temp[lexer->token.length + length] = 0;
320 lexer->token.text = temp;
321 lexer->token.length += length;
322 lexer->size = newSize;
325 /*--------------------------------------------------------------------------*/
/* Input callback installed through YY_INPUT: fill `buffer` with the next
   chunk of input.  One branch reads from lexer->file with fread; the
   other copies from the in-memory string while string_left is nonzero. */
326 static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
331 /* Convert CRLF -> LF explicitly. The C FILE "t"ext mode
332 does not convert newlines on all platforms. Move any
333 trailing CR to the start of the buffer for the next read. */
334 size_t cr = lexer->cr;
/* Read after the first `cr` bytes of the buffer, which are reserved for a
   CR carried over from the previous read. */
337 n = fread(buffer + cr, 1, bufferSize - cr, lexer->file);
340 const char* i = buffer;
/* Note whether this chunk ends in a CR, since its LF may only arrive with
   the next read. */
343 cr = (buffer[n - 1] == '\r') ? 1 : 0;
/* A CR immediately followed by LF is the pair being converted. */
346 if (i[0] == '\r' && i[1] == '\n') {
358 } else if (lexer->string_left) {
/* Copy at most bufferSize bytes of the remaining string, then advance the
   cursor and reduce the remaining count by the amount copied. */
359 int length = lexer->string_left;
360 if ((int)bufferSize < length) {
361 length = (int)bufferSize;
363 memcpy(buffer, lexer->string_position, length);
364 lexer->string_position += length;
365 lexer->string_left -= length;
372 /*--------------------------------------------------------------------------*/
/* Create the scanner for a lexer that has an input source (an open file or
   a string buffer) and register the lexer as the scanner extra data, which
   is how YY_INPUT finds it.  Nothing is created without an input source. */
373 static void cmListFileLexerInit(cmListFileLexer* lexer)
375 if (lexer->file || lexer->string_buffer) {
376 cmListFileLexer_yylex_init(&lexer->scanner);
377 cmListFileLexer_yyset_extra(lexer, lexer->scanner);
381 /*--------------------------------------------------------------------------*/
/* Release the current token and input state so that the lexer can be given
   a new input source. */
382 static void cmListFileLexerDestroy(cmListFileLexer* lexer)
/* A null text pointer frees any existing token buffer. */
384 cmListFileLexerSetToken(lexer, 0, 0);
/* The scanner exists only when an input source was set; this mirrors the
   condition used in cmListFileLexerInit. */
385 if (lexer->file || lexer->string_buffer) {
386 cmListFileLexer_yylex_destroy(lexer->scanner);
/* Free the in-memory input and reset its cursor and remaining count. */
391 if (lexer->string_buffer) {
392 free(lexer->string_buffer);
393 lexer->string_buffer = 0;
394 lexer->string_left = 0;
395 lexer->string_position = 0;
400 /*--------------------------------------------------------------------------*/
/* Allocate a lexer and zero-fill it, so that it starts with no token
   buffer and no input source. */
401 cmListFileLexer* cmListFileLexer_New(void)
403 cmListFileLexer* lexer = (cmListFileLexer*)malloc(sizeof(cmListFileLexer));
/* NOTE(review): confirm that the malloc result is checked for null before
   this memset. */
407 memset(lexer, 0, sizeof(*lexer));
413 /*--------------------------------------------------------------------------*/
/* Tear down a lexer.  Calling cmListFileLexer_SetFileName with a null name
   runs cmListFileLexerDestroy on the current token and input state. */
414 void cmListFileLexer_Delete(cmListFileLexer* lexer)
416 cmListFileLexer_SetFileName(lexer, 0, 0);
420 /*--------------------------------------------------------------------------*/
/* Detect a byte-order mark at the current position of `f`.

   Recognized byte sequences:
     EF BB BF     -> cmListFileLexer_BOM_UTF8
     FE FF        -> cmListFileLexer_BOM_UTF16BE
     00 00 FE FF  -> cmListFileLexer_BOM_UTF32BE
     FF FE 00 00  -> cmListFileLexer_BOM_UTF32LE
     FF FE        -> cmListFileLexer_BOM_UTF16LE

   When nothing matches, the stream is rewound to offset 0 and
   cmListFileLexer_BOM_None is returned.  cmListFileLexer_BOM_Broken is
   returned when repositioning the stream fails. */
421 static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
424 if (fread(b, 1, 2, f) == 2) {
425 if (b[0] == 0xEF && b[1] == 0xBB) {
/* The UTF-8 mark has a third byte. */
426 if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
427 return cmListFileLexer_BOM_UTF8;
429 } else if (b[0] == 0xFE && b[1] == 0xFF) {
431 return cmListFileLexer_BOM_UTF16BE;
432 } else if (b[0] == 0 && b[1] == 0) {
433 if (fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF) {
434 return cmListFileLexer_BOM_UTF32BE;
436 } else if (b[0] == 0xFF && b[1] == 0xFE) {
/* FF FE begins both the UTF-16 LE and the UTF-32 LE marks; the next two
   bytes decide between them. */
439 if (fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) {
440 return cmListFileLexer_BOM_UTF32LE;
/* Not UTF-32 LE: return to the saved position `p` so that the two bytes
   just read are not consumed. */
442 if (fsetpos(f, &p) != 0) {
443 return cmListFileLexer_BOM_Broken;
445 return cmListFileLexer_BOM_UTF16LE;
/* No mark recognized: restart reading from the beginning of the file. */
448 if (fseek(f, 0, SEEK_SET) != 0) {
449 return cmListFileLexer_BOM_Broken;
451 return cmListFileLexer_BOM_None;
454 /*--------------------------------------------------------------------------*/
/* Switch the lexer to read from the file `name`.  Any previous input is
   destroyed first.  The file is opened in binary mode, the byte-order mark
   detected by cmListFileLexer_ReadBOM is stored through `bom`, and the
   scanner is then initialized.  Callers in this file also pass a null
   `name` and a null `bom` to only tear down the current input. */
455 int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
456 cmListFileLexer_BOM* bom)
459 cmListFileLexerDestroy(lexer);
/* Wide-character open path: the name is converted to a wide string before
   opening.  NOTE(review): presumably this path is selected for Windows
   builds; confirm that `wname` is released after the open. */
462 wchar_t* wname = cmsysEncoding_DupToWide(name);
463 lexer->file = _wfopen(wname, L"rb");
/* Plain open path. */
466 lexer->file = fopen(name, "rb");
470 *bom = cmListFileLexer_ReadBOM(lexer->file);
476 cmListFileLexerInit(lexer);
480 /*--------------------------------------------------------------------------*/
/* Switch the lexer to read from a private copy of the string `text`.  Any
   previous input is destroyed first; the scanner is initialized once the
   copy is in place. */
481 int cmListFileLexer_SetString(cmListFileLexer* lexer, const char* text)
484 cmListFileLexerDestroy(lexer);
486 int length = (int)strlen(text);
/* One extra byte for the terminator. */
487 lexer->string_buffer = (char*)malloc(length + 1);
488 if (lexer->string_buffer) {
489 strcpy(lexer->string_buffer, text);
/* Reading starts at the beginning of the copy with the whole string
   remaining. */
490 lexer->string_position = lexer->string_buffer;
491 lexer->string_left = length;
496 cmListFileLexerInit(lexer);
500 /*--------------------------------------------------------------------------*/
/* Scan the next token.  A nonzero result from the generated scanner yields
   a pointer to the token stored inside the lexer, so the pointed-to data
   is overwritten by the next scan.  Otherwise the current input is torn
   down through cmListFileLexer_SetFileName with a null name. */
501 cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer)
/* Guard for a lexer without any input source. */
503 if (!lexer->file && !lexer->string_buffer) {
506 if (cmListFileLexer_yylex(lexer->scanner, lexer)) {
507 return &lexer->token;
509 cmListFileLexer_SetFileName(lexer, 0, 0);
514 /*--------------------------------------------------------------------------*/
/* Accessor for the current line of the lexer.  NOTE(review): presumably
   returns lexer->line, mirroring cmListFileLexer_GetCurrentColumn --
   confirm against the body. */
515 long cmListFileLexer_GetCurrentLine(cmListFileLexer* lexer)
520 /*--------------------------------------------------------------------------*/
/* Return the current column of the lexer, which the scanner rules advance
   by the length of each match. */
521 long cmListFileLexer_GetCurrentColumn(cmListFileLexer* lexer)
523 return lexer->column;
526 /*--------------------------------------------------------------------------*/
/* Return a human-readable name for a token type, for use in messages.
   The result is a string literal and must not be freed or modified by the
   caller.  A type without a matching case yields the unknown-token
   string. */
527 const char* cmListFileLexer_GetTypeAsString(cmListFileLexer* lexer,
528 cmListFileLexer_Type type)
532 case cmListFileLexer_Token_None:
534 case cmListFileLexer_Token_Space:
536 case cmListFileLexer_Token_Newline:
538 case cmListFileLexer_Token_Identifier:
540 case cmListFileLexer_Token_ParenLeft:
542 case cmListFileLexer_Token_ParenRight:
543 return "right paren";
544 case cmListFileLexer_Token_ArgumentUnquoted:
545 return "unquoted argument";
546 case cmListFileLexer_Token_ArgumentQuoted:
547 return "quoted argument";
548 case cmListFileLexer_Token_ArgumentBracket:
549 return "bracket argument";
550 case cmListFileLexer_Token_CommentBracket:
551 return "bracket comment";
552 case cmListFileLexer_Token_BadCharacter:
553 return "bad character";
554 case cmListFileLexer_Token_BadBracket:
555 return "unterminated bracket";
556 case cmListFileLexer_Token_BadString:
557 return "unterminated string";
/* Fallback for any type not handled above. */
559 return "unknown token";