2 /*-------------------------------------------------------------------------
5 * lexical scanner for SQL commands
7 * This lexer used to be part of psql, and that heritage is reflected in
8 * the file name as well as function and typedef names, though it can now
9 * be used by other frontend programs as well. It's also possible to extend
10 * this lexer with a compatible add-on lexer to handle program-specific
13 * This code is mainly concerned with determining where the end of a SQL
14 * statement is: we are looking for semicolons that are not within quotes,
15 * comments, or parentheses. The most reliable way to handle this is to
16 * borrow the backend's flex lexer rules, lock, stock, and barrel. The rules
17 * below are (except for a few) the same as the backend's, but their actions
18 * are just ECHO whereas the backend's actions generally do other things.
20 * XXX The rules in this file must be kept in sync with the backend lexer!!!
22 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
24 * See psqlscan_int.h for additional commentary.
27 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
28 * Portions Copyright (c) 1994, Regents of the University of California
31 * src/fe_utils/psqlscan.l
33 *-------------------------------------------------------------------------
35 #include "postgres_fe.h"
37 #include "common/logging.h"
38 #include "fe_utils/psqlscan.h"
47 #include "fe_utils/psqlscan_int.h"
50 * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
51 * doesn't presently make use of that argument, so just declare it as int.
56 * Set the type of yyextra; we use it as a pointer back to the containing
/* yyextra holds the PsqlScanState, which is how rule actions reach the scan state */
59 #define YY_EXTRA_TYPE PsqlScanState
62 /* Return values from yylex() */
63 #define LEXRES_EOL 0 /* end of input */
64 #define LEXRES_SEMI 1 /* command-terminating semicolon found */
65 #define LEXRES_BACKSLASH 2 /* backslash command start */
/*
 * ECHO passes the text of the current token (yytext, yyleng) to
 * psqlscan_emit().  The expansion refers to "cur_state", so it is usable only
 * where a variable of that name is in scope; yylex() declares one.
 */
68 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
71 * Work around a bug in flex 2.5.35: it emits a couple of functions that
72 * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
73 * this would cause warnings. Providing our own declarations should be
74 * harmless even when the bug gets fixed.
76 extern int psql_yyget_column(yyscan_t yyscanner);
77 extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
84 %option never-interactive
90 %option prefix="psql_yy"
93 * All of the following definitions and rules should exactly match
94 * src/backend/parser/scan.l so far as the flex patterns are concerned.
95 * The rule bodies are just ECHO as opposed to what the backend does,
96 * however. (But be sure to duplicate code that affects the lexing process,
97 * such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
98 * whereas scan.l has a separate one for each exclusive state.
102 * OK, here is a short description of lex/flex rules behavior.
103 * The longest pattern which matches an input string is always chosen.
104 * For equal-length patterns, the first occurring in the rules list is chosen.
105 * INITIAL is the starting state, to which all non-conditional rules apply.
106 * Exclusive states change parsing rules while the state is active. When in
107 * an exclusive state, only those rules defined for that state apply.
109 * We use exclusive states for quoted strings, extended comments,
110 * and to eliminate parsing troubles for numeric strings.
112 * <xb> bit string literal
113 * <xc> extended C-style comments
114 * <xd> delimited identifiers (double-quoted identifiers)
115 * <xh> hexadecimal numeric string
116 * <xq> standard quoted strings
117 * <xe> extended quoted strings (support backslash escape sequences)
118 * <xdolq> $foo$ quoted strings
119 * <xui> quoted identifier with Unicode escapes
120 * <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
121 * <xus> quoted string with Unicode escapes
122 * <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
124 * Note: we intentionally don't mimic the backend's <xeu> state; we have
125 * no need to distinguish it from <xe> state, and no good way to get out
126 * of it in error cases. The backend just throws yyerror() in those
127 * cases, but that's not an option here.
143 * In order to make the world safe for Windows and Mac clients as well as
144 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
145 * sequence will be seen as two successive newlines, but that doesn't cause
146 * any problems. Comments that start with -- and extend to the next
147 * newline are treated as equivalent to a single whitespace character.
149 * NOTE a fine point: if there is no newline following --, we will absorb
150 * everything to the end of the input as a comment. This is correct. Older
151 * versions of Postgres failed to recognize -- as a comment if the input
152 * did not end with a newline.
154 * XXX perhaps \f (formfeed) should be treated as a newline as well?
156 * XXX if you change the set of whitespace characters, fix scanner_isspace()
165 comment ("--"{non_newline}*)
167 whitespace ({space}+|{comment})
170 * SQL requires at least one newline in the whitespace separating
171 * string literals that are to be concatenated. Silly, but who are we
172 * to argue? Note that {whitespace_with_newline} should not have * after
173 * it, whereas {whitespace} should generally have a * after it...
176 special_whitespace ({space}+|{comment}{newline})
177 horiz_whitespace ({horiz_space}|{comment})
178 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
181 * To ensure that {quotecontinue} can be scanned without having to back up
182 * if the full pattern isn't matched, we include trailing whitespace in
183 * {quotestop}. This matches all cases where {quotecontinue} fails to match,
184 * except for {quote} followed by whitespace and just one "-" (not two,
185 * which would start a {comment}). To cover that we have {quotefail}.
186 * The actions for {quotestop} and {quotefail} must throw back characters
187 * beyond the quote proper.
190 quotestop {quote}{whitespace}*
191 quotecontinue {quote}{whitespace_with_newline}{quote}
192 quotefail {quote}{whitespace}*"-"
195 * It is tempting to scan the string for only those characters
196 * which are allowed. However, this leads to silently swallowed
197 * characters if illegal characters are included in the string.
198 * For example, if xbinside is [01] then B'ABCD' is interpreted
199 * as a zero-length string, and the ABCD' is lost!
200 * Better to pass the string forward and let the input routines
201 * validate the contents.
206 /* Hexadecimal number */
210 /* National character */
213 /* Quoted string that allows backslash escapes */
217 xeoctesc [\\][0-7]{1,3}
218 xehexesc [\\]x[0-9A-Fa-f]{1,2}
219 xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
220 xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
223 * xqdouble implements embedded quote, ''''
226 xqdouble {quote}{quote}
229 /* $foo$ style quotes ("dollar quoting")
230 * The quoted string starts with $foo$ where "foo" is an optional string
231 * in the form of an identifier, except that it may not contain "$",
232 * and extends to the first occurrence of an identical string.
233 * There is *no* processing of the quoted text.
235 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
236 * fails to match its trailing "$".
238 dolq_start [A-Za-z\200-\377_]
239 dolq_cont [A-Za-z\200-\377_0-9]
240 dolqdelim \$({dolq_start}{dolq_cont}*)?\$
241 dolqfailed \${dolq_start}{dolq_cont}*
245 * Allows embedded spaces and other special characters into identifiers.
250 xddouble {dquote}{dquote}
253 /* Unicode escapes */
254 uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
255 /* error rule to avoid backup */
256 uescapefail [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
258 /* Quoted identifier with Unicode escapes */
259 xuistart [uU]&{dquote}
261 /* Quoted string with Unicode escapes */
262 xusstart [uU]&{quote}
264 /* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
265 xustop1 {uescapefail}?
268 /* error rule to avoid backup */
274 * The "extended comment" syntax closely resembles allowable operator syntax.
275 * The tricky part here is to get lex to recognize a string starting with
276 * slash-star as a comment, when interpreting it as an operator would produce
277 * a longer match --- remember lex will prefer a longer match! Also, if we
278 * have something like plus-slash-star, lex will think this is a 3-character
279 * operator whereas we want to see it as a + operator and a comment start.
280 * The solution is two-fold:
281 * 1. append {op_chars}* to xcstart so that it matches as much text as
282 * {operator} would. Then the tie-breaker (first matching rule of same
283 * length) ensures xcstart wins. We put back the extra stuff with yyless()
284 * in case it contains a star-slash that should terminate the comment.
285 * 2. In the operator rule, check for slash-star within the operator, and
286 * if found throw it back with yyless(). This handles the plus-slash-star
288 * Dash-dash comments have similar interactions with the operator rule.
290 xcstart \/\*{op_chars}*
295 ident_start [A-Za-z\200-\377_]
296 ident_cont [A-Za-z\200-\377_0-9\$]
298 identifier {ident_start}{ident_cont}*
300 /* Assorted special-case operators and operator-like tokens */
306 * These operator-like tokens (unlike the above ones) also match the {operator}
307 * rule, which means that they might be overridden by a longer match if they
308 * are followed by a comment start or a + or - character. Accordingly, if you
309 * add to this list, you must also add corresponding code to the {operator}
310 * block to return the correct token in such cases. (This is not needed in
311 * psqlscan.l since the token value is ignored there.)
320 * "self" is the set of chars that should be returned as single-character
321 * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
322 * which can be one or more characters long (but if a single-char token
323 * appears in the "self" set, it is not to be returned as an Op). Note
324 * that the sets overlap, but each has some chars that are not in the other.
326 * If you change either set, adjust the character lists appearing in the
327 * rule for "operator"!
329 self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
330 op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
333 /* we no longer allow unary minus in numbers.
334 * instead we pass it separately to parser. there it gets
335 * coerced via doNegate() -- Leon aug 20 1999
337 * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
339 * {realfail1} and {realfail2} are added to prevent the need for scanner
340 * backup when the {real} rule fails to match completely.
344 decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
345 decimalfail {digit}+\.\.
346 real ({integer}|{decimal})[Ee][-+]?{digit}+
347 realfail1 ({integer}|{decimal})[Ee]
348 realfail2 ({integer}|{decimal})[Ee][-+]
352 /* psql-specific: characters allowed in variable names */
353 variable_char [A-Za-z\200-\377_0-9]
358 * Dollar quoted strings are totally opaque, and no escaping is done on them.
359 * Other quoted strings must allow some special characters such as single-quote
361 * Embedded single-quotes are implemented both in the SQL standard
362 * style of two adjacent single quotes "''" and in the Postgres/Java style
363 * of escaped-quote "\'".
364 * Other embedded escaped characters are matched explicitly and the leading
365 * backslash is dropped from the string.
366 * Note that xcstart must appear before operator, as explained above!
367 * Also whitespace (comment) must appear before operator.
373 /* Declare some local variables inside yylex(), for convenience */
374 PsqlScanState cur_state = yyextra;
375 PQExpBuffer output_buf = cur_state->output_buf;
378 * Force flex into the state indicated by start_state. This has a
379 * couple of purposes: it lets some of the functions below set a new
380 * starting state without ugly direct access to flex variables, and it
381 * allows us to transition from one flex lexer to another so that we
382 * can lex different parts of the source string using separate lexers.
384 BEGIN(cur_state->start_state);
389 * Note that the whitespace rule includes both true
390 * whitespace and single-line ("--" style) comments.
391 * We suppress whitespace at the start of the query
392 * buffer. We also suppress all single-line comments,
393 * which is pretty dubious but is the historical
396 if (!(output_buf->len == 0 || yytext[0] == '-'))
401 cur_state->xcdepth = 0;
403 /* Put back any characters past slash-star; see above */
410 cur_state->xcdepth++;
411 /* Put back any characters past slash-star; see above */
417 if (cur_state->xcdepth <= 0)
420 cur_state->xcdepth--;
451 <xh>{quotecontinue} |
452 <xb>{quotecontinue} {
457 /* Hexadecimal bit type.
458 * At some point we should simply pass the string
459 * forward to the parser and label it there.
460 * In the meantime, place a leading "x" on the string
461 * to mark it for the input routine as a hex string.
474 yyless(1); /* eat only 'n' this time */
479 if (cur_state->std_strings)
501 /* throw back all but the quote */
506 <xusend>{whitespace} {
519 <xq,xe,xus>{xqdouble} {
531 <xe>{xeunicodefail} {
543 <xq,xe,xus>{quotecontinue} {
547 /* This is only needed for \ just before EOF */
552 cur_state->dolqstart = pg_strdup(yytext);
557 /* throw back all but the initial "$" */
562 if (strcmp(yytext, cur_state->dolqstart) == 0)
564 free(cur_state->dolqstart);
565 cur_state->dolqstart = NULL;
571 * When we fail to match $...$ to dolqstart, transfer
572 * the $... part to the output, but put back the final
573 * $ for rescanning. Consider $delim$...$junk$delim$
579 <xdolq>{dolqinside} {
582 <xdolq>{dolqfailed} {
586 /* This is only needed for $ inside the quoted text */
607 <xuiend>{whitespace} {
628 /* throw back all but the initial u/U */
666 * These rules are specific to psql --- they implement parenthesis
667 * counting and detection of command-ending semicolon. These must
668 * appear before the {self} rule so that they take precedence over it.
672 cur_state->paren_depth++;
677 if (cur_state->paren_depth > 0)
678 cur_state->paren_depth--;
684 if (cur_state->paren_depth == 0)
686 /* Terminate lexing temporarily */
687 cur_state->start_state = YY_START;
693 * psql-specific rules to handle backslash commands and variable
694 * substitution. We want these before {self}, also.
698 /* Force a semi-colon or colon into the query buffer */
699 psqlscan_emit(cur_state, yytext + 1, 1);
703 /* Terminate lexing temporarily */
704 cur_state->start_state = YY_START;
705 return LEXRES_BACKSLASH;
709 /* Possible psql variable substitution */
713 varname = psqlscan_extract_substring(cur_state,
716 if (cur_state->callbacks->get_variable)
717 value = cur_state->callbacks->get_variable(varname,
719 cur_state->cb_passthrough);
725 /* It is a variable, check for recursion */
726 if (psqlscan_var_is_current_source(cur_state, varname))
728 /* Recursive expansion --- don't go there */
729 pg_log_warning("skipping recursive expansion of variable \"%s\"",
731 /* Instead copy the string as is */
736 /* OK, perform substitution */
737 psqlscan_push_new_buffer(cur_state, value, varname);
738 /* yy_scan_string already made buffer active */
745 * if the variable doesn't exist we'll copy the string
754 :'{variable_char}+' {
755 psqlscan_escape_variable(cur_state, yytext, yyleng,
759 :\"{variable_char}+\" {
760 psqlscan_escape_variable(cur_state, yytext, yyleng,
764 :\{\?{variable_char}+\} {
765 psqlscan_test_variable(cur_state, yytext, yyleng);
769 * These rules just avoid the need for scanner backup if one of the
770 * three rules above fails to match completely.
774 /* Throw back everything but the colon */
779 :\"{variable_char}* {
780 /* Throw back everything but the colon */
785 :\{\?{variable_char}* {
786 /* Throw back everything but the colon */
791 /* Throw back everything but the colon */
797 * Back to backend-compatible rules.
806 * Check for embedded slash-star or dash-dash; those
807 * are comment starts, so operator must stop there.
808 * Note that slash-star or dash-dash at the first
809 * character will match a prior rule, not this one.
812 char *slashstar = strstr(yytext, "/*");
813 char *dashdash = strstr(yytext, "--");
815 if (slashstar && dashdash)
817 /* if both appear, take the first one */
818 if (slashstar > dashdash)
819 slashstar = dashdash;
822 slashstar = dashdash;
824 nchars = slashstar - yytext;
827 * For SQL compatibility, '+' and '-' cannot be the
828 * last char of a multi-char operator unless the operator
829 * contains chars that are not in SQL operators.
830 * The idea is to lex '=-' as two operators, but not
831 * to forbid operator names like '?-' that could not be
832 * sequences of SQL operators.
835 (yytext[nchars - 1] == '+' ||
836 yytext[nchars - 1] == '-'))
840 for (ic = nchars - 2; ic >= 0; ic--)
843 if (c == '~' || c == '!' || c == '@' ||
844 c == '#' || c == '^' || c == '&' ||
845 c == '|' || c == '`' || c == '?' ||
852 * didn't find a qualifying character, so remove
857 } while (nchars > 1 &&
858 (yytext[nchars - 1] == '+' ||
859 yytext[nchars - 1] == '-'));
865 /* Strip the unwanted chars from the token */
882 /* throw back the .., and treat as integer */
891 * throw back the [Ee], and figure out whether what
892 * remains is an {integer} or {decimal}.
893 * (in psql, we don't actually care...)
899 /* throw back the [Ee][+-], and proceed as above */
914 if (cur_state->buffer_stack == NULL)
916 cur_state->start_state = YY_START;
917 return LEXRES_EOL; /* end of input reached */
921 * We were expanding a variable, so pop the inclusion
922 * stack and keep lexing
924 psqlscan_pop_buffer_stack(cur_state);
925 psqlscan_select_top_buffer(cur_state);
933 * Create a lexer working state struct.
935 * callbacks is a struct of function pointers that encapsulate some
936 * behavior we need from the surrounding program. This struct must
937 * remain valid for the lifespan of the PsqlScanState.
940 psql_scan_create(const PsqlScanCallbacks *callbacks)
944 state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
946 state->callbacks = callbacks;
948 yylex_init(&state->scanner);
950 yyset_extra(state, state->scanner);
952 psql_scan_reset(state);
958 * Destroy a lexer working state struct, releasing all resources.
961 psql_scan_destroy(PsqlScanState state)
963 psql_scan_finish(state);
965 psql_scan_reset(state);
967 yylex_destroy(state->scanner);
973 * Set the callback passthrough pointer for the lexer.
975 * This could have been integrated into psql_scan_create, but keeping it
976 * separate allows the application to change the pointer later, which might
980 psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
982 state->cb_passthrough = passthrough;
986 * Set up to perform lexing of the given input line.
988 * The text at *line, extending for line_len bytes, will be scanned by
989 * subsequent calls to the psql_scan routines. psql_scan_finish should
990 * be called when scanning is complete. Note that the lexer retains
991 * a pointer to the storage at *line --- this string must not be altered
992 * or freed until after psql_scan_finish is called.
994 * encoding is the libpq identifier for the character encoding in use,
995 * and std_strings says whether standard_conforming_strings is on.
998 psql_scan_setup(PsqlScanState state,
999 const char *line, int line_len,
1000 int encoding, bool std_strings)
1002 /* Mustn't be scanning already */
1003 Assert(state->scanbufhandle == NULL);
1004 Assert(state->buffer_stack == NULL);
1006 /* Do we need to hack the character set encoding? */
1007 state->encoding = encoding;
1008 state->safe_encoding = pg_valid_server_encoding_id(encoding);
1010 /* Save standard-strings flag as well */
1011 state->std_strings = std_strings;
1013 /* Set up flex input buffer with appropriate translation and padding */
1014 state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
1016 state->scanline = line;
1018 /* Set lookaside data in case we have to map unsafe encoding */
1019 state->curline = state->scanbuf;
1020 state->refline = state->scanline;
1024 * Do lexical analysis of SQL command text.
1026 * The text previously passed to psql_scan_setup is scanned, and appended
1027 * (possibly with transformation) to query_buf.
1029 * The return value indicates the condition that stopped scanning:
1031 * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
1032 * transferred to query_buf.) The command accumulated in query_buf should
1033 * be executed, then clear query_buf and call again to scan the remainder
1036 * PSCAN_BACKSLASH: found a backslash that starts a special command.
1037 * Any previous data on the line has been transferred to query_buf.
1038 * The caller will typically next apply a separate flex lexer to scan
1039 * the special command.
1041 * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1042 * incomplete SQL command. *prompt is set to the appropriate prompt type.
1044 * PSCAN_EOL: the end of the line was reached, and there is no lexical
1045 * reason to consider the command incomplete. The caller may or may not
1046 * choose to send it. *prompt is set to the appropriate prompt type if
1047 * the caller chooses to collect more input.
1049 * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1050 * be called next, then the cycle may be repeated with a fresh input line.
1052 * In all cases, *prompt is set to an appropriate prompt type code for the
1053 * next line-input operation.
/*
 * Run the lexer over the current input, appending to query_buf, and translate
 * the way it stopped into a PsqlScanResult plus a prompt code in *prompt.
 * NOTE(review): several short lines of this function (braces, most "case"
 * labels, "break"s, the lexresult declaration) are not visible in this
 * excerpt; the comments added below describe only the visible statements.
 */
1056 psql_scan(PsqlScanState state,
1057 PQExpBuffer query_buf,
1058 promptStatus_t *prompt)
1060 PsqlScanResult result;
1063 /* Must be scanning already */
1064 Assert(state->scanbufhandle != NULL);
1066 /* Set current output target */
1067 state->output_buf = query_buf;
1069 /* Set input source */
/* An entry on the buffer stack (pushed for variable expansion) is selected here */
1070 if (state->buffer_stack != NULL)
1071 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
/* presumably the "else" branch: the main line buffer -- confirm against the full source */
1073 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
/* yylex's first argument is unused by this lexer, hence NULL */
1076 lexresult = yylex(NULL, state->scanner);
1079 * Check termination state and return appropriate result info.
/* At end of input the outcome depends on the start state the lexer stopped in */
1083 case LEXRES_EOL: /* end of input */
1084 switch (state->start_state)
1087 case xuiend: /* we treat these like INITIAL */
/* An unclosed parenthesis means the command cannot be complete yet */
1089 if (state->paren_depth > 0)
1091 result = PSCAN_INCOMPLETE;
1092 *prompt = PROMPT_PAREN;
/* Something has been collected in query_buf */
1094 else if (query_buf->len > 0)
1097 *prompt = PROMPT_CONTINUE;
1101 /* never bother to send an empty buffer */
1102 result = PSCAN_INCOMPLETE;
1103 *prompt = PROMPT_READY;
/*
 * Each pair below reports an incomplete command and picks the prompt for one
 * of the remaining start states.
 * NOTE(review): the case labels choosing each pair are not visible here.
 */
1107 result = PSCAN_INCOMPLETE;
1108 *prompt = PROMPT_SINGLEQUOTE;
1111 result = PSCAN_INCOMPLETE;
1112 *prompt = PROMPT_COMMENT;
1115 result = PSCAN_INCOMPLETE;
1116 *prompt = PROMPT_DOUBLEQUOTE;
1119 result = PSCAN_INCOMPLETE;
1120 *prompt = PROMPT_SINGLEQUOTE;
1123 result = PSCAN_INCOMPLETE;
1124 *prompt = PROMPT_SINGLEQUOTE;
1127 result = PSCAN_INCOMPLETE;
1128 *prompt = PROMPT_SINGLEQUOTE;
1131 result = PSCAN_INCOMPLETE;
1132 *prompt = PROMPT_DOLLARQUOTE;
1135 result = PSCAN_INCOMPLETE;
1136 *prompt = PROMPT_DOUBLEQUOTE;
1139 result = PSCAN_INCOMPLETE;
1140 *prompt = PROMPT_SINGLEQUOTE;
1143 /* can't get here */
1144 fprintf(stderr, "invalid YY_START\n");
/* The two remaining lexer results map directly onto a scan result */
1148 case LEXRES_SEMI: /* semicolon */
1149 result = PSCAN_SEMICOLON;
1150 *prompt = PROMPT_READY;
1152 case LEXRES_BACKSLASH: /* backslash */
1153 result = PSCAN_BACKSLASH;
1154 *prompt = PROMPT_READY;
1157 /* can't get here */
1158 fprintf(stderr, "invalid yylex result\n");
1166 * Clean up after scanning a string. This flushes any unread input and
1167 * releases resources (but not the PsqlScanState itself). Note however
1168 * that this does not reset the lexer scan state; that can be done by
1169 * psql_scan_reset(), which is an orthogonal operation.
1171 * It is legal to call this when not scanning anything (makes it easier
1172 * to deal with error recovery).
1175 psql_scan_finish(PsqlScanState state)
1177 /* Drop any incomplete variable expansions. */
1178 while (state->buffer_stack != NULL)
1179 psqlscan_pop_buffer_stack(state);
1181 /* Done with the outer scan buffer, too */
1182 if (state->scanbufhandle)
1183 yy_delete_buffer(state->scanbufhandle, state->scanner);
1184 state->scanbufhandle = NULL;
1186 free(state->scanbuf);
1187 state->scanbuf = NULL;
1191 * Reset lexer scanning state to start conditions. This is appropriate
1192 * for executing \r psql commands (or any other time that we discard the
1193 * prior contents of query_buf). It is not, however, necessary to do this
1194 * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1195 * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1196 * conditions are returned.
1198 * Note that this is unrelated to flushing unread input; that task is
1199 * done by psql_scan_finish().
1202 psql_scan_reset(PsqlScanState state)
1204 state->start_state = INITIAL;
1205 state->paren_depth = 0;
1206 state->xcdepth = 0; /* not really necessary */
1207 if (state->dolqstart)
1208 free(state->dolqstart);
1209 state->dolqstart = NULL;
1213 * Reselect this lexer (psqlscan.l) after using another one.
1215 * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1216 * state, because we'd never switch to another lexer in a different state.
1217 * However, we don't want to reset e.g. paren_depth, so this can't be
1218 * the same as psql_scan_reset().
1220 * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1221 * must be a superset of this.
1223 * Note: it seems likely that other lexers could just assign INITIAL for
1224 * themselves, since that probably has the value zero in every flex-generated
1225 * lexer. But let's not assume that.
1228 psql_scan_reselect_sql_lexer(PsqlScanState state)
1230 state->start_state = INITIAL;
1234 * Return true if lexer is currently in an "inside quotes" state.
1236 * This is pretty grotty but is needed to preserve the old behavior
1237 * that mainloop.c drops blank lines not inside quotes without even
1241 psql_scan_in_quote(PsqlScanState state)
1243 return state->start_state != INITIAL;
1247 * Push the given string onto the stack of stuff to scan.
1249 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1252 psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
1253 const char *varname)
1255 StackElem *stackelem;
1257 stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1260 * In current usage, the passed varname points at the current flex input
1261 * buffer; we must copy it before calling psqlscan_prepare_buffer()
1262 * because that will change the buffer state.
1264 stackelem->varname = varname ? pg_strdup(varname) : NULL;
1266 stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
1267 &stackelem->bufstring);
1268 state->curline = stackelem->bufstring;
1269 if (state->safe_encoding)
1271 stackelem->origstring = NULL;
1272 state->refline = stackelem->bufstring;
1276 stackelem->origstring = pg_strdup(newstr);
1277 state->refline = stackelem->origstring;
1279 stackelem->next = state->buffer_stack;
1280 state->buffer_stack = stackelem;
1284 * Pop the topmost buffer stack item (there must be one!)
1286 * NB: after this, the flex input state is unspecified; caller must
1287 * switch to an appropriate buffer to continue lexing.
1288 * See psqlscan_select_top_buffer().
1291 psqlscan_pop_buffer_stack(PsqlScanState state)
1293 StackElem *stackelem = state->buffer_stack;
1295 state->buffer_stack = stackelem->next;
1296 yy_delete_buffer(stackelem->buf, state->scanner);
1297 free(stackelem->bufstring);
1298 if (stackelem->origstring)
1299 free(stackelem->origstring);
1300 if (stackelem->varname)
1301 free(stackelem->varname);
1306 * Select the topmost surviving buffer as the active input.
1309 psqlscan_select_top_buffer(PsqlScanState state)
1311 StackElem *stackelem = state->buffer_stack;
1313 if (stackelem != NULL)
1315 yy_switch_to_buffer(stackelem->buf, state->scanner);
1316 state->curline = stackelem->bufstring;
1317 state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
1321 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1322 state->curline = state->scanbuf;
1323 state->refline = state->scanline;
1328 * Check if specified variable name is the source for any string
1329 * currently being scanned
1332 psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
1334 StackElem *stackelem;
1336 for (stackelem = state->buffer_stack;
1338 stackelem = stackelem->next)
1340 if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1347 * Set up a flex input buffer to scan the given data. We always make a
1348 * copy of the data. If working in an unsafe encoding, the copy has
1349 * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1351 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
/*
 * Build a flex buffer over a private, padded copy of the len bytes at txt and
 * return the handle produced by yy_scan_buffer().
 * NOTE(review): the end of the parameter list and several body lines are not
 * visible in this excerpt.  Callers pass a fourth argument (the address of a
 * string pointer), presumably to receive the copy -- confirm against the
 * full source.
 */
1354 psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
1359 /* Flex wants two \0 characters after the actual data */
1360 newtxt = pg_malloc(len + 2);
/* Write both terminator bytes right after the data */
1362 newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
/* A safe encoding needs no translation: copy the bytes unchanged */
1364 if (state->safe_encoding)
1365 memcpy(newtxt, txt, len);
1368 /* Gotta do it the hard way */
/* Byte length of the character starting at txt + i, in the client encoding */
1373 int thislen = PQmblen(txt + i, state->encoding);
1375 /* first byte should always be okay... */
/* Overwrite the character's remaining bytes with 0xFF, never going past len */
1378 while (--thislen > 0 && i < len)
1379 newtxt[i++] = (char) 0xFF;
/* The buffer size handed to flex includes the two terminator bytes */
1383 return yy_scan_buffer(newtxt, len + 2, state->scanner);
1387 * psqlscan_emit() --- body for ECHO macro
1389 * NB: this must be used for ALL and ONLY the text copied from the flex
1390 * input data. If you pass it something that is not part of the yytext
1391 * string, you are making a mistake. Internally generated text can be
1392 * appended directly to state->output_buf.
/*
 * Append the len bytes of token text at txt to state->output_buf.
 * NOTE(review): some body lines (declarations, braces, the statement run for
 * a 0xFF byte) are not visible in this excerpt; presumably a 0xFF byte is
 * replaced by the byte at the same position of the reference text -- confirm
 * against the full source.
 */
1395 psqlscan_emit(PsqlScanState state, const char *txt, int len)
1397 PQExpBuffer output_buf = state->output_buf;
/* With a safe encoding the bytes can be appended unchanged in one call */
1399 if (state->safe_encoding)
1400 appendBinaryPQExpBuffer(output_buf, txt, len);
1403 /* Gotta do it the hard way */
1404 const char *reference = state->refline;
/* Apply txt's offset within the current buffer to the reference text */
1407 reference += (txt - state->curline);
1409 for (i = 0; i < len; i++)
/* 0xFF is what psqlscan_prepare_buffer() stores over the later bytes of a multibyte character */
1413 if (ch == (char) 0xFF)
1415 appendPQExpBufferChar(output_buf, ch);
1421 * psqlscan_extract_substring --- fetch value of (part of) the current token
1423 * This is like psqlscan_emit(), except that the data is returned as a
1424 * malloc'd string rather than being pushed directly to state->output_buf.
/*
 * Copy the len bytes of token text at txt into freshly allocated storage.
 * NOTE(review): the loop body, the terminating store and the return statement
 * are not visible in this excerpt.
 */
1427 psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
/* One byte more than len; presumably room for a terminating NUL -- confirm */
1429 char *result = (char *) pg_malloc(len + 1);
/* With a safe encoding the bytes can be copied unchanged */
1431 if (state->safe_encoding)
1432 memcpy(result, txt, len);
1435 /* Gotta do it the hard way */
1436 const char *reference = state->refline;
/* Apply txt's offset within the current buffer to the reference text */
1439 reference += (txt - state->curline);
1441 for (i = 0; i < len; i++)
/* 0xFF is what psqlscan_prepare_buffer() stores over the later bytes of a multibyte character */
1445 if (ch == (char) 0xFF)
1455 * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1457 * If the variable name is found, escape its value using the appropriate
1458 * quoting method and emit the value to output_buf. (Since the result is
1459 * surely quoted, there is never any reason to rescan it.) If we don't
1460 * find the variable or escaping fails, emit the token as-is.
/*
 * Handle a :'VARIABLE' or :"VARIABLE" token of len bytes at txt; "quote" is
 * passed through to the get_variable callback.
 * NOTE(review): the declarations and the control structure choosing between
 * the two "Emit" statements are not visible in this excerpt.
 */
1463 psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
1464 PsqlScanQuoteType quote)
1469 /* Variable lookup. */
/* Skip the colon and opening quote; also drop the closing quote (3 bytes in all) */
1470 varname = psqlscan_extract_substring(state, txt + 2, len - 3);
/* The callback is optional */
1471 if (state->callbacks->get_variable)
1472 value = state->callbacks->get_variable(varname, quote,
1473 state->cb_passthrough);
1480 /* Emit the suitably-escaped value */
/* Generated text, so it is appended to output_buf directly instead of via psqlscan_emit() */
1481 appendPQExpBufferStr(state->output_buf, value);
1486 /* Emit original token as-is */
1487 psqlscan_emit(state, txt, len);
1492 psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
1497 varname = psqlscan_extract_substring(state, txt + 3, len - 4);
1498 if (state->callbacks->get_variable)
1499 value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
1500 state->cb_passthrough);
1507 psqlscan_emit(state, "TRUE", 4);
1512 psqlscan_emit(state, "FALSE", 5);