src/fe_utils/psqlscan.l

   1 %top{
   2 /*-------------------------------------------------------------------------
   3  *
   4  * psqlscan.l
   5  *        lexical scanner for SQL commands
   6  *
   7  * This lexer used to be part of psql, and that heritage is reflected in
   8  * the file name as well as function and typedef names, though it can now
   9  * be used by other frontend programs as well.  It's also possible to extend
  10  * this lexer with a compatible add-on lexer to handle program-specific
  11  * backslash commands.
  12  *
  13  * This code is mainly concerned with determining where the end of a SQL
  14  * statement is: we are looking for semicolons that are not within quotes,
  15  * comments, or parentheses.  The most reliable way to handle this is to
  16  * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
  17  * below are (except for a few) the same as the backend's, but their actions
  18  * are just ECHO whereas the backend's actions generally do other things.
  19  *
  20  * XXX The rules in this file must be kept in sync with the backend lexer!!!
  21  *
  22  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
  23  *
  24  * See psqlscan_int.h for additional commentary.
  25  *
  26  *
  27  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  28  * Portions Copyright (c) 1994, Regents of the University of California
  29  *
  30  * IDENTIFICATION
  31  *        src/fe_utils/psqlscan.l
  32  *
  33  *-------------------------------------------------------------------------
  34  */
  35 #include "postgres_fe.h"
  36
  37 #include "common/logging.h"
  38 #include "fe_utils/psqlscan.h"
  39
  40 #include "libpq-fe.h"
  41 }
  42
  43 %{
  44 /* C prologue copied verbatim into the generated scanner. */
  45 /* LCOV_EXCL_START */
  46
  47 #include "fe_utils/psqlscan_int.h"
  48
  49 /*
  50  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
  51  * doesn't presently make use of that argument, so just declare it as int.
  52  */
  53 typedef int YYSTYPE;
  54
  55 /*
  56  * Set the type of yyextra; we use it as a pointer back to the containing
  57  * PsqlScanState.
  58  */
  59 #define YY_EXTRA_TYPE PsqlScanState
  60
  61
  62 /* Return values from yylex() */
  63 #define LEXRES_EOL                      0       /* end of input */
  64 #define LEXRES_SEMI                     1       /* command-terminating semicolon found */
  65 #define LEXRES_BACKSLASH        2       /* backslash command start */
  66
  67 /* Rule actions use ECHO to hand the matched text to psqlscan_emit(); cur_state is a local set up at the top of yylex(). */
  68 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
  69
  70 /*
  71  * Work around a bug in flex 2.5.35: it emits a couple of functions that
  72  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
  73  * this would cause warnings.  Providing our own declarations should be
  74  * harmless even when the bug gets fixed.
  75  */
  76 extern int      psql_yyget_column(yyscan_t yyscanner);
  77 extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
  78
  79 %}
  80 /* Scanner configuration: reentrant, bison-bridge calling convention (yylex takes a YYSTYPE argument), generated symbols prefixed psql_yy. */
  81 %option reentrant
  82 %option bison-bridge
  83 %option 8bit
  84 %option never-interactive
  85 %option nodefault
  86 %option noinput
  87 %option nounput
  88 %option noyywrap
  89 %option warn
  90 %option prefix="psql_yy"
  91
  92 /*
  93  * All of the following definitions and rules should exactly match
  94  * src/backend/parser/scan.l so far as the flex patterns are concerned.
  95  * The rule bodies are just ECHO as opposed to what the backend does,
  96  * however.  (But be sure to duplicate code that affects the lexing process,
  97  * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
  98  * whereas scan.l has a separate one for each exclusive state.
  99  */
 100
 101 /*
 102  * OK, here is a short description of lex/flex rules behavior.
 103  * The longest pattern which matches an input string is always chosen.
 104  * For equal-length patterns, the first occurring in the rules list is chosen.
 105  * INITIAL is the starting state, to which all non-conditional rules apply.
 106  * Exclusive states change parsing rules while the state is active.  When in
 107  * an exclusive state, only those rules defined for that state apply.
 108  *
 109  * We use exclusive states for quoted strings, extended comments,
 110  * and to eliminate parsing troubles for numeric strings.
 111  * Exclusive states:
 112  *  <xb> bit string literal
 113  *  <xc> extended C-style comments
 114  *  <xd> delimited identifiers (double-quoted identifiers)
 115  *  <xh> hexadecimal byte string
 116  *  <xq> standard quoted strings
 117  *  <xqs> quote stop (detect continued strings)
 118  *  <xe> extended quoted strings (support backslash escape sequences)
 119  *  <xdolq> $foo$ quoted strings
 120  *  <xui> quoted identifier with Unicode escapes
 121  *  <xus> quoted string with Unicode escapes
 122  *
 123  * Note: we intentionally don't mimic the backend's <xeu> state; we have
 124  * no need to distinguish it from <xe> state, and no good way to get out
 125  * of it in error cases.  The backend just throws yyerror() in those
 126  * cases, but that's not an option here.
 127  */
 128
 129 %x xb
 130 %x xc
 131 %x xd
 132 %x xh
 133 %x xq
 134 %x xqs
 135 %x xe
 136 %x xdolq
 137 %x xui
 138 %x xus
 139
 140 /*
 141  * In order to make the world safe for Windows and Mac clients as well as
 142  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 143  * sequence will be seen as two successive newlines, but that doesn't cause
 144  * any problems.  Comments that start with -- and extend to the next
 145  * newline are treated as equivalent to a single whitespace character.
 146  *
 147  * NOTE a fine point: if there is no newline following --, we will absorb
 148  * everything to the end of the input as a comment.  This is correct.  Older
 149  * versions of Postgres failed to recognize -- as a comment if the input
 150  * did not end with a newline.
 151  *
 152  * XXX perhaps \f (formfeed) should be treated as a newline as well?
 153  *
 154  * XXX if you change the set of whitespace characters, fix scanner_isspace()
 155  * to agree.
 156  */
 157
 158 space                   [ \t\n\r\f]
 159 horiz_space             [ \t\f]
 160 newline                 [\n\r]
 161 non_newline             [^\n\r]
 162
 163 comment                 ("--"{non_newline}*)
 164
 165 whitespace              ({space}+|{comment})
 166
 167 /*
 168  * SQL requires at least one newline in the whitespace separating
 169  * string literals that are to be concatenated.  Silly, but who are we
 170  * to argue?  Note that {whitespace_with_newline} should not have * after
 171  * it, whereas {whitespace} should generally have a * after it...
 172  */
 173
 174 special_whitespace              ({space}+|{comment}{newline})
 175 horiz_whitespace                ({horiz_space}|{comment})
 176 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
 177
 178 quote                   '
 179 /* If we see {quote} then {quotecontinue}, the quoted string continues */
 180 quotecontinue   {whitespace_with_newline}{quote}
 181
 182 /*
 183  * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
 184  * {quotecontinue}.  It might seem that this could just be {whitespace}*,
 185  * but if there's a dash after {whitespace_with_newline}, it must be consumed
 186  * to see if there's another dash --- which would start a {comment} and thus
 187  * allow continuation of the {quotecontinue} token.
 188  */
 189 quotecontinuefail       {whitespace}*"-"?
 190
 191 /* Bit string
 192  * It is tempting to scan the string for only those characters
 193  * which are allowed. However, this leads to silently swallowed
 194  * characters if illegal characters are included in the string.
 195  * For example, if xbinside is [01] then B'ABCD' is interpreted
 196  * as a zero-length string, and the ABCD' is lost!
 197  * Better to pass the string forward and let the input routines
 198  * validate the contents.
 199  */
 200 xbstart                 [bB]{quote}
 201 xbinside                [^']*
 202
 203 /* Hexadecimal byte string */
 204 xhstart                 [xX]{quote}
 205 xhinside                [^']*
 206
 207 /* National character */
 208 xnstart                 [nN]{quote}
 209
 210 /* Quoted string that allows backslash escapes */
 211 xestart                 [eE]{quote}
 212 xeinside                [^\\']+
 213 xeescape                [\\][^0-7]
 214 xeoctesc                [\\][0-7]{1,3}
 215 xehexesc                [\\]x[0-9A-Fa-f]{1,2}
 216 xeunicode               [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
 217 xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
 218
 219 /* Extended quote
 220  * xqdouble implements embedded quote, ''''
 221  */
 222 xqstart                 {quote}
 223 xqdouble                {quote}{quote}
 224 xqinside                [^']+
 225
 226 /* $foo$ style quotes ("dollar quoting")
 227  * The quoted string starts with $foo$ where "foo" is an optional string
 228  * in the form of an identifier, except that it may not contain "$",
 229  * and extends to the first occurrence of an identical string.
 230  * There is *no* processing of the quoted text.
 231  *
 232  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
 233  * fails to match its trailing "$".
 234  */
 235 dolq_start              [A-Za-z\200-\377_]
 236 dolq_cont               [A-Za-z\200-\377_0-9]
 237 dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
 238 dolqfailed              \${dolq_start}{dolq_cont}*
 239 dolqinside              [^$]+
 240
 241 /* Double quote
 242  * Allows embedded spaces and other special characters into identifiers.
 243  */
 244 dquote                  \"
 245 xdstart                 {dquote}
 246 xdstop                  {dquote}
 247 xddouble                {dquote}{dquote}
 248 xdinside                [^"]+
 249
 250 /* Quoted identifier with Unicode escapes */
 251 xuistart                [uU]&{dquote}
 252
 253 /* Quoted string with Unicode escapes */
 254 xusstart                [uU]&{quote}
 255
 256 /* error rule to avoid backup */
 257 xufailed                [uU]&
 258
 259
 260 /* C-style comments
 261  *
 262  * The "extended comment" syntax closely resembles allowable operator syntax.
 263  * The tricky part here is to get lex to recognize a string starting with
 264  * slash-star as a comment, when interpreting it as an operator would produce
 265  * a longer match --- remember lex will prefer a longer match!  Also, if we
 266  * have something like plus-slash-star, lex will think this is a 3-character
 267  * operator whereas we want to see it as a + operator and a comment start.
 268  * The solution is two-fold:
 269  * 1. append {op_chars}* to xcstart so that it matches as much text as
 270  *    {operator} would. Then the tie-breaker (first matching rule of same
 271  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
 272  *    in case it contains a star-slash that should terminate the comment.
 273  * 2. In the operator rule, check for slash-star within the operator, and
 274  *    if found throw it back with yyless().  This handles the plus-slash-star
 275  *    problem.
 276  * Dash-dash comments have similar interactions with the operator rule.
 277  */
 278 xcstart                 \/\*{op_chars}*
 279 xcstop                  \*+\/
 280 xcinside                [^*/]+
 281 /* Identifiers: start with a letter, underscore, or high-bit byte (\200-\377); continue with those, digits, or "$". */
 282 ident_start             [A-Za-z\200-\377_]
 283 ident_cont              [A-Za-z\200-\377_0-9\$]
 284
 285 identifier              {ident_start}{ident_cont}*
 286
 287 /* Assorted special-case operators and operator-like tokens */
 288 typecast                "::"
 289 dot_dot                 \.\.
 290 colon_equals    ":="
 291
 292 /*
 293  * These operator-like tokens (unlike the above ones) also match the {operator}
 294  * rule, which means that they might be overridden by a longer match if they
 295  * are followed by a comment start or a + or - character. Accordingly, if you
 296  * add to this list, you must also add corresponding code to the {operator}
 297  * block to return the correct token in such cases. (This is not needed in
 298  * psqlscan.l since the token value is ignored there.)
 299  */
 300 equals_greater  "=>"
 301 less_equals             "<="
 302 greater_equals  ">="
 303 less_greater    "<>"
 304 not_equals              "!="
 305
 306 /*
 307  * "self" is the set of chars that should be returned as single-character
 308  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
 309  * which can be one or more characters long (but if a single-char token
 310  * appears in the "self" set, it is not to be returned as an Op).  Note
 311  * that the sets overlap, but each has some chars that are not in the other.
 312  *
 313  * If you change either set, adjust the character lists appearing in the
 314  * rule for "operator"!
 315  */
 316 self                    [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 317 op_chars                [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 318 operator                {op_chars}+
 319
 320 /*
 321  * Numbers
 322  *
 323  * Unary minus is not part of a number here.  Instead we pass it separately to
 324  * the parser, and there it gets coerced via doNegate().
 325  *
 326  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
 327  *
 328  * {realfail1} and {realfail2} are added to prevent the need for scanner
 329  * backup when the {real} rule fails to match completely.
 330  */
 331 digit                   [0-9]
 332
 333 integer                 {digit}+
 334 decimal                 (({digit}*\.{digit}+)|({digit}+\.{digit}*))
 335 decimalfail             {digit}+\.\.
 336 real                    ({integer}|{decimal})[Ee][-+]?{digit}+
 337 realfail1               ({integer}|{decimal})[Ee]
 338 realfail2               ({integer}|{decimal})[Ee][-+]
 339 /* Parameter reference: "$" followed by an {integer}, for example $1. */
 340 param                   \${integer}
 341
 342 /* psql-specific: characters allowed in variable names */
 343 variable_char   [A-Za-z\200-\377_0-9]
 344 /* Catch-all: any single character other than a newline (flex semantics of "."). */
 345 other                   .
 346
 347 /*
 348  * Dollar quoted strings are totally opaque, and no escaping is done on them.
 349  * Other quoted strings must allow some special characters such as single-quote
 350  *  and newline.
 351  * Embedded single-quotes are implemented both in the SQL standard
 352  *  style of two adjacent single quotes "''" and in the Postgres/Java style
 353  *  of escaped-quote "\'".
 354  * Other embedded escaped characters are matched explicitly; the backend drops
 355  *  the leading backslash from the string, whereas the rules here just ECHO the text.
 356  * Note that xcstart must appear before operator, as explained above!
 357  *  Also whitespace (comment) must appear before operator.
 358  */
 359
 360 %%
 361
 362 %{
 363                 /* Declare some local variables inside yylex(), for convenience */
 364                 PsqlScanState cur_state = yyextra;      /* referenced by the ECHO macro */
 365                 PQExpBuffer output_buf = cur_state->output_buf; /* inspected by the {whitespace} rule */
 366
 367                 /*
 368                  * Force flex into the state indicated by start_state.  This has a
 369                  * couple of purposes: it lets some of the functions below set a new
 370                  * starting state without ugly direct access to flex variables, and it
 371                  * allows us to transition from one flex lexer to another so that we
 372                  * can lex different parts of the source string using separate lexers.
 373                  */
 374                 BEGIN(cur_state->start_state);  /* the rules that return from yylex() store YY_START there first */
 375 %}
 376
 377 {whitespace}    {
 378                                         /*
 379                                          * Note that the whitespace rule includes both true
 380                                          * whitespace and single-line ("--" style) comments.
 381                                          * We suppress whitespace until we have collected some
 382                                          * non-whitespace data.  (This interacts with some
 383                                          * decisions in MainLoop(); see there for details.)
 384                                          */
 385                                         if (output_buf->len > 0)        /* something has already been collected */
 386                                                 ECHO;   /* otherwise the matched text is not emitted */
 387                                 }
 388
 389 {xcstart}               {
 390                                         cur_state->xcdepth = 0; /* outermost comment: no nesting yet */
 391                                         BEGIN(xc);
 392                                         /* Put back any characters past slash-star; see above */
 393                                         yyless(2);
 394                                         ECHO;
 395                                 }
 396
 397 <xc>{
 398 {xcstart}               {
 399                                         cur_state->xcdepth++;   /* a nested comment opens one level deeper */
 400                                         /* Put back any characters past slash-star; see above */
 401                                         yyless(2);
 402                                         ECHO;
 403                                 }
 404
 405 {xcstop}                {
 406                                         if (cur_state->xcdepth <= 0)    /* closing the outermost comment */
 407                                                 BEGIN(INITIAL);
 408                                         else                            /* closing a nested comment */
 409                                                 cur_state->xcdepth--;
 410                                         ECHO;
 411                                 }
 412
 413 {xcinside}              {
 414                                         ECHO;   /* run of text containing neither a star nor a slash */
 415                                 }
 416
 417 {op_chars}              {
 418                                         ECHO;   /* single operator character, e.g. a slash that does not open a nested comment */
 419                                 }
 420
 421 \*+                             {
 422                                         ECHO;   /* run of stars that is not part of a comment terminator */
 423                                 }
 424 } /* <xc> */
 425
 426 {xbstart}               {
 427                                         BEGIN(xb);      /* b or B followed by a quote: bit string literal */
 428                                         ECHO;
 429                                 }
 430 <xh>{xhinside}  |
 431 <xb>{xbinside}  {
 432                                         ECHO;   /* literal body; the "|" action above makes <xh> share this action */
 433                                 }
 434
 435 {xhstart}               {
 436                                         /* Hexadecimal bit type.
 437                                          * NOTE(review): the remark about placing a leading "x"
 438                                          * on the string to mark it as hex for the input routine
 439                                          * appears to be inherited from the backend lexer; this
 440                                          * rule only enters <xh> state and ECHOes the matched text.
 441                                          */
 442                                         BEGIN(xh);
 443                                         ECHO;
 444                                 }
 445
 446 {xnstart}               {
 447                                         yyless(1);      /* eat only 'n' this time; the quote is put back and rescanned */
 448                                         ECHO;
 449                                 }
 450
 451 {xqstart}               {
 452                                         if (cur_state->std_strings)     /* plain quote: standard quoted string ... */
 453                                                 BEGIN(xq);
 454                                         else                            /* ... or one that supports backslash escapes */
 455                                                 BEGIN(xe);
 456                                         ECHO;
 457                                 }
 458 {xestart}               {
 459                                         BEGIN(xe);      /* e or E prefix: always the backslash-escape state */
 460                                         ECHO;
 461                                 }
 462 {xusstart}              {
 463                                         BEGIN(xus);     /* u& prefix: quoted string with Unicode escapes */
 464                                         ECHO;
 465                                 }
 466
 467 <xb,xh,xq,xe,xus>{quote} {
 468                                         /*
 469                                          * When we are scanning a quoted string and see an end
 470                                          * quote, we must look ahead for a possible continuation.
 471                                          * If we don't see one, we know the end quote was in fact
 472                                          * the end of the string.  To reduce the lexer table size,
 473                                          * we use a single "xqs" state to do the lookahead for all
 474                                          * types of strings.
 475                                          */
 476                                         cur_state->state_before_str_stop = YYSTATE;     /* string state to resume if a continuation follows */
 477                                         BEGIN(xqs);
 478                                         ECHO;
 479                                 }
 480 <xqs>{quotecontinue} {
 481                                         /*
 482                                          * Found a quote continuation, so return to the in-quote
 483                                          * state and continue scanning the literal.  The matched
 484                                          * whitespace and reopening quote are passed to ECHO.
 485                                          */
 486                                         BEGIN(cur_state->state_before_str_stop);
 487                                         ECHO;
 488                                 }
 489 <xqs>{quotecontinuefail} |
 490 <xqs>{other}    {
 491                                         /*
 492                                          * Failed to see a quote continuation.  Throw back
 493                                          * everything after the end quote and go back to INITIAL
 494                                          * state; the string literal is complete.
 495                                          */
 496                                         yyless(0);
 497                                         BEGIN(INITIAL);
 498                                         /* There's nothing to echo ... */
 499                                 }
 500
 501 <xq,xe,xus>{xqdouble} {
 502                                         ECHO;   /* doubled quote embedded in the string */
 503                                 }
 504 <xq,xus>{xqinside}  {
 505                                         ECHO;   /* text up to the next quote */
 506                                 }
 507 <xe>{xeinside}  {
 508                                         ECHO;   /* text up to the next quote or backslash */
 509                                 }
 510 <xe>{xeunicode} {
 511                                         ECHO;   /* backslash, then u plus 4 hex digits or U plus 8 hex digits */
 512                                 }
 513 <xe>{xeunicodefail}     {
 514                                         ECHO;   /* incomplete Unicode escape; presumably matched to avoid lexer backup, like the other "fail" patterns */
 515                                 }
 516 <xe>{xeescape}  {
 517                                         ECHO;   /* backslash followed by any character that is not an octal digit */
 518                                 }
 519 <xe>{xeoctesc}  {
 520                                         ECHO;   /* backslash followed by one to three octal digits */
 521                                 }
 522 <xe>{xehexesc}  {
 523                                         ECHO;   /* backslash, x, then one or two hex digits */
 524                                 }
 525 <xe>.                   {
 526                                         /* This is only needed for \ just before EOF */
 527                                         ECHO;
 528                                 }
 529
 530 {dolqdelim}             {
 531                                         cur_state->dolqstart = pg_strdup(yytext);       /* keep a copy of the opening delimiter for the strcmp() in <xdolq> */
 532                                         BEGIN(xdolq);
 533                                         ECHO;
 534                                 }
 535 {dolqfailed}    {
 536                                         /* throw back all but the initial "$" */
 537                                         yyless(1);
 538                                         ECHO;
 539                                 }
 540 <xdolq>{dolqdelim} {
 541                                         if (strcmp(yytext, cur_state->dolqstart) == 0)
 542                                         {
 543                                                 free(cur_state->dolqstart);     /* matching close delimiter: release the saved copy */
 544                                                 cur_state->dolqstart = NULL;
 545                                                 BEGIN(INITIAL);
 546                                         }
 547                                         else
 548                                         {
 549                                                 /*
 550                                                  * When we fail to match $...$ to dolqstart, transfer
 551                                                  * the $... part to the output, but put back the final
 552                                                  * $ for rescanning.  Consider $delim$...$junk$delim$
 553                                                  */
 554                                                 yyless(yyleng - 1);
 555                                         }
 556                                         ECHO;
 557                                 }
 558 <xdolq>{dolqinside} {
 559                                         ECHO;   /* text up to the next "$" */
 560                                 }
 561 <xdolq>{dolqfailed} {
 562                                         ECHO;   /* "$" plus identifier characters with no closing "$" */
 563                                 }
 564 <xdolq>.                {
 565                                         /* This is only needed for $ inside the quoted text */
 566                                         ECHO;
 567                                 }
 568
 569 {xdstart}               {
 570                                         BEGIN(xd);      /* opening double quote of a delimited identifier */
 571                                         ECHO;
 572                                 }
 573 {xuistart}              {
 574                                         BEGIN(xui);     /* u& followed by a double quote: identifier with Unicode escapes */
 575                                         ECHO;
 576                                 }
 577 <xd>{xdstop}    {
 578                                         BEGIN(INITIAL); /* closing double quote */
 579                                         ECHO;
 580                                 }
 581 <xui>{dquote}   {
 582                                         BEGIN(INITIAL); /* closing double quote */
 583                                         ECHO;
 584                                 }
 585 <xd,xui>{xddouble}      {
 586                                         ECHO;   /* doubled double-quote embedded in the identifier */
 587                                 }
 588 <xd,xui>{xdinside}      {
 589                                         ECHO;   /* identifier text up to the next double quote */
 590                                 }
 591
 592 {xufailed}      {
 593                                         /* throw back all but the initial u/U */
 594                                         yyless(1);
 595                                         ECHO;
 596                                 }
 597
 598 {typecast}              {
 599                                         ECHO;   /* this and the operator-like tokens below are simply echoed; token values are ignored in this lexer */
 600                                 }
 601
 602 {dot_dot}               {
 603                                         ECHO;
 604                                 }
 605
 606 {colon_equals}  {
 607                                         ECHO;
 608                                 }
 609
 610 {equals_greater} {
 611                                         ECHO;
 612                                 }
 613
 614 {less_equals}   {
 615                                         ECHO;
 616                                 }
 617
 618 {greater_equals} {
 619                                         ECHO;
 620                                 }
 621
 622 {less_greater}  {
 623                                         ECHO;
 624                                 }
 625
 626 {not_equals}    {
 627                                         ECHO;
 628                                 }
 629
 630         /*
 631          * These rules are specific to psql --- they implement parenthesis
 632          * counting and detection of command-ending semicolon.  These must
 633          * appear before the {self} rule so that they take precedence over it.
 634          */
 635
 636 "("                             {
 637                                         cur_state->paren_depth++;
 638                                         ECHO;
 639                                 }
 640
 641 ")"                             {
 642                                         if (cur_state->paren_depth > 0) /* an unmatched close paren leaves the depth at zero */
 643                                                 cur_state->paren_depth--;
 644                                         ECHO;
 645                                 }
 646
 647 ";"                             {
 648                                         ECHO;
 649                                         if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0) /* NOTE(review): begin_depth is maintained outside this excerpt; presumably block nesting, confirm */
 650                                         {
 651                                                 /* Terminate lexing temporarily */
 652                                                 cur_state->start_state = YY_START;      /* restored by BEGIN() at the top of yylex() on the next call */
 653                                                 cur_state->identifier_count = 0;
 654                                                 return LEXRES_SEMI;
 655                                         }
 656                                 }
 657
 658         /*
 659          * psql-specific rules to handle backslash commands and variable
 660          * substitution.  We want these before {self}, also.
 661          */
 662
 663 "\\"[;:]                {
 664                                         /* Force a semi-colon or colon into the query buffer */
 665                                         psqlscan_emit(cur_state, yytext + 1, 1);        /* skip the backslash: emit only the second character */
 666                                         if (yytext[1] == ';')   /* same counter reset as the plain semicolon rule, but lexing continues */
 667                                                 cur_state->identifier_count = 0;
 668                                 }
 669
 670 "\\"                    {
 671                                         /* Terminate lexing temporarily */
 672                                         cur_state->start_state = YY_START;      /* restored by BEGIN() at the top of yylex() on the next call */
 673                                         return LEXRES_BACKSLASH;        /* the backslash itself is not passed to ECHO */
 674                                 }
 675
 676 :{variable_char}+       {
 677                                         /* Possible psql variable substitution (colon followed by a variable name) */
 678                                         char       *varname;
 679                                         char       *value;
 680
 681                                         varname = psqlscan_extract_substring(cur_state,
 682                                                                                                                  yytext + 1,
 683                                                                                                                  yyleng - 1);   /* the name without its leading colon */
 684                                         if (cur_state->callbacks->get_variable) /* the lookup callback is optional */
 685                                                 value = cur_state->callbacks->get_variable(varname,
 686                                                                                                                                    PQUOTE_PLAIN,
 687                                                                                                                                    cur_state->cb_passthrough);
 688                                         else
 689                                                 value = NULL;   /* no callback: handled like an undefined variable below */
 690
 691                                         if (value)
 692                                         {
 693                                                 /* It is a variable, check for recursion */
 694                                                 if (psqlscan_var_is_current_source(cur_state, varname))
 695                                                 {
 696                                                         /* Recursive expansion --- don't go there */
 697                                                         pg_log_warning("skipping recursive expansion of variable \"%s\"",
 698                                                                                                                           varname);
 699                                                         /* Instead copy the string as is */
 700                                                         ECHO;
 701                                                 }
 702                                                 else
 703                                                 {
 704                                                         /* OK, perform substitution */
 705                                                         psqlscan_push_new_buffer(cur_state, value, varname);
 706                                                         /* yy_scan_string already made buffer active */
 707                                                 }
 708                                                 free(value);    /* the callback result is released here on both paths */
 709                                         }
 710                                         else
 711                                         {
 712                                                 /*
 713                                                  * if the variable doesn't exist we'll copy the string
 714                                                  * as is
 715                                                  */
 716                                                 ECHO;
 717                                         }
 718
 719                                         free(varname);  /* the extracted name is released on every path */
 720                                 }
 721
:'{variable_char}+'	{
					/*
					 * :'name' --- variable reference with single quotes.  The
					 * whole token (colon and quotes included) is handed to
					 * psqlscan_escape_variable(), asking for SQL-literal
					 * quoting.
					 */
					psqlscan_escape_variable(cur_state, yytext, yyleng,
											 PQUOTE_SQL_LITERAL);
				}

:\"{variable_char}+\"	{
					/*
					 * :"name" --- variable reference with double quotes.  Same
					 * treatment, but asking for SQL-identifier quoting.
					 */
					psqlscan_escape_variable(cur_state, yytext, yyleng,
											 PQUOTE_SQL_IDENT);
				}

:\{\?{variable_char}+\}	{
					/*
					 * :{?name} --- the whole token is handed to
					 * psqlscan_test_variable().
					 * NOTE(review): presumably this substitutes a boolean
					 * saying whether the variable is defined; confirm against
					 * psqlscan_test_variable().
					 */
					psqlscan_test_variable(cur_state, yytext, yyleng);
				}
 735
 736         /*
 737          * These rules just avoid the need for scanner backup if one of the
 738          * three rules above fails to match completely.
 739          */
 740
:'{variable_char}*	{
					/*
					 * Colon, quote and name characters but no closing quote.
					 * Throw back everything but the colon; the colon alone is
					 * echoed and the rest gets rescanned.
					 */
					yyless(1);
					ECHO;
				}

:\"{variable_char}*	{
					/*
					 * Same for an unterminated double-quoted reference:
					 * throw back everything but the colon.
					 */
					yyless(1);
					ECHO;
				}

:\{\?{variable_char}*	{
					/*
					 * Same for a test reference lacking its closing brace:
					 * throw back everything but the colon.
					 */
					yyless(1);
					ECHO;
				}
:\{	{
					/*
					 * Colon and open brace with no question mark following.
					 * Throw back everything but the colon.
					 */
					yyless(1);
					ECHO;
				}
 763
 764         /*
 765          * Back to backend-compatible rules.
 766          */
 767
{self}			{
					/* No special handling here: just pass the text to ECHO */
					ECHO;
				}
 771
{operator}		{
					/*
					 * Decide how much of the matched text really is one
					 * operator, give the excess back to the scanner with
					 * yyless(), and echo what is left.  nchars is the number
					 * of leading characters we intend to keep.
					 *
					 * Check for embedded slash-star or dash-dash; those
					 * are comment starts, so operator must stop there.
					 * Note that slash-star or dash-dash at the first
					 * character will match a prior rule, not this one.
					 */
					int			nchars = yyleng;
					char	   *slashstar = strstr(yytext, "/*");
					char	   *dashdash = strstr(yytext, "--");

					if (slashstar && dashdash)
					{
						/* if both appear, take the first one */
						if (slashstar > dashdash)
							slashstar = dashdash;
					}
					else if (!slashstar)
						slashstar = dashdash;
					/*
					 * From here on, slashstar points at the earliest comment
					 * start of either kind, or is NULL if there is none.
					 */
					if (slashstar)
						nchars = slashstar - yytext;

					/*
					 * For SQL compatibility, '+' and '-' cannot be the
					 * last char of a multi-char operator unless the operator
					 * contains chars that are not in SQL operators.
					 * The idea is to lex '=-' as two operators, but not
					 * to forbid operator names like '?-' that could not be
					 * sequences of SQL operators.
					 */
					if (nchars > 1 &&
						(yytext[nchars - 1] == '+' ||
						 yytext[nchars - 1] == '-'))
					{
						int			ic;

						/*
						 * Scan backwards over the characters before the
						 * trailing sign, stopping at the first one that
						 * permits a trailing '+' or '-'.  ic ends up below
						 * zero if no such character exists.
						 */
						for (ic = nchars - 2; ic >= 0; ic--)
						{
							char c = yytext[ic];
							if (c == '~' || c == '!' || c == '@' ||
								c == '#' || c == '^' || c == '&' ||
								c == '|' || c == '`' || c == '?' ||
								c == '%')
								break;
						}
						if (ic < 0)
						{
							/*
							 * didn't find a qualifying character, so remove
							 * all trailing [+-]
							 * (but always keep at least one character)
							 */
							do {
								nchars--;
							} while (nchars > 1 &&
								 (yytext[nchars - 1] == '+' ||
								  yytext[nchars - 1] == '-'));
						}
					}

					if (nchars < yyleng)
					{
						/* Strip the unwanted chars from the token */
						yyless(nchars);
					}
					/* Emit whatever is left of the token */
					ECHO;
				}
 838
{param}			{
					/* No special handling here: just pass the text to ECHO */
					ECHO;
				}
 842
{integer}		{
					/* passed through as-is */
					ECHO;
				}
{decimal}		{
					/* passed through as-is */
					ECHO;
				}
{decimalfail}	{
					/*
					 * throw back the .., and treat as integer
					 * (the two thrown-back characters get rescanned)
					 */
					yyless(yyleng - 2);
					ECHO;
				}
{real}			{
					/* passed through as-is */
					ECHO;
				}
{realfail1}		{
					/*
					 * throw back the [Ee], and figure out whether what
					 * remains is an {integer} or {decimal}.
					 * (in psql, we don't actually care...)
					 */
					yyless(yyleng - 1);
					ECHO;
				}
{realfail2}		{
					/* throw back the [Ee][+-], and proceed as above */
					yyless(yyleng - 2);
					ECHO;
				}
 871
 872
 873 {identifier}    {
 874                                         /*
 875                                          * We need to track if we are inside a BEGIN .. END block
 876                                          * in a function definition, so that semicolons contained
 877                                          * therein don't terminate the whole statement.  Short of
 878                                          * writing a full parser here, the following heuristic
 879                                          * should work.  First, we track whether the beginning of
 880                                          * the statement matches CREATE [OR REPLACE]
 881                                          * {FUNCTION|PROCEDURE}
 882                                          */
 883
 884                                         if (cur_state->identifier_count == 0)
 885                                                 memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
 886
 887                                         if (pg_strcasecmp(yytext, "create") == 0 ||
 888                                                 pg_strcasecmp(yytext, "function") == 0 ||
 889                                                 pg_strcasecmp(yytext, "procedure") == 0 ||
 890                                                 pg_strcasecmp(yytext, "or") == 0 ||
 891                                                 pg_strcasecmp(yytext, "replace") == 0)
 892                                         {
 893                                                 if (cur_state->identifier_count < sizeof(cur_state->identifiers))
 894                                                         cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);
 895                                         }
 896
 897                                         cur_state->identifier_count++;
 898
 899                                         if (cur_state->identifiers[0] == 'c' &&
 900                                                 (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
 901                                                  (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
 902                                                   (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
 903                                                 cur_state->paren_depth == 0)
 904                                         {
 905                                                 if (pg_strcasecmp(yytext, "begin") == 0)
 906                                                         cur_state->begin_depth++;
 907                                                 else if (pg_strcasecmp(yytext, "case") == 0)
 908                                                 {
 909                                                         /*
 910                                                          * CASE also ends with END.  We only need to track
 911                                                          * this if we are already inside a BEGIN.
 912                                                          */
 913                                                         if (cur_state->begin_depth >= 1)
 914                                                                 cur_state->begin_depth++;
 915                                                 }
 916                                                 else if (pg_strcasecmp(yytext, "end") == 0)
 917                                                 {
 918                                                         if (cur_state->begin_depth > 0)
 919                                                                 cur_state->begin_depth--;
 920                                                 }
 921                                         }
 922
 923                                         ECHO;
 924                                 }
 925
{other}			{
					/* No special handling here: just pass the text to ECHO */
					ECHO;
				}
 929
 930 <<EOF>>                 {
 931                                         if (cur_state->buffer_stack == NULL)
 932                                         {
 933                                                 cur_state->start_state = YY_START;
 934                                                 return LEXRES_EOL;              /* end of input reached */
 935                                         }
 936
 937                                         /*
 938                                          * We were expanding a variable, so pop the inclusion
 939                                          * stack and keep lexing
 940                                          */
 941                                         psqlscan_pop_buffer_stack(cur_state);
 942                                         psqlscan_select_top_buffer(cur_state);
 943                                 }
 944
 945 %%
 946
 947 /* LCOV_EXCL_STOP */
 948
 949 /*
 950  * Create a lexer working state struct.
 951  *
 952  * callbacks is a struct of function pointers that encapsulate some
 953  * behavior we need from the surrounding program.  This struct must
 954  * remain valid for the lifespan of the PsqlScanState.
 955  */
 956 PsqlScanState
 957 psql_scan_create(const PsqlScanCallbacks *callbacks)
 958 {
 959         PsqlScanState state;
 960
 961         state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
 962
 963         state->callbacks = callbacks;
 964
 965         yylex_init(&state->scanner);
 966
 967         yyset_extra(state, state->scanner);
 968
 969         psql_scan_reset(state);
 970
 971         return state;
 972 }
 973
 974 /*
 975  * Destroy a lexer working state struct, releasing all resources.
 976  */
 977 void
 978 psql_scan_destroy(PsqlScanState state)
 979 {
 980         psql_scan_finish(state);
 981
 982         psql_scan_reset(state);
 983
 984         yylex_destroy(state->scanner);
 985
 986         free(state);
 987 }
 988
 989 /*
 990  * Set the callback passthrough pointer for the lexer.
 991  *
 992  * This could have been integrated into psql_scan_create, but keeping it
 993  * separate allows the application to change the pointer later, which might
 994  * be useful.
 995  */
 996 void
 997 psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
 998 {
 999         state->cb_passthrough = passthrough;
1000 }
1001
1002 /*
1003  * Set up to perform lexing of the given input line.
1004  *
1005  * The text at *line, extending for line_len bytes, will be scanned by
1006  * subsequent calls to the psql_scan routines.  psql_scan_finish should
1007  * be called when scanning is complete.  Note that the lexer retains
1008  * a pointer to the storage at *line --- this string must not be altered
1009  * or freed until after psql_scan_finish is called.
1010  *
1011  * encoding is the libpq identifier for the character encoding in use,
1012  * and std_strings says whether standard_conforming_strings is on.
1013  */
1014 void
1015 psql_scan_setup(PsqlScanState state,
1016                                 const char *line, int line_len,
1017                                 int encoding, bool std_strings)
1018 {
1019         /* Mustn't be scanning already */
1020         Assert(state->scanbufhandle == NULL);
1021         Assert(state->buffer_stack == NULL);
1022
1023         /* Do we need to hack the character set encoding? */
1024         state->encoding = encoding;
1025         state->safe_encoding = pg_valid_server_encoding_id(encoding);
1026
1027         /* Save standard-strings flag as well */
1028         state->std_strings = std_strings;
1029
1030         /* Set up flex input buffer with appropriate translation and padding */
1031         state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
1032                                                                                                    &state->scanbuf);
1033         state->scanline = line;
1034
1035         /* Set lookaside data in case we have to map unsafe encoding */
1036         state->curline = state->scanbuf;
1037         state->refline = state->scanline;
1038 }
1039
1040 /*
1041  * Do lexical analysis of SQL command text.
1042  *
1043  * The text previously passed to psql_scan_setup is scanned, and appended
1044  * (possibly with transformation) to query_buf.
1045  *
1046  * The return value indicates the condition that stopped scanning:
1047  *
1048  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
1049  * transferred to query_buf.)  The command accumulated in query_buf should
1050  * be executed, then clear query_buf and call again to scan the remainder
1051  * of the line.
1052  *
1053  * PSCAN_BACKSLASH: found a backslash that starts a special command.
1054  * Any previous data on the line has been transferred to query_buf.
1055  * The caller will typically next apply a separate flex lexer to scan
1056  * the special command.
1057  *
1058  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1059  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
1060  *
1061  * PSCAN_EOL: the end of the line was reached, and there is no lexical
1062  * reason to consider the command incomplete.  The caller may or may not
1063  * choose to send it.  *prompt is set to the appropriate prompt type if
1064  * the caller chooses to collect more input.
1065  *
1066  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1067  * be called next, then the cycle may be repeated with a fresh input line.
1068  *
1069  * In all cases, *prompt is set to an appropriate prompt type code for the
1070  * next line-input operation.
1071  */
1072 PsqlScanResult
1073 psql_scan(PsqlScanState state,
1074                   PQExpBuffer query_buf,
1075                   promptStatus_t *prompt)
1076 {
1077         PsqlScanResult result;
1078         int                     lexresult;
1079
1080         /* Must be scanning already */
1081         Assert(state->scanbufhandle != NULL);
1082
1083         /* Set current output target */
1084         state->output_buf = query_buf;
1085
1086         /* Set input source */
1087         if (state->buffer_stack != NULL)
1088                 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
1089         else
1090                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1091
1092         /* And lex. */
1093         lexresult = yylex(NULL, state->scanner);
1094
1095         /*
1096          * Check termination state and return appropriate result info.
1097          */
1098         switch (lexresult)
1099         {
1100                 case LEXRES_EOL:                /* end of input */
1101                         switch (state->start_state)
1102                         {
1103                                 case INITIAL:
1104                                 case xqs:               /* we treat this like INITIAL */
1105                                         if (state->paren_depth > 0)
1106                                         {
1107                                                 result = PSCAN_INCOMPLETE;
1108                                                 *prompt = PROMPT_PAREN;
1109                                         }
1110                                         else if (state->begin_depth > 0)
1111                                         {
1112                                                 result = PSCAN_INCOMPLETE;
1113                                                 *prompt = PROMPT_CONTINUE;
1114                                         }
1115                                         else if (query_buf->len > 0)
1116                                         {
1117                                                 result = PSCAN_EOL;
1118                                                 *prompt = PROMPT_CONTINUE;
1119                                         }
1120                                         else
1121                                         {
1122                                                 /* never bother to send an empty buffer */
1123                                                 result = PSCAN_INCOMPLETE;
1124                                                 *prompt = PROMPT_READY;
1125                                         }
1126                                         break;
1127                                 case xb:
1128                                         result = PSCAN_INCOMPLETE;
1129                                         *prompt = PROMPT_SINGLEQUOTE;
1130                                         break;
1131                                 case xc:
1132                                         result = PSCAN_INCOMPLETE;
1133                                         *prompt = PROMPT_COMMENT;
1134                                         break;
1135                                 case xd:
1136                                         result = PSCAN_INCOMPLETE;
1137                                         *prompt = PROMPT_DOUBLEQUOTE;
1138                                         break;
1139                                 case xh:
1140                                         result = PSCAN_INCOMPLETE;
1141                                         *prompt = PROMPT_SINGLEQUOTE;
1142                                         break;
1143                                 case xe:
1144                                         result = PSCAN_INCOMPLETE;
1145                                         *prompt = PROMPT_SINGLEQUOTE;
1146                                         break;
1147                                 case xq:
1148                                         result = PSCAN_INCOMPLETE;
1149                                         *prompt = PROMPT_SINGLEQUOTE;
1150                                         break;
1151                                 case xdolq:
1152                                         result = PSCAN_INCOMPLETE;
1153                                         *prompt = PROMPT_DOLLARQUOTE;
1154                                         break;
1155                                 case xui:
1156                                         result = PSCAN_INCOMPLETE;
1157                                         *prompt = PROMPT_DOUBLEQUOTE;
1158                                         break;
1159                                 case xus:
1160                                         result = PSCAN_INCOMPLETE;
1161                                         *prompt = PROMPT_SINGLEQUOTE;
1162                                         break;
1163                                 default:
1164                                         /* can't get here */
1165                                         fprintf(stderr, "invalid YY_START\n");
1166                                         exit(1);
1167                         }
1168                         break;
1169                 case LEXRES_SEMI:               /* semicolon */
1170                         result = PSCAN_SEMICOLON;
1171                         *prompt = PROMPT_READY;
1172                         break;
1173                 case LEXRES_BACKSLASH:  /* backslash */
1174                         result = PSCAN_BACKSLASH;
1175                         *prompt = PROMPT_READY;
1176                         break;
1177                 default:
1178                         /* can't get here */
1179                         fprintf(stderr, "invalid yylex result\n");
1180                         exit(1);
1181         }
1182
1183         return result;
1184 }
1185
1186 /*
1187  * Clean up after scanning a string.  This flushes any unread input and
1188  * releases resources (but not the PsqlScanState itself).  Note however
1189  * that this does not reset the lexer scan state; that can be done by
1190  * psql_scan_reset(), which is an orthogonal operation.
1191  *
1192  * It is legal to call this when not scanning anything (makes it easier
1193  * to deal with error recovery).
1194  */
1195 void
1196 psql_scan_finish(PsqlScanState state)
1197 {
1198         /* Drop any incomplete variable expansions. */
1199         while (state->buffer_stack != NULL)
1200                 psqlscan_pop_buffer_stack(state);
1201
1202         /* Done with the outer scan buffer, too */
1203         if (state->scanbufhandle)
1204                 yy_delete_buffer(state->scanbufhandle, state->scanner);
1205         state->scanbufhandle = NULL;
1206         if (state->scanbuf)
1207                 free(state->scanbuf);
1208         state->scanbuf = NULL;
1209 }
1210
1211 /*
1212  * Reset lexer scanning state to start conditions.  This is appropriate
1213  * for executing \r psql commands (or any other time that we discard the
1214  * prior contents of query_buf).  It is not, however, necessary to do this
1215  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1216  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1217  * conditions are returned.
1218  *
1219  * Note that this is unrelated to flushing unread input; that task is
1220  * done by psql_scan_finish().
1221  */
1222 void
1223 psql_scan_reset(PsqlScanState state)
1224 {
1225         state->start_state = INITIAL;
1226         state->paren_depth = 0;
1227         state->xcdepth = 0;                     /* not really necessary */
1228         if (state->dolqstart)
1229                 free(state->dolqstart);
1230         state->dolqstart = NULL;
1231         state->identifier_count = 0;
1232         state->begin_depth = 0;
1233 }
1234
1235 /*
1236  * Reselect this lexer (psqlscan.l) after using another one.
1237  *
1238  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1239  * state, because we'd never switch to another lexer in a different state.
1240  * However, we don't want to reset e.g. paren_depth, so this can't be
1241  * the same as psql_scan_reset().
1242  *
1243  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1244  * must be a superset of this.
1245  *
1246  * Note: it seems likely that other lexers could just assign INITIAL for
1247  * themselves, since that probably has the value zero in every flex-generated
1248  * lexer.  But let's not assume that.
1249  */
1250 void
1251 psql_scan_reselect_sql_lexer(PsqlScanState state)
1252 {
1253         state->start_state = INITIAL;
1254 }
1255
1256 /*
1257  * Return true if lexer is currently in an "inside quotes" state.
1258  *
1259  * This is pretty grotty but is needed to preserve the old behavior
1260  * that mainloop.c drops blank lines not inside quotes without even
1261  * echoing them.
1262  */
1263 bool
1264 psql_scan_in_quote(PsqlScanState state)
1265 {
1266         return state->start_state != INITIAL &&
1267                         state->start_state != xqs;
1268 }
1269
1270 /*
1271  * Push the given string onto the stack of stuff to scan.
1272  *
1273  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1274  */
1275 void
1276 psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
1277                                                  const char *varname)
1278 {
1279         StackElem  *stackelem;
1280
1281         stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1282
1283         /*
1284          * In current usage, the passed varname points at the current flex input
1285          * buffer; we must copy it before calling psqlscan_prepare_buffer()
1286          * because that will change the buffer state.
1287          */
1288         stackelem->varname = varname ? pg_strdup(varname) : NULL;
1289
1290         stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
1291                                                                                          &stackelem->bufstring);
1292         state->curline = stackelem->bufstring;
1293         if (state->safe_encoding)
1294         {
1295                 stackelem->origstring = NULL;
1296                 state->refline = stackelem->bufstring;
1297         }
1298         else
1299         {
1300                 stackelem->origstring = pg_strdup(newstr);
1301                 state->refline = stackelem->origstring;
1302         }
1303         stackelem->next = state->buffer_stack;
1304         state->buffer_stack = stackelem;
1305 }
1306
1307 /*
1308  * Pop the topmost buffer stack item (there must be one!)
1309  *
1310  * NB: after this, the flex input state is unspecified; caller must
1311  * switch to an appropriate buffer to continue lexing.
1312  * See psqlscan_select_top_buffer().
1313  */
1314 void
1315 psqlscan_pop_buffer_stack(PsqlScanState state)
1316 {
1317         StackElem  *stackelem = state->buffer_stack;
1318
1319         state->buffer_stack = stackelem->next;
1320         yy_delete_buffer(stackelem->buf, state->scanner);
1321         free(stackelem->bufstring);
1322         if (stackelem->origstring)
1323                 free(stackelem->origstring);
1324         if (stackelem->varname)
1325                 free(stackelem->varname);
1326         free(stackelem);
1327 }
1328
1329 /*
1330  * Select the topmost surviving buffer as the active input.
1331  */
1332 void
1333 psqlscan_select_top_buffer(PsqlScanState state)
1334 {
1335         StackElem  *stackelem = state->buffer_stack;
1336
1337         if (stackelem != NULL)
1338         {
1339                 yy_switch_to_buffer(stackelem->buf, state->scanner);
1340                 state->curline = stackelem->bufstring;
1341                 state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
1342         }
1343         else
1344         {
1345                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1346                 state->curline = state->scanbuf;
1347                 state->refline = state->scanline;
1348         }
1349 }
1350
1351 /*
1352  * Check if specified variable name is the source for any string
1353  * currently being scanned
1354  */
1355 bool
1356 psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
1357 {
1358         StackElem  *stackelem;
1359
1360         for (stackelem = state->buffer_stack;
1361                  stackelem != NULL;
1362                  stackelem = stackelem->next)
1363         {
1364                 if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1365                         return true;
1366         }
1367         return false;
1368 }
1369
1370 /*
1371  * Set up a flex input buffer to scan the given data.  We always make a
1372  * copy of the data.  If working in an unsafe encoding, the copy has
1373  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1374  *
1375  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1376  */
1377 YY_BUFFER_STATE
1378 psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
1379                                                 char **txtcopy)
1380 {
1381         char       *newtxt;
1382
1383         /* Flex wants two \0 characters after the actual data */
1384         newtxt = pg_malloc(len + 2);
1385         *txtcopy = newtxt;
1386         newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1387
1388         if (state->safe_encoding)
1389                 memcpy(newtxt, txt, len);
1390         else
1391         {
1392                 /* Gotta do it the hard way */
1393                 int                     i = 0;
1394
1395                 while (i < len)
1396                 {
1397                         int                     thislen = PQmblen(txt + i, state->encoding);
1398
1399                         /* first byte should always be okay... */
1400                         newtxt[i] = txt[i];
1401                         i++;
1402                         while (--thislen > 0 && i < len)
1403                                 newtxt[i++] = (char) 0xFF;
1404                 }
1405         }
1406
1407         return yy_scan_buffer(newtxt, len + 2, state->scanner);
1408 }
1409
1410 /*
1411  * psqlscan_emit() --- body for ECHO macro
1412  *
1413  * NB: this must be used for ALL and ONLY the text copied from the flex
1414  * input data.  If you pass it something that is not part of the yytext
1415  * string, you are making a mistake.  Internally generated text can be
1416  * appended directly to state->output_buf.
1417  */
1418 void
1419 psqlscan_emit(PsqlScanState state, const char *txt, int len)
1420 {
1421         PQExpBuffer output_buf = state->output_buf;
1422
1423         if (state->safe_encoding)
1424                 appendBinaryPQExpBuffer(output_buf, txt, len);
1425         else
1426         {
1427                 /* Gotta do it the hard way */
1428                 const char *reference = state->refline;
1429                 int                     i;
1430
1431                 reference += (txt - state->curline);
1432
1433                 for (i = 0; i < len; i++)
1434                 {
1435                         char            ch = txt[i];
1436
1437                         if (ch == (char) 0xFF)
1438                                 ch = reference[i];
1439                         appendPQExpBufferChar(output_buf, ch);
1440                 }
1441         }
1442 }
1443
1444 /*
1445  * psqlscan_extract_substring --- fetch value of (part of) the current token
1446  *
1447  * This is like psqlscan_emit(), except that the data is returned as a
1448  * malloc'd string rather than being pushed directly to state->output_buf.
1449  */
1450 char *
1451 psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
1452 {
1453         char       *result = (char *) pg_malloc(len + 1);
1454
1455         if (state->safe_encoding)
1456                 memcpy(result, txt, len);
1457         else
1458         {
1459                 /* Gotta do it the hard way */
1460                 const char *reference = state->refline;
1461                 int                     i;
1462
1463                 reference += (txt - state->curline);
1464
1465                 for (i = 0; i < len; i++)
1466                 {
1467                         char            ch = txt[i];
1468
1469                         if (ch == (char) 0xFF)
1470                                 ch = reference[i];
1471                         result[i] = ch;
1472                 }
1473         }
1474         result[len] = '\0';
1475         return result;
1476 }
1477
1478 /*
1479  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1480  *
1481  * If the variable name is found, escape its value using the appropriate
1482  * quoting method and emit the value to output_buf.  (Since the result is
1483  * surely quoted, there is never any reason to rescan it.)      If we don't
1484  * find the variable or escaping fails, emit the token as-is.
1485  */
1486 void
1487 psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
1488                                                  PsqlScanQuoteType quote)
1489 {
1490         char       *varname;
1491         char       *value;
1492
1493         /* Variable lookup. */
1494         varname = psqlscan_extract_substring(state, txt + 2, len - 3);
1495         if (state->callbacks->get_variable)
1496                 value = state->callbacks->get_variable(varname, quote,
1497                                                                                            state->cb_passthrough);
1498         else
1499                 value = NULL;
1500         free(varname);
1501
1502         if (value)
1503         {
1504                 /* Emit the suitably-escaped value */
1505                 appendPQExpBufferStr(state->output_buf, value);
1506                 free(value);
1507         }
1508         else
1509         {
1510                 /* Emit original token as-is */
1511                 psqlscan_emit(state, txt, len);
1512         }
1513 }
1514
1515 void
1516 psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
1517 {
1518         char    *varname;
1519         char    *value;
1520
1521         varname = psqlscan_extract_substring(state, txt + 3, len - 4);
1522         if (state->callbacks->get_variable)
1523                 value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
1524                                                                                            state->cb_passthrough);
1525         else
1526                 value = NULL;
1527         free(varname);
1528
1529         if (value != NULL)
1530         {
1531                 psqlscan_emit(state, "TRUE", 4);
1532                 free(value);
1533         }
1534         else
1535         {
1536                 psqlscan_emit(state, "FALSE", 5);
1537         }
1538 }