src/fe_utils/psqlscan.l

   1 %top{
   2 /*-------------------------------------------------------------------------
   3  *
   4  * psqlscan.l
   5  *        lexical scanner for SQL commands
   6  *
   7  * This lexer used to be part of psql, and that heritage is reflected in
   8  * the file name as well as function and typedef names, though it can now
   9  * be used by other frontend programs as well.  It's also possible to extend
  10  * this lexer with a compatible add-on lexer to handle program-specific
  11  * backslash commands.
  12  *
  13  * This code is mainly concerned with determining where the end of a SQL
  14  * statement is: we are looking for semicolons that are not within quotes,
  15  * comments, or parentheses.  The most reliable way to handle this is to
  16  * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
  17  * below are (except for a few) the same as the backend's, but their actions
  18  * are just ECHO whereas the backend's actions generally do other things.
  19  *
  20  * XXX The rules in this file must be kept in sync with the backend lexer!!!
  21  *
  22  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
  23  *
  24  * See psqlscan_int.h for additional commentary.
  25  *
  26  *
  27  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  28  * Portions Copyright (c) 1994, Regents of the University of California
  29  *
  30  * IDENTIFICATION
  31  *        src/fe_utils/psqlscan.l
  32  *
  33  *-------------------------------------------------------------------------
  34  */
  35 #include "postgres_fe.h"
  36
  37 #include "common/logging.h"
  38 #include "fe_utils/psqlscan.h"
  39
  40 #include "libpq-fe.h"
  41 }
  42
  43 %{
  44
  45 /* LCOV_EXCL_START */
  46
  47 #include "fe_utils/psqlscan_int.h"
  48
  49 /*
  50  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
  51  * doesn't presently make use of that argument, so just declare it as int.
  52  */
  53 typedef int YYSTYPE;
  54
  55 /*
  56  * Set the type of yyextra; we use it as a pointer back to the containing
  57  * PsqlScanState.
  58  */
  59 #define YY_EXTRA_TYPE PsqlScanState
  60
  61
  62 /* Return values from yylex() */
  63 #define LEXRES_EOL                      0       /* end of input */
  64 #define LEXRES_SEMI                     1       /* command-terminating semicolon found */
  65 #define LEXRES_BACKSLASH        2       /* backslash command start */
  66
  67
  68 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)   /* hands the current token to psqlscan_emit(); needs cur_state in scope */
  69
  70 /*
  71  * Work around a bug in flex 2.5.35: it emits a couple of functions that
  72  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
  73  * this would cause warnings.  Providing our own declarations should be
  74  * harmless even when the bug gets fixed.
  75  */
  76 extern int      psql_yyget_column(yyscan_t yyscanner);
  77 extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
  78
  79 %}
  80
  81 %option reentrant
  82 %option bison-bridge
  83 %option 8bit
  84 %option never-interactive
  85 %option nodefault
  86 %option noinput
  87 %option nounput
  88 %option noyywrap
  89 %option warn
  90 %option prefix="psql_yy"
  91
  92 /*
  93  * All of the following definitions and rules should exactly match
  94  * src/backend/parser/scan.l so far as the flex patterns are concerned.
  95  * The rule bodies are just ECHO as opposed to what the backend does,
  96  * however.  (But be sure to duplicate code that affects the lexing process,
  97  * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
  98  * whereas scan.l has a separate one for each exclusive state.
  99  */
 100
 101 /*
 102  * OK, here is a short description of lex/flex rules behavior.
 103  * The longest pattern which matches an input string is always chosen.
 104  * For equal-length patterns, the first occurring in the rules list is chosen.
 105  * INITIAL is the starting state, to which all non-conditional rules apply.
 106  * Exclusive states change parsing rules while the state is active.  When in
 107  * an exclusive state, only those rules defined for that state apply.
 108  *
 109  * We use exclusive states for quoted strings, extended comments,
 110  * and to eliminate parsing troubles for numeric strings.
 111  * Exclusive states:
 112  *  <xb> bit string literal
 113  *  <xc> extended C-style comments
 114  *  <xd> delimited identifiers (double-quoted identifiers)
 115  *  <xh> hexadecimal numeric string
 116  *  <xq> standard quoted strings
 117  *  <xe> extended quoted strings (support backslash escape sequences)
 118  *  <xdolq> $foo$ quoted strings
 119  *  <xui> quoted identifier with Unicode escapes
 120  *  <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
 121  *  <xus> quoted string with Unicode escapes
 122  *  <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
 123  *
 124  * Note: we intentionally don't mimic the backend's <xeu> state; we have
 125  * no need to distinguish it from <xe> state, and no good way to get out
 126  * of it in error cases.  The backend just throws yyerror() in those
 127  * cases, but that's not an option here.
 128  */
 129
 130 %x xb
 131 %x xc
 132 %x xd
 133 %x xh
 134 %x xq
 135 %x xe
 136 %x xdolq
 137 %x xui
 138 %x xuiend
 139 %x xus
 140 %x xusend
 141
 142 /*
 143  * In order to make the world safe for Windows and Mac clients as well as
 144  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 145  * sequence will be seen as two successive newlines, but that doesn't cause
 146  * any problems.  Comments that start with -- and extend to the next
 147  * newline are treated as equivalent to a single whitespace character.
 148  *
 149  * NOTE a fine point: if there is no newline following --, we will absorb
 150  * everything to the end of the input as a comment.  This is correct.  Older
 151  * versions of Postgres failed to recognize -- as a comment if the input
 152  * did not end with a newline.
 153  *
 154  * XXX perhaps \f (formfeed) should be treated as a newline as well?
 155  *
 156  * XXX if you change the set of whitespace characters, fix scanner_isspace()
 157  * to agree.
 158  */
 159
 160 space                   [ \t\n\r\f]
 161 horiz_space             [ \t\f]
 162 newline                 [\n\r]
 163 non_newline             [^\n\r]
 164
 165 comment                 ("--"{non_newline}*)
 166
 167 whitespace              ({space}+|{comment})
 168
 169 /*
 170  * SQL requires at least one newline in the whitespace separating
 171  * string literals that are to be concatenated.  Silly, but who are we
 172  * to argue?  Note that {whitespace_with_newline} should not have * after
 173  * it, whereas {whitespace} should generally have a * after it...
 174  */
 175
 176 special_whitespace              ({space}+|{comment}{newline})
 177 horiz_whitespace                ({horiz_space}|{comment})
 178 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
 179
 180 /*
 181  * To ensure that {quotecontinue} can be scanned without having to back up
 182  * if the full pattern isn't matched, we include trailing whitespace in
 183  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
 184  * except for {quote} followed by whitespace and just one "-" (not two,
 185  * which would start a {comment}).  To cover that we have {quotefail}.
 186  * The actions for {quotestop} and {quotefail} must throw back characters
 187  * beyond the quote proper.
 188  */
 189 quote                   '
 190 quotestop               {quote}{whitespace}*
 191 quotecontinue   {quote}{whitespace_with_newline}{quote}
 192 quotefail               {quote}{whitespace}*"-"
 193
 194 /* Bit string
 195  * It is tempting to scan the string for only those characters
 196  * which are allowed. However, this leads to silently swallowed
 197  * characters if illegal characters are included in the string.
 198  * For example, if xbinside is [01] then B'ABCD' is interpreted
 199  * as a zero-length string, and the ABCD' is lost!
 200  * Better to pass the string forward and let the input routines
 201  * validate the contents.
 202  */
 203 xbstart                 [bB]{quote}
 204 xbinside                [^']*
 205
 206 /* Hexadecimal number */
 207 xhstart                 [xX]{quote}
 208 xhinside                [^']*
 209
 210 /* National character */
 211 xnstart                 [nN]{quote}
 212
 213 /* Quoted string that allows backslash escapes */
 214 xestart                 [eE]{quote}
 215 xeinside                [^\\']+
 216 xeescape                [\\][^0-7]
 217 xeoctesc                [\\][0-7]{1,3}
 218 xehexesc                [\\]x[0-9A-Fa-f]{1,2}
 219 xeunicode               [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
 220 xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
 221
 222 /* Extended quote
 223  * xqdouble implements embedded quote, ''''
 224  */
 225 xqstart                 {quote}
 226 xqdouble                {quote}{quote}
 227 xqinside                [^']+
 228
 229 /* $foo$ style quotes ("dollar quoting")
 230  * The quoted string starts with $foo$ where "foo" is an optional string
 231  * in the form of an identifier, except that it may not contain "$",
 232  * and extends to the first occurrence of an identical string.
 233  * There is *no* processing of the quoted text.
 234  *
 235  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
 236  * fails to match its trailing "$".
 237  */
 238 dolq_start              [A-Za-z\200-\377_]
 239 dolq_cont               [A-Za-z\200-\377_0-9]
 240 dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
 241 dolqfailed              \${dolq_start}{dolq_cont}*
 242 dolqinside              [^$]+
 243
 244 /* Double quote
 245  * Allows embedded spaces and other special characters into identifiers.
 246  */
 247 dquote                  \"
 248 xdstart                 {dquote}
 249 xdstop                  {dquote}
 250 xddouble                {dquote}{dquote}
 251 xdinside                [^"]+
 252
 253 /* Unicode escapes */
 254 uescape                 [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
 255 /* error rule to avoid backup */
 256 uescapefail             [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
 257
 258 /* Quoted identifier with Unicode escapes */
 259 xuistart                [uU]&{dquote}
 260
 261 /* Quoted string with Unicode escapes */
 262 xusstart                [uU]&{quote}
 263
 264 /* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
 265 xustop1         {uescapefail}?
 266 xustop2         {uescape}
 267
 268 /* error rule to avoid backup */
 269 xufailed                [uU]&
 270
 271
 272 /* C-style comments
 273  *
 274  * The "extended comment" syntax closely resembles allowable operator syntax.
 275  * The tricky part here is to get lex to recognize a string starting with
 276  * slash-star as a comment, when interpreting it as an operator would produce
 277  * a longer match --- remember lex will prefer a longer match!  Also, if we
 278  * have something like plus-slash-star, lex will think this is a 3-character
 279  * operator whereas we want to see it as a + operator and a comment start.
 280  * The solution is two-fold:
 281  * 1. append {op_chars}* to xcstart so that it matches as much text as
 282  *    {operator} would. Then the tie-breaker (first matching rule of same
 283  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
 284  *    in case it contains a star-slash that should terminate the comment.
 285  * 2. In the operator rule, check for slash-star within the operator, and
 286  *    if found throw it back with yyless().  This handles the plus-slash-star
 287  *    problem.
 288  * Dash-dash comments have similar interactions with the operator rule.
 289  */
 290 xcstart                 \/\*{op_chars}*
 291 xcstop                  \*+\/
 292 xcinside                [^*/]+
 293
 294 digit                   [0-9]
 295 ident_start             [A-Za-z\200-\377_]
 296 ident_cont              [A-Za-z\200-\377_0-9\$]
 297
 298 identifier              {ident_start}{ident_cont}*
 299
 300 /* Assorted special-case operators and operator-like tokens */
 301 typecast                "::"
 302 dot_dot                 \.\.
 303 colon_equals    ":="
 304
 305 /*
 306  * These operator-like tokens (unlike the above ones) also match the {operator}
 307  * rule, which means that they might be overridden by a longer match if they
 308  * are followed by a comment start or a + or - character. Accordingly, if you
 309  * add to this list, you must also add corresponding code to the {operator}
 310  * block to return the correct token in such cases. (This is not needed in
 311  * psqlscan.l since the token value is ignored there.)
 312  */
 313 equals_greater  "=>"
 314 less_equals             "<="
 315 greater_equals  ">="
 316 less_greater    "<>"
 317 not_equals              "!="
 318
 319 /*
 320  * "self" is the set of chars that should be returned as single-character
 321  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
 322  * which can be one or more characters long (but if a single-char token
 323  * appears in the "self" set, it is not to be returned as an Op).  Note
 324  * that the sets overlap, but each has some chars that are not in the other.
 325  *
 326  * If you change either set, adjust the character lists appearing in the
 327  * rule for "operator"!
 328  */
 329 self                    [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 330 op_chars                [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 331 operator                {op_chars}+
 332
 333 /* We no longer allow unary minus in numbers.
 334  * Instead we pass it separately to the parser, where it gets
 335  * coerced via doNegate() -- Leon aug 20 1999
 336  *
 337  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
 338  *
 339  * {realfail1} and {realfail2} are added to prevent the need for scanner
 340  * backup when the {real} rule fails to match completely.
 341  */
 342
 343 integer                 {digit}+
 344 decimal                 (({digit}*\.{digit}+)|({digit}+\.{digit}*))
 345 decimalfail             {digit}+\.\.
 346 real                    ({integer}|{decimal})[Ee][-+]?{digit}+
 347 realfail1               ({integer}|{decimal})[Ee]
 348 realfail2               ({integer}|{decimal})[Ee][-+]
 349
 350 param                   \${integer}
 351
 352 /* psql-specific: characters allowed in variable names */
 353 variable_char   [A-Za-z\200-\377_0-9]
 354
 355 other                   .
 356
 357 /*
 358  * Dollar quoted strings are totally opaque, and no escaping is done on them.
 359  * Other quoted strings must allow some special characters such as single-quote
 360  *  and newline.
 361  * Embedded single-quotes are implemented both in the SQL standard
 362  *  style of two adjacent single quotes "''" and in the Postgres/Java style
 363  *  of escaped-quote "\'".
 364  * Other embedded escaped characters are matched explicitly.  (The backend
 365  *  drops the leading backslash; the rules here echo the text unchanged.)
 366  * Note that xcstart must appear before operator, as explained above!
 367  *  Also whitespace (comment) must appear before operator.
 368  */
 369
 370 %%
 371
 372 %{
 373                 /* Declare some local variables inside yylex(), for convenience */
 374                 PsqlScanState cur_state = yyextra;      /* referenced by the ECHO macro */
 375                 PQExpBuffer output_buf = cur_state->output_buf; /* checked by the {whitespace} rule */
 376
 377                 /*
 378                  * Force flex into the state indicated by start_state.  This has a
 379                  * couple of purposes: it lets some of the functions below set a new
 380                  * starting state without ugly direct access to flex variables, and it
 381                  * allows us to transition from one flex lexer to another so that we
 382                  * can lex different parts of the source string using separate lexers.
 383                  */
 384                 BEGIN(cur_state->start_state);
 385 %}
 386
 387 {whitespace}    {
 388                                         /*
 389                                          * Note that the whitespace rule includes both true
 390                                          * whitespace and single-line ("--" style) comments.
 391                                          * We suppress whitespace at the start of the query
 392                                          * buffer.  We also suppress all single-line comments,
 393                                          * which is pretty dubious but is the historical
 394                                          * behavior.
 395                                          */
 396                                         if (!(output_buf->len == 0 || yytext[0] == '-'))        /* a leading '-' can only be a "--" comment */
 397                                                 ECHO;
 398                                 }
 399
 400 {xcstart}               {
 401                                         cur_state->xcdepth = 0; /* outermost comment, no nesting yet */
 402                                         BEGIN(xc);
 403                                         /* Put back any characters past slash-star; see above */
 404                                         yyless(2);
 405                                         ECHO;
 406                                 }
 407
 408 <xc>{
 409 {xcstart}               {
 410                                         cur_state->xcdepth++;   /* one more level of nesting */
 411                                         /* Put back any characters past slash-star; see above */
 412                                         yyless(2);
 413                                         ECHO;
 414                                 }
 415
 416 {xcstop}                {
 417                                         if (cur_state->xcdepth <= 0)
 418                                                 BEGIN(INITIAL); /* closed the outermost comment */
 419                                         else
 420                                                 cur_state->xcdepth--;   /* closed a nested comment */
 421                                         ECHO;
 422                                 }
 423
 424 {xcinside}              {
 425                                         ECHO;
 426                                 }
 427
 428 {op_chars}              {
 429                                         ECHO;   /* in practice a '/' or '*' not matched by the rules above */
 430                                 }
 431
 432 \*+                             {
 433                                         ECHO;   /* run of stars not followed by a slash */
 434                                 }
 435 } /* <xc> */
 436
 437 {xbstart}               {
 438                                         BEGIN(xb);      /* now inside a bit string literal */
 439                                         ECHO;
 440                                 }
 441 <xb>{quotestop} |
 442 <xb>{quotefail} {
 443                                         yyless(1);      /* throw back all but the quote */
 444                                         BEGIN(INITIAL);
 445                                         ECHO;
 446                                 }
 447 <xh>{xhinside}  |
 448 <xb>{xbinside}  {
 449                                         ECHO;
 450                                 }
 451 <xh>{quotecontinue}     |
 452 <xb>{quotecontinue}     {
 453                                         ECHO;   /* literal continues after the whitespace; state is unchanged */
 454                                 }
 455
 456 {xhstart}               {
 457                                         /* Hexadecimal bit type.
 458                                          * The backend's version of this rule reportedly puts a
 459                                          * leading "x" on the string to mark it as hex.  Nothing
 460                                          * like that happens here: we just switch to <xh> and
 461                                          * pass the matched text to ECHO.
 462                                          */
 463                                         BEGIN(xh);
 464                                         ECHO;
 465                                 }
 466 <xh>{quotestop} |
 467 <xh>{quotefail} {
 468                                         yyless(1);      /* throw back all but the quote */
 469                                         BEGIN(INITIAL);
 470                                         ECHO;
 471                                 }
 472
 473 {xnstart}               {
 474                                         yyless(1);      /* eat only 'n' this time; the quote is rescanned */
 475                                         ECHO;
 476                                 }
 477
 478 {xqstart}               {
 479                                         if (cur_state->std_strings)
 480                                                 BEGIN(xq);      /* standard quoted string */
 481                                         else
 482                                                 BEGIN(xe);      /* extended string with backslash escapes */
 483                                         ECHO;
 484                                 }
 485 {xestart}               {
 486                                         BEGIN(xe);
 487                                         ECHO;
 488                                 }
 489 {xusstart}              {
 490                                         BEGIN(xus);
 491                                         ECHO;
 492                                 }
 493 <xq,xe>{quotestop}      |
 494 <xq,xe>{quotefail} {
 495                                         yyless(1);      /* throw back all but the quote */
 496                                         BEGIN(INITIAL);
 497                                         ECHO;
 498                                 }
 499 <xus>{quotestop} |
 500 <xus>{quotefail} {
 501                                         /* throw back all but the quote */
 502                                         yyless(1);
 503                                         BEGIN(xusend);  /* an optional UESCAPE clause may follow */
 504                                         ECHO;
 505                                 }
 506 <xusend>{whitespace} {
 507                                         ECHO;
 508                                 }
 509 <xusend>{other} |
 510 <xusend>{xustop1} {
 511                                         yyless(0);      /* no complete UESCAPE clause: give everything back */
 512                                         BEGIN(INITIAL);
 513                                         ECHO;   /* emits nothing, since yyleng is now 0 */
 514                                 }
 515 <xusend>{xustop2} {
 516                                         BEGIN(INITIAL); /* complete UESCAPE clause consumed */
 517                                         ECHO;
 518                                 }
 519 <xq,xe,xus>{xqdouble} {
 520                                         ECHO;   /* doubled quote is an embedded quote, not the end */
 521                                 }
 522 <xq,xus>{xqinside}  {
 523                                         ECHO;
 524                                 }
 525 <xe>{xeinside}  {
 526                                         ECHO;
 527                                 }
 528 <xe>{xeunicode} {
 529                                         ECHO;
 530                                 }
 531 <xe>{xeunicodefail}     {
 532                                         ECHO;
 533                                 }
 534 <xe>{xeescape}  {
 535                                         ECHO;
 536                                 }
 537 <xe>{xeoctesc}  {
 538                                         ECHO;
 539                                 }
 540 <xe>{xehexesc}  {
 541                                         ECHO;
 542                                 }
 543 <xq,xe,xus>{quotecontinue} {
 544                                         ECHO;
 545                                 }
 546 <xe>.                   {
 547                                         /* This is only needed for \ just before EOF */
 548                                         ECHO;
 549                                 }
 550
 551 {dolqdelim}             {
 552                                         cur_state->dolqstart = pg_strdup(yytext);       /* remember the delimiter so the closing one can be matched */
 553                                         BEGIN(xdolq);
 554                                         ECHO;
 555                                 }
 556 {dolqfailed}    {
 557                                         /* throw back all but the initial "$" */
 558                                         yyless(1);
 559                                         ECHO;
 560                                 }
 561 <xdolq>{dolqdelim} {
 562                                         if (strcmp(yytext, cur_state->dolqstart) == 0)  /* same delimiter as the opening one? */
 563                                         {
 564                                                 free(cur_state->dolqstart);
 565                                                 cur_state->dolqstart = NULL;
 566                                                 BEGIN(INITIAL);
 567                                         }
 568                                         else
 569                                         {
 570                                                 /*
 571                                                  * When we fail to match $...$ to dolqstart, transfer
 572                                                  * the $... part to the output, but put back the final
 573                                                  * $ for rescanning.  Consider $delim$...$junk$delim$
 574                                                  */
 575                                                 yyless(yyleng - 1);
 576                                         }
 577                                         ECHO;
 578                                 }
 579 <xdolq>{dolqinside} {
 580                                         ECHO;
 581                                 }
 582 <xdolq>{dolqfailed} {
 583                                         ECHO;
 584                                 }
 585 <xdolq>.                {
 586                                         /* This is only needed for $ inside the quoted text */
 587                                         ECHO;
 588                                 }
 589
 590 {xdstart}               {
 591                                         BEGIN(xd);
 592                                         ECHO;
 593                                 }
 594 {xuistart}              {
 595                                         BEGIN(xui);
 596                                         ECHO;
 597                                 }
 598 <xd>{xdstop}    {
 599                                         BEGIN(INITIAL);
 600                                         ECHO;
 601                                 }
 602 <xui>{dquote} {
 603                                         yyless(1);
 604                                         BEGIN(xuiend);  /* an optional UESCAPE clause may follow */
 605                                         ECHO;
 606                                 }
 607 <xuiend>{whitespace} {
 608                                         ECHO;
 609                                 }
 610 <xuiend>{other} |
 611 <xuiend>{xustop1} {
 612                                         yyless(0);      /* no complete UESCAPE clause: give everything back */
 613                                         BEGIN(INITIAL);
 614                                         ECHO;   /* emits nothing, since yyleng is now 0 */
 615                                 }
 616 <xuiend>{xustop2}       {
 617                                         BEGIN(INITIAL); /* complete UESCAPE clause consumed */
 618                                         ECHO;
 619                                 }
 620 <xd,xui>{xddouble}      {
 621                                         ECHO;   /* doubled double-quote stays inside the identifier */
 622                                 }
 623 <xd,xui>{xdinside}      {
 624                                         ECHO;
 625                                 }
 626
 627 {xufailed}      {
 628                                         /* throw back all but the initial u/U */
 629                                         yyless(1);
 630                                         ECHO;
 631                                 }
 632
 633 {typecast}              {
 634                                         ECHO;   /* only the text matters here; this and the rules below just copy it */
 635                                 }
 636
 637 {dot_dot}               {
 638                                         ECHO;
 639                                 }
 640
 641 {colon_equals}  {
 642                                         ECHO;
 643                                 }
 644
 645 {equals_greater} {
 646                                         ECHO;
 647                                 }
 648
 649 {less_equals}   {
 650                                         ECHO;
 651                                 }
 652
 653 {greater_equals} {
 654                                         ECHO;
 655                                 }
 656
 657 {less_greater}  {
 658                                         ECHO;
 659                                 }
 660
 661 {not_equals}    {
 662                                         ECHO;
 663                                 }
 664
 665         /*
 666          * These rules are specific to psql --- they implement parenthesis
 667          * counting and detection of command-ending semicolon.  These must
 668          * appear before the {self} rule so that they take precedence over it.
 669          */
 670
 671 "("                             {
 672                                         cur_state->paren_depth++;
 673                                         ECHO;
 674                                 }
 675
 676 ")"                             {
 677                                         if (cur_state->paren_depth > 0) /* never let the depth go negative */
 678                                                 cur_state->paren_depth--;
 679                                         ECHO;
 680                                 }
 681
 682 ";"                             {
 683                                         ECHO;
 684                                         if (cur_state->paren_depth == 0)        /* not inside parentheses */
 685                                         {
 686                                                 /* Terminate lexing temporarily */
 687                                                 cur_state->start_state = YY_START;      /* the next yylex() call does BEGIN(start_state) */
 688                                                 return LEXRES_SEMI;
 689                                         }
 690                                 }
 691
 692         /*
 693          * psql-specific rules to handle backslash commands and variable
 694          * substitution.  We want these before {self}, also.
 695          */
 696
 697 "\\"[;:]                {
 698                                         /* Force a semi-colon or colon into the query buffer */
 699                                         psqlscan_emit(cur_state, yytext + 1, 1);        /* skip the backslash itself */
 700                                 }
 701
 702 "\\"                    {
 703                                         /* Terminate lexing temporarily */
 704                                         cur_state->start_state = YY_START;      /* the next yylex() call does BEGIN(start_state) */
 705                                         return LEXRES_BACKSLASH;
 706                                 }
 707
 708 :{variable_char}+       {
 709                                         /* Possible psql variable substitution */
 710                                         char       *varname;
 711                                         char       *value;
 712
                                             /* The name is the token without its leading colon (malloc'd copy) */
 713                                         varname = psqlscan_extract_substring(cur_state,
 714                                                                                                                  yytext + 1,
 715                                                                                                                  yyleng - 1);
                                             /* The lookup callback is optional; without one nothing is a variable */
 716                                         if (cur_state->callbacks->get_variable)
 717                                                 value = cur_state->callbacks->get_variable(varname,
 718                                                                                                                                    PQUOTE_PLAIN,
 719                                                                                                                                    cur_state->cb_passthrough);
 720                                         else
 721                                                 value = NULL;
 722
 723                                         if (value)
 724                                         {
 725                                                 /* It is a variable, check for recursion */
 726                                                 if (psqlscan_var_is_current_source(cur_state, varname))
 727                                                 {
 728                                                         /* Recursive expansion --- don't go there */
 729                                                         pg_log_warning("skipping recursive expansion of variable \"%s\"",
 730                                                                                                                           varname);
 731                                                         /* Instead copy the string as is */
 732                                                         ECHO;
 733                                                 }
 734                                                 else
 735                                                 {
 736                                                         /* OK, perform substitution */
 737                                                         psqlscan_push_new_buffer(cur_state, value, varname);
 738                                                         /* psqlscan_push_new_buffer already made the new buffer active */
 739                                                 }
                                                     /* Either way we are done with value (the push works on its own copy) */
 740                                                 free(value);
 741                                         }
 742                                         else
 743                                         {
 744                                                 /*
 745                                                  * if the variable doesn't exist we'll copy the string
 746                                                  * as is
 747                                                  */
 748                                                 ECHO;
 749                                         }
 750
                                             /* varname was allocated by psqlscan_extract_substring above */
 751                                         free(varname);
 752                                 }
 753
 754 :'{variable_char}+'     {
                                             /* :'name' --- ask for the value with SQL-literal quoting */
 755                                         psqlscan_escape_variable(cur_state, yytext, yyleng,
 756                                                                                          PQUOTE_SQL_LITERAL);
 757                                 }
 758
 759 :\"{variable_char}+\"   {
                                             /* :"name" --- ask for the value with SQL-identifier quoting */
 760                                         psqlscan_escape_variable(cur_state, yytext, yyleng,
 761                                                                                          PQUOTE_SQL_IDENT);
 762                                 }
 763
 764 :\{\?{variable_char}+\} {
                                             /*
                                              * :{?name} --- handled entirely by psqlscan_test_variable
                                              * (defined elsewhere; presumably a variable-existence
                                              * test --- confirm against its definition).
                                              */
 765                                         psqlscan_test_variable(cur_state, yytext, yyleng);
 766                                 }
 767
 768         /*
 769          * These rules just avoid the need for scanner backup if one of the
 770          * three rules above fails to match completely.
 771          */
 772
 773 :'{variable_char}*      {
 774                                         /* Throw back everything but the colon */
 775                                         yyless(1);
                                             /* yytext is now just the colon; copy it to the output */
 776                                         ECHO;
 777                                 }
 778
 779 :\"{variable_char}*     {
 780                                         /* Throw back everything but the colon */
 781                                         yyless(1);
 782                                         ECHO;
 783                                 }
 784
 785 :\{\?{variable_char}*   {
 786                                         /* Throw back everything but the colon */
 787                                         yyless(1);
 788                                         ECHO;
 789                                 }
 790 :\{     {
 791                                         /* Throw back everything but the colon */
 792                                         yyless(1);
 793                                         ECHO;
 794                                 }
 795
 796         /*
 797          * Back to backend-compatible rules.
 798          */
 799
 800 {self}                  {
                                             /* Copy the matched text to the output unchanged */
 801                                         ECHO;
 802                                 }
 803
 804 {operator}              {
 805                                         /*
 806                                          * Check for embedded slash-star or dash-dash; those
 807                                          * are comment starts, so operator must stop there.
 808                                          * Note that slash-star or dash-dash at the first
 809                                          * character will match a prior rule, not this one.
 810                                          */
 811                                         int                     nchars = yyleng;
 812                                         char       *slashstar = strstr(yytext, "/*");
 813                                         char       *dashdash = strstr(yytext, "--");
 814
 815                                         if (slashstar && dashdash)
 816                                         {
 817                                                 /* if both appear, take the first one */
 818                                                 if (slashstar > dashdash)
 819                                                         slashstar = dashdash;
 820                                         }
 821                                         else if (!slashstar)
 822                                                 slashstar = dashdash;
                                             /*
                                              * From here on slashstar, if not NULL, points at the
                                              * earliest comment start of either kind; the operator
                                              * is cut off just before it.
                                              */
 823                                         if (slashstar)
 824                                                 nchars = slashstar - yytext;
 825
 826                                         /*
 827                                          * For SQL compatibility, '+' and '-' cannot be the
 828                                          * last char of a multi-char operator unless the operator
 829                                          * contains chars that are not in SQL operators.
 830                                          * The idea is to lex '=-' as two operators, but not
 831                                          * to forbid operator names like '?-' that could not be
 832                                          * sequences of SQL operators.
 833                                          */
 834                                         if (nchars > 1 &&
 835                                                 (yytext[nchars - 1] == '+' ||
 836                                                  yytext[nchars - 1] == '-'))
 837                                         {
 838                                                 int                     ic;
 839
                                                     /*
                                                      * Scan backwards over the chars before the trailing
                                                      * sign; ic ends up < 0 only if none of them is one
                                                      * of the qualifying characters listed below.
                                                      */
 840                                                 for (ic = nchars - 2; ic >= 0; ic--)
 841                                                 {
 842                                                         char c = yytext[ic];
 843                                                         if (c == '~' || c == '!' || c == '@' ||
 844                                                                 c == '#' || c == '^' || c == '&' ||
 845                                                                 c == '|' || c == '`' || c == '?' ||
 846                                                                 c == '%')
 847                                                                 break;
 848                                                 }
 849                                                 if (ic < 0)
 850                                                 {
 851                                                         /*
 852                                                          * didn't find a qualifying character, so remove
 853                                                          * all trailing [+-]
 854                                                          */
                                                             /* the nchars > 1 test always leaves at least one char */
 855                                                         do {
 856                                                                 nchars--;
 857                                                         } while (nchars > 1 &&
 858                                                                  (yytext[nchars - 1] == '+' ||
 859                                                                   yytext[nchars - 1] == '-'));
 860                                                 }
 861                                         }
 862
 863                                         if (nchars < yyleng)
 864                                         {
 865                                                 /* Strip the unwanted chars from the token */
 866                                                 yyless(nchars);
 867                                         }
                                             /* Copy whatever remains of the token to the output */
 868                                         ECHO;
 869                                 }
 870
 871 {param}                 {
                                             /* This and the following rules simply copy the token to the output */
 872                                         ECHO;
 873                                 }
 874
 875 {integer}               {
 876                                         ECHO;
 877                                 }
 878 {decimal}               {
 879                                         ECHO;
 880                                 }
 881 {decimalfail}   {
 882                                         /* throw back the .., and treat as integer */
                                             /* (the two thrown-back chars will be lexed again) */
 883                                         yyless(yyleng - 2);
 884                                         ECHO;
 885                                 }
 886 {real}                  {
 887                                         ECHO;
 888                                 }
 889 {realfail1}             {
 890                                         /*
 891                                          * throw back the [Ee], and figure out whether what
 892                                          * remains is an {integer} or {decimal}.
 893                                          * (in psql, we don't actually care...)
 894                                          */
 895                                         yyless(yyleng - 1);
 896                                         ECHO;
 897                                 }
 898 {realfail2}             {
 899                                         /* throw back the [Ee][+-], and proceed as above */
 900                                         yyless(yyleng - 2);
 901                                         ECHO;
 902                                 }
 903
 904
 905 {identifier}    {
 906                                         ECHO;
 907                                 }
 908
 909 {other}                 {
 910                                         ECHO;
 911                                 }
 912
 913 <<EOF>>                 {
                                             /*
                                              * An empty buffer stack means no variable expansion is
                                              * in progress, so this is the end of the input line
                                              * itself: record the start condition and stop.
                                              */
 914                                         if (cur_state->buffer_stack == NULL)
 915                                         {
 916                                                 cur_state->start_state = YY_START;
 917                                                 return LEXRES_EOL;              /* end of input reached */
 918                                         }
 919
 920                                         /*
 921                                          * We were expanding a variable, so pop the inclusion
 922                                          * stack and keep lexing
 923                                          */
 924                                         psqlscan_pop_buffer_stack(cur_state);
                                             /* popping leaves the flex input unspecified; reselect a buffer */
 925                                         psqlscan_select_top_buffer(cur_state);
 926                                 }
 927
 928 %%
 929
 930 /* LCOV_EXCL_STOP */
 931
 932 /*
 933  * Create a lexer working state struct.
 934  *
 935  * callbacks is a struct of function pointers that encapsulate some
 936  * behavior we need from the surrounding program.  This struct must
 937  * remain valid for the lifespan of the PsqlScanState.
 938  */
 939 PsqlScanState
 940 psql_scan_create(const PsqlScanCallbacks *callbacks)
 941 {
 942         PsqlScanState state;
 943
 944         state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
 945
 946         state->callbacks = callbacks;
 947
 948         yylex_init(&state->scanner);
 949
 950         yyset_extra(state, state->scanner);
 951
 952         psql_scan_reset(state);
 953
 954         return state;
 955 }
 956
 957 /*
 958  * Destroy a lexer working state struct, releasing all resources.
 959  */
 960 void
 961 psql_scan_destroy(PsqlScanState state)
 962 {
 963         psql_scan_finish(state);
 964
 965         psql_scan_reset(state);
 966
 967         yylex_destroy(state->scanner);
 968
 969         free(state);
 970 }
 971
 972 /*
 973  * Set the callback passthrough pointer for the lexer.
 974  *
 975  * This could have been integrated into psql_scan_create, but keeping it
 976  * separate allows the application to change the pointer later, which might
 977  * be useful.
 978  */
 979 void
 980 psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
 981 {
 982         state->cb_passthrough = passthrough;
 983 }
 984
 985 /*
 986  * Set up to perform lexing of the given input line.
 987  *
 988  * The text at *line, extending for line_len bytes, will be scanned by
 989  * subsequent calls to the psql_scan routines.  psql_scan_finish should
 990  * be called when scanning is complete.  Note that the lexer retains
 991  * a pointer to the storage at *line --- this string must not be altered
 992  * or freed until after psql_scan_finish is called.
 993  *
 994  * encoding is the libpq identifier for the character encoding in use,
 995  * and std_strings says whether standard_conforming_strings is on.
 996  */
 997 void
 998 psql_scan_setup(PsqlScanState state,
 999                                 const char *line, int line_len,
1000                                 int encoding, bool std_strings)
1001 {
1002         /* Mustn't be scanning already */
1003         Assert(state->scanbufhandle == NULL);
1004         Assert(state->buffer_stack == NULL);
1005
1006         /* Do we need to hack the character set encoding? */
1007         state->encoding = encoding;
1008         state->safe_encoding = pg_valid_server_encoding_id(encoding);
1009
1010         /* Save standard-strings flag as well */
1011         state->std_strings = std_strings;
1012
1013         /* Set up flex input buffer with appropriate translation and padding */
1014         state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
1015                                                                                                    &state->scanbuf);
1016         state->scanline = line;
1017
1018         /* Set lookaside data in case we have to map unsafe encoding */
1019         state->curline = state->scanbuf;
1020         state->refline = state->scanline;
1021 }
1022
1023 /*
1024  * Do lexical analysis of SQL command text.
1025  *
1026  * The text previously passed to psql_scan_setup is scanned, and appended
1027  * (possibly with transformation) to query_buf.
1028  *
1029  * The return value indicates the condition that stopped scanning:
1030  *
1031  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
1032  * transferred to query_buf.)  The command accumulated in query_buf should
1033  * be executed, then clear query_buf and call again to scan the remainder
1034  * of the line.
1035  *
1036  * PSCAN_BACKSLASH: found a backslash that starts a special command.
1037  * Any previous data on the line has been transferred to query_buf.
1038  * The caller will typically next apply a separate flex lexer to scan
1039  * the special command.
1040  *
1041  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1042  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
1043  *
1044  * PSCAN_EOL: the end of the line was reached, and there is no lexical
1045  * reason to consider the command incomplete.  The caller may or may not
1046  * choose to send it.  *prompt is set to the appropriate prompt type if
1047  * the caller chooses to collect more input.
1048  *
1049  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1050  * be called next, then the cycle may be repeated with a fresh input line.
1051  *
1052  * In all cases, *prompt is set to an appropriate prompt type code for the
1053  * next line-input operation.
1054  */
1055 PsqlScanResult
1056 psql_scan(PsqlScanState state,
1057                   PQExpBuffer query_buf,
1058                   promptStatus_t *prompt)
1059 {
1060         PsqlScanResult result;
1061         int                     lexresult;
1062
1063         /* Must be scanning already */
1064         Assert(state->scanbufhandle != NULL);
1065
1066         /* Set current output target */
1067         state->output_buf = query_buf;
1068
1069         /* Set input source */
1070         if (state->buffer_stack != NULL)
1071                 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
1072         else
1073                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1074
1075         /* And lex. */
1076         lexresult = yylex(NULL, state->scanner);
1077
1078         /*
1079          * Check termination state and return appropriate result info.
1080          */
1081         switch (lexresult)
1082         {
1083                 case LEXRES_EOL:                /* end of input */
1084                         switch (state->start_state)
1085                         {
1086                                 case INITIAL:
1087                                 case xuiend:    /* we treat these like INITIAL */
1088                                 case xusend:
1089                                         if (state->paren_depth > 0)
1090                                         {
1091                                                 result = PSCAN_INCOMPLETE;
1092                                                 *prompt = PROMPT_PAREN;
1093                                         }
1094                                         else if (query_buf->len > 0)
1095                                         {
1096                                                 result = PSCAN_EOL;
1097                                                 *prompt = PROMPT_CONTINUE;
1098                                         }
1099                                         else
1100                                         {
1101                                                 /* never bother to send an empty buffer */
1102                                                 result = PSCAN_INCOMPLETE;
1103                                                 *prompt = PROMPT_READY;
1104                                         }
1105                                         break;
1106                                 case xb:
1107                                         result = PSCAN_INCOMPLETE;
1108                                         *prompt = PROMPT_SINGLEQUOTE;
1109                                         break;
1110                                 case xc:
1111                                         result = PSCAN_INCOMPLETE;
1112                                         *prompt = PROMPT_COMMENT;
1113                                         break;
1114                                 case xd:
1115                                         result = PSCAN_INCOMPLETE;
1116                                         *prompt = PROMPT_DOUBLEQUOTE;
1117                                         break;
1118                                 case xh:
1119                                         result = PSCAN_INCOMPLETE;
1120                                         *prompt = PROMPT_SINGLEQUOTE;
1121                                         break;
1122                                 case xe:
1123                                         result = PSCAN_INCOMPLETE;
1124                                         *prompt = PROMPT_SINGLEQUOTE;
1125                                         break;
1126                                 case xq:
1127                                         result = PSCAN_INCOMPLETE;
1128                                         *prompt = PROMPT_SINGLEQUOTE;
1129                                         break;
1130                                 case xdolq:
1131                                         result = PSCAN_INCOMPLETE;
1132                                         *prompt = PROMPT_DOLLARQUOTE;
1133                                         break;
1134                                 case xui:
1135                                         result = PSCAN_INCOMPLETE;
1136                                         *prompt = PROMPT_DOUBLEQUOTE;
1137                                         break;
1138                                 case xus:
1139                                         result = PSCAN_INCOMPLETE;
1140                                         *prompt = PROMPT_SINGLEQUOTE;
1141                                         break;
1142                                 default:
1143                                         /* can't get here */
1144                                         fprintf(stderr, "invalid YY_START\n");
1145                                         exit(1);
1146                         }
1147                         break;
1148                 case LEXRES_SEMI:               /* semicolon */
1149                         result = PSCAN_SEMICOLON;
1150                         *prompt = PROMPT_READY;
1151                         break;
1152                 case LEXRES_BACKSLASH:  /* backslash */
1153                         result = PSCAN_BACKSLASH;
1154                         *prompt = PROMPT_READY;
1155                         break;
1156                 default:
1157                         /* can't get here */
1158                         fprintf(stderr, "invalid yylex result\n");
1159                         exit(1);
1160         }
1161
1162         return result;
1163 }
1164
1165 /*
1166  * Clean up after scanning a string.  This flushes any unread input and
1167  * releases resources (but not the PsqlScanState itself).  Note however
1168  * that this does not reset the lexer scan state; that can be done by
1169  * psql_scan_reset(), which is an orthogonal operation.
1170  *
1171  * It is legal to call this when not scanning anything (makes it easier
1172  * to deal with error recovery).
1173  */
1174 void
1175 psql_scan_finish(PsqlScanState state)
1176 {
1177         /* Drop any incomplete variable expansions. */
1178         while (state->buffer_stack != NULL)
1179                 psqlscan_pop_buffer_stack(state);
1180
1181         /* Done with the outer scan buffer, too */
1182         if (state->scanbufhandle)
1183                 yy_delete_buffer(state->scanbufhandle, state->scanner);
1184         state->scanbufhandle = NULL;
1185         if (state->scanbuf)
1186                 free(state->scanbuf);
1187         state->scanbuf = NULL;
1188 }
1189
1190 /*
1191  * Reset lexer scanning state to start conditions.  This is appropriate
1192  * for executing \r psql commands (or any other time that we discard the
1193  * prior contents of query_buf).  It is not, however, necessary to do this
1194  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1195  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1196  * conditions are returned.
1197  *
1198  * Note that this is unrelated to flushing unread input; that task is
1199  * done by psql_scan_finish().
1200  */
1201 void
1202 psql_scan_reset(PsqlScanState state)
1203 {
1204         state->start_state = INITIAL;
1205         state->paren_depth = 0;
1206         state->xcdepth = 0;                     /* not really necessary */
1207         if (state->dolqstart)
1208                 free(state->dolqstart);
1209         state->dolqstart = NULL;
1210 }
1211
1212 /*
1213  * Reselect this lexer (psqlscan.l) after using another one.
1214  *
1215  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1216  * state, because we'd never switch to another lexer in a different state.
1217  * However, we don't want to reset e.g. paren_depth, so this can't be
1218  * the same as psql_scan_reset().
1219  *
1220  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1221  * must be a superset of this.
1222  *
1223  * Note: it seems likely that other lexers could just assign INITIAL for
1224  * themselves, since that probably has the value zero in every flex-generated
1225  * lexer.  But let's not assume that.
1226  */
1227 void
1228 psql_scan_reselect_sql_lexer(PsqlScanState state)
1229 {
1230         state->start_state = INITIAL;
1231 }
1232
1233 /*
1234  * Return true if lexer is currently in an "inside quotes" state.
1235  *
1236  * This is pretty grotty but is needed to preserve the old behavior
1237  * that mainloop.c drops blank lines not inside quotes without even
1238  * echoing them.
1239  */
1240 bool
1241 psql_scan_in_quote(PsqlScanState state)
1242 {
1243         return state->start_state != INITIAL;
1244 }
1245
1246 /*
1247  * Push the given string onto the stack of stuff to scan.
1248  *
1249  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1250  */
1251 void
1252 psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
1253                                                  const char *varname)
1254 {
1255         StackElem  *stackelem;
1256
1257         stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1258
1259         /*
1260          * In current usage, the passed varname points at the current flex input
1261          * buffer; we must copy it before calling psqlscan_prepare_buffer()
1262          * because that will change the buffer state.
1263          */
1264         stackelem->varname = varname ? pg_strdup(varname) : NULL;
1265
1266         stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
1267                                                                                          &stackelem->bufstring);
1268         state->curline = stackelem->bufstring;
1269         if (state->safe_encoding)
1270         {
1271                 stackelem->origstring = NULL;
1272                 state->refline = stackelem->bufstring;
1273         }
1274         else
1275         {
1276                 stackelem->origstring = pg_strdup(newstr);
1277                 state->refline = stackelem->origstring;
1278         }
1279         stackelem->next = state->buffer_stack;
1280         state->buffer_stack = stackelem;
1281 }
1282
1283 /*
1284  * Pop the topmost buffer stack item (there must be one!)
1285  *
1286  * NB: after this, the flex input state is unspecified; caller must
1287  * switch to an appropriate buffer to continue lexing.
1288  * See psqlscan_select_top_buffer().
1289  */
1290 void
1291 psqlscan_pop_buffer_stack(PsqlScanState state)
1292 {
1293         StackElem  *stackelem = state->buffer_stack;
1294
1295         state->buffer_stack = stackelem->next;
1296         yy_delete_buffer(stackelem->buf, state->scanner);
1297         free(stackelem->bufstring);
1298         if (stackelem->origstring)
1299                 free(stackelem->origstring);
1300         if (stackelem->varname)
1301                 free(stackelem->varname);
1302         free(stackelem);
1303 }
1304
1305 /*
1306  * Select the topmost surviving buffer as the active input.
1307  */
1308 void
1309 psqlscan_select_top_buffer(PsqlScanState state)
1310 {
1311         StackElem  *stackelem = state->buffer_stack;
1312
1313         if (stackelem != NULL)
1314         {
1315                 yy_switch_to_buffer(stackelem->buf, state->scanner);
1316                 state->curline = stackelem->bufstring;
1317                 state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
1318         }
1319         else
1320         {
1321                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1322                 state->curline = state->scanbuf;
1323                 state->refline = state->scanline;
1324         }
1325 }
1326
1327 /*
1328  * Check if specified variable name is the source for any string
1329  * currently being scanned
1330  */
1331 bool
1332 psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
1333 {
1334         StackElem  *stackelem;
1335
1336         for (stackelem = state->buffer_stack;
1337                  stackelem != NULL;
1338                  stackelem = stackelem->next)
1339         {
1340                 if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1341                         return true;
1342         }
1343         return false;
1344 }
1345
1346 /*
1347  * Set up a flex input buffer to scan the given data.  We always make a
1348  * copy of the data.  If working in an unsafe encoding, the copy has
1349  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1350  *
1351  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1352  */
1353 YY_BUFFER_STATE
1354 psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
1355                                                 char **txtcopy)
1356 {
1357         char       *newtxt;
1358
1359         /* Flex wants two \0 characters after the actual data */
1360         newtxt = pg_malloc(len + 2);
1361         *txtcopy = newtxt;
1362         newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1363
1364         if (state->safe_encoding)
1365                 memcpy(newtxt, txt, len);
1366         else
1367         {
1368                 /* Gotta do it the hard way */
1369                 int                     i = 0;
1370
1371                 while (i < len)
1372                 {
1373                         int                     thislen = PQmblen(txt + i, state->encoding);
1374
1375                         /* first byte should always be okay... */
1376                         newtxt[i] = txt[i];
1377                         i++;
1378                         while (--thislen > 0 && i < len)
1379                                 newtxt[i++] = (char) 0xFF;
1380                 }
1381         }
1382
1383         return yy_scan_buffer(newtxt, len + 2, state->scanner);
1384 }
1385
1386 /*
1387  * psqlscan_emit() --- body for ECHO macro
1388  *
1389  * NB: this must be used for ALL and ONLY the text copied from the flex
1390  * input data.  If you pass it something that is not part of the yytext
1391  * string, you are making a mistake.  Internally generated text can be
1392  * appended directly to state->output_buf.
1393  */
1394 void
1395 psqlscan_emit(PsqlScanState state, const char *txt, int len)
1396 {
1397         PQExpBuffer output_buf = state->output_buf;
1398
1399         if (state->safe_encoding)
1400                 appendBinaryPQExpBuffer(output_buf, txt, len);
1401         else
1402         {
1403                 /* Gotta do it the hard way */
1404                 const char *reference = state->refline;
1405                 int                     i;
1406
1407                 reference += (txt - state->curline);
1408
1409                 for (i = 0; i < len; i++)
1410                 {
1411                         char            ch = txt[i];
1412
1413                         if (ch == (char) 0xFF)
1414                                 ch = reference[i];
1415                         appendPQExpBufferChar(output_buf, ch);
1416                 }
1417         }
1418 }
1419
1420 /*
1421  * psqlscan_extract_substring --- fetch value of (part of) the current token
1422  *
1423  * This is like psqlscan_emit(), except that the data is returned as a
1424  * malloc'd string rather than being pushed directly to state->output_buf.
1425  */
1426 char *
1427 psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
1428 {
1429         char       *result = (char *) pg_malloc(len + 1);
1430
1431         if (state->safe_encoding)
1432                 memcpy(result, txt, len);
1433         else
1434         {
1435                 /* Gotta do it the hard way */
1436                 const char *reference = state->refline;
1437                 int                     i;
1438
1439                 reference += (txt - state->curline);
1440
1441                 for (i = 0; i < len; i++)
1442                 {
1443                         char            ch = txt[i];
1444
1445                         if (ch == (char) 0xFF)
1446                                 ch = reference[i];
1447                         result[i] = ch;
1448                 }
1449         }
1450         result[len] = '\0';
1451         return result;
1452 }
1453
1454 /*
1455  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1456  *
1457  * If the variable name is found, escape its value using the appropriate
1458  * quoting method and emit the value to output_buf.  (Since the result is
1459  * surely quoted, there is never any reason to rescan it.)      If we don't
1460  * find the variable or escaping fails, emit the token as-is.
1461  */
1462 void
1463 psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
1464                                                  PsqlScanQuoteType quote)
1465 {
1466         char       *varname;
1467         char       *value;
1468
1469         /* Variable lookup. */
1470         varname = psqlscan_extract_substring(state, txt + 2, len - 3);
1471         if (state->callbacks->get_variable)
1472                 value = state->callbacks->get_variable(varname, quote,
1473                                                                                            state->cb_passthrough);
1474         else
1475                 value = NULL;
1476         free(varname);
1477
1478         if (value)
1479         {
1480                 /* Emit the suitably-escaped value */
1481                 appendPQExpBufferStr(state->output_buf, value);
1482                 free(value);
1483         }
1484         else
1485         {
1486                 /* Emit original token as-is */
1487                 psqlscan_emit(state, txt, len);
1488         }
1489 }
1490
1491 void
1492 psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
1493 {
1494         char    *varname;
1495         char    *value;
1496
1497         varname = psqlscan_extract_substring(state, txt + 3, len - 4);
1498         if (state->callbacks->get_variable)
1499                 value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
1500                                                                                            state->cb_passthrough);
1501         else
1502                 value = NULL;
1503         free(varname);
1504
1505         if (value != NULL)
1506         {
1507                 psqlscan_emit(state, "TRUE", 4);
1508                 free(value);
1509         }
1510         else
1511         {
1512                 psqlscan_emit(state, "FALSE", 5);
1513         }
1514 }