Update copyright for 2022
[pgsql.git] / src / include / parser / scanner.h
blob0843481517ee381f375a2432fd63551b99ce6fa9
/*-------------------------------------------------------------------------
 *
 * scanner.h
 *		API for the core scanner (flex machine)
 *
 * The core scanner is also used by PL/pgSQL, so we provide a public API
 * for it.  However, the rest of the backend is only expected to use the
 * higher-level API provided by parser.h.
 *
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/parser/scanner.h
 *
 *-------------------------------------------------------------------------
 */
19 #ifndef SCANNER_H
20 #define SCANNER_H
22 #include "common/keywords.h"
/*
 * The scanner returns extra data about scanned tokens in this union type.
 * Note that this is a subset of the fields used in YYSTYPE of the bison
 * parsers built atop the scanner.
 *
 * Because this is a union, the members share storage: only the member that
 * matches the kind of token just scanned holds a meaningful value.
 */
typedef union core_YYSTYPE
{
	int			ival;			/* for integer literals */
	char	   *str;			/* for identifiers and non-integer literals */
	const char *keyword;		/* canonical spelling of keywords */
} core_YYSTYPE;
/*
 * We track token locations in terms of byte offsets from the start of the
 * source string, not the column number/line number representation that
 * bison uses by default.  Also, to minimize overhead we track only one
 * location (usually the first token location) for each construct, not
 * the beginning and ending locations as bison does by default.  It's
 * therefore sufficient to make YYLTYPE an int.
 */
#define YYLTYPE  int
/*
 * Another important component of the scanner's API is the token code numbers.
 * However, those are not defined in this file, because bison insists on
 * defining them for itself.  The token codes used by the core scanner are
 * the ASCII characters plus these:
 *	%token <str>	IDENT UIDENT FCONST SCONST USCONST BCONST XCONST Op
 *	%token <ival>	ICONST PARAM
 *	%token			TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
 *	%token			LESS_EQUALS GREATER_EQUALS NOT_EQUALS
 * The above token definitions *must* be the first ones declared in any
 * bison parser built atop this scanner, so that they will have consistent
 * numbers assigned to them (specifically, IDENT = 258 and so on).
 */
61 * The YY_EXTRA data that a flex scanner allows us to pass around.
62 * Private state needed by the core scanner goes here. Note that the actual
63 * yy_extra struct may be larger and have this as its first component, thus
64 * allowing the calling parser to keep some fields of its own in YY_EXTRA.
66 typedef struct core_yy_extra_type
69 * The string the scanner is physically scanning. We keep this mainly so
70 * that we can cheaply compute the offset of the current token (yytext).
72 char *scanbuf;
73 Size scanbuflen;
76 * The keyword list to use, and the associated grammar token codes.
78 const ScanKeywordList *keywordlist;
79 const uint16 *keyword_tokens;
82 * Scanner settings to use. These are initialized from the corresponding
83 * GUC variables by scanner_init(). Callers can modify them after
84 * scanner_init() if they don't want the scanner's behavior to follow the
85 * prevailing GUC settings.
87 int backslash_quote;
88 bool escape_string_warning;
89 bool standard_conforming_strings;
92 * literalbuf is used to accumulate literal values when multiple rules are
93 * needed to parse a single literal. Call startlit() to reset buffer to
94 * empty, addlit() to add text. NOTE: the string in literalbuf is NOT
95 * necessarily null-terminated, but there always IS room to add a trailing
96 * null at offset literallen. We store a null only when we need it.
98 char *literalbuf; /* palloc'd expandable buffer */
99 int literallen; /* actual current string length */
100 int literalalloc; /* current allocated buffer size */
103 * Random assorted scanner state.
105 int state_before_str_stop; /* start cond. before end quote */
106 int xcdepth; /* depth of nesting in slash-star comments */
107 char *dolqstart; /* current $foo$ quote start string */
108 YYLTYPE save_yylloc; /* one-element stack for PUSH_YYLLOC() */
110 /* first part of UTF16 surrogate pair for Unicode escapes */
111 int32 utf16_first_part;
113 /* state variables for literal-lexing warnings */
114 bool warn_on_first_escape;
115 bool saw_non_ascii;
116 } core_yy_extra_type;
/*
 * The type of yyscanner is opaque outside scan.l.
 *
 * Code outside scan.l should treat a core_yyscan_t purely as a handle to
 * pass back to the scanner entry points, never dereferencing it.
 */
typedef void *core_yyscan_t;
123 /* Support for scanner_errposition_callback function */
124 typedef struct ScannerCallbackState
126 core_yyscan_t yyscanner;
127 int location;
128 ErrorContextCallback errcallback;
129 } ScannerCallbackState;
132 /* Constant data exported from parser/scan.l */
133 extern PGDLLIMPORT const uint16 ScanKeywordTokens[];
135 /* Entry points in parser/scan.l */
136 extern core_yyscan_t scanner_init(const char *str,
137 core_yy_extra_type *yyext,
138 const ScanKeywordList *keywordlist,
139 const uint16 *keyword_tokens);
140 extern void scanner_finish(core_yyscan_t yyscanner);
141 extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
142 core_yyscan_t yyscanner);
143 extern int scanner_errposition(int location, core_yyscan_t yyscanner);
144 extern void setup_scanner_errposition_callback(ScannerCallbackState *scbstate,
145 core_yyscan_t yyscanner,
146 int location);
147 extern void cancel_scanner_errposition_callback(ScannerCallbackState *scbstate);
148 extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn();
150 #endif /* SCANNER_H */