Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * scan.l
5 : * lexical scanner for PostgreSQL
6 : *
7 : * NOTE NOTE NOTE:
8 : *
9 : * The rules in this file must be kept in sync with src/fe_utils/psqlscan.l
10 : * and src/interfaces/ecpg/preproc/pgc.l!
11 : *
12 : * The rules are designed so that the scanner never has to backtrack,
13 : * in the sense that there is always a rule that can match the input
14 : * consumed so far (the rule action may internally throw back some input
15 : * with yyless(), however). As explained in the flex manual, this makes
16 : * for a useful speed increase --- several percent faster when measuring
17 : * raw parsing (Flex + Bison). The extra complexity is mostly in the rules
18 : * for handling float numbers and continued string literals. If you change
19 : * the lexical rules, verify that you haven't broken the no-backtrack
20 : * property by running flex with the "-b" option and checking that the
21 : * resulting "lex.backup" file says that no backing up is needed. (As of
22 : * Postgres 9.2, this check is made automatically by the Makefile.)
23 : *
24 : *
25 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
26 : * Portions Copyright (c) 1994, Regents of the University of California
27 : *
28 : * IDENTIFICATION
29 : * src/backend/parser/scan.l
30 : *
31 : *-------------------------------------------------------------------------
32 : */
33 : #include "postgres.h"
34 :
35 : #include <ctype.h>
36 : #include <unistd.h>
37 :
38 : #include "common/string.h"
39 : #include "gramparse.h"
40 : #include "nodes/miscnodes.h"
41 : #include "parser/parser.h" /* only needed for GUC variables */
42 : #include "parser/scansup.h"
43 : #include "port/pg_bitutils.h"
44 : #include "mb/pg_wchar.h"
45 : #include "utils/builtins.h"
46 : }
47 :
48 : %{
49 :
50 : /* LCOV_EXCL_START */
51 :
52 : /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
53 : #undef fprintf
54 : #define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg)
55 :
56 : static void /* stand-in that the fprintf() macro defined just above redirects to */
57 : fprintf_to_ereport(const char *fmt, const char *msg) /* fmt: format string, not used here; msg: text to report */
58 : {
59 : ereport(ERROR, (errmsg_internal("%s", msg))); /* report msg as an ERROR instead of printing it */
60 : }
61 :
62 : /*
63 : * GUC variables. This is a DIRECT violation of the warning given at the
64 : * head of gram.y, ie flex/bison code must not depend on any GUC variables;
65 : * as such, changing their values can induce very unintuitive behavior.
66 : * But we shall have to live with it until we can remove these variables.
67 : */
68 : int backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
69 : bool escape_string_warning = true;
70 : bool standard_conforming_strings = true;
71 :
72 : /*
73 : * Constant data exported from this file. This array maps from the
74 : * zero-based keyword numbers returned by ScanKeywordLookup to the
75 : * Bison token numbers needed by gram.y. This is exported because
76 : * callers need to pass it to scanner_init, if they are using the
77 : * standard keyword list ScanKeywords.
78 : */
79 : #define PG_KEYWORD(kwname, value, category, collabel) value,
80 :
81 : const uint16 ScanKeywordTokens[] = { /* one token number per keyword, indexed by keyword number */
82 : #include "parser/kwlist.h" /* presumably a list of PG_KEYWORD(...) entries, each reduced to "value," by the macro above -- confirm */
83 : };
84 :
85 : #undef PG_KEYWORD
86 :
87 : /*
88 : * Set the type of YYSTYPE.
89 : */
90 : #define YYSTYPE core_YYSTYPE
91 :
92 : /*
93 : * Set the type of yyextra. All state variables used by the scanner should
94 : * be in yyextra, *not* statically allocated.
95 : */
96 : #define YY_EXTRA_TYPE core_yy_extra_type *
97 :
98 : /*
99 : * Each call to yylex must set yylloc to the location of the found token
100 : * (expressed as a byte offset from the start of the input text).
101 : * When we parse a token that requires multiple lexer rules to process,
102 : * this should be done in the first such rule, else yylloc will point
103 : * into the middle of the token.
104 : */
105 : #define SET_YYLLOC() (*(yylloc) = yytext - yyextra->scanbuf)
106 :
107 : /*
108 : * Advance yylloc by the given number of bytes.
109 : */
110 : #define ADVANCE_YYLLOC(delta) ( *(yylloc) += (delta) )
111 :
112 : /*
113 : * Sometimes, we do want yylloc to point into the middle of a token; this is
114 : * useful for instance to throw an error about an escape sequence within a
115 : * string literal. But if we find no error there, we want to revert yylloc
116 : * to the token start, so that that's the location reported to the parser.
117 : * Use PUSH_YYLLOC/POP_YYLLOC to save/restore yylloc around such code.
118 : * (Currently the implied "stack" is just one location, but someday we might
119 : * need to nest these.)
120 : */
121 : #define PUSH_YYLLOC() (yyextra->save_yylloc = *(yylloc))
122 : #define POP_YYLLOC() (*(yylloc) = yyextra->save_yylloc)
123 :
124 : #define startlit() ( yyextra->literallen = 0 )
125 : static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
126 : static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
127 : static char *litbufdup(core_yyscan_t yyscanner);
128 : static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
129 : static int process_integer_literal(const char *token, YYSTYPE *lval, int base);
130 : static void addunicode(pg_wchar c, yyscan_t yyscanner);
131 :
132 : #define yyerror(msg) scanner_yyerror(msg, yyscanner)
133 :
134 : #define lexer_errposition() scanner_errposition(*(yylloc), yyscanner)
135 :
136 : static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner);
137 : static void check_escape_warning(core_yyscan_t yyscanner);
138 :
139 : /*
140 : * Work around a bug in flex 2.5.35: it emits a couple of functions that
141 : * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
142 : * this would cause warnings. Providing our own declarations should be
143 : * harmless even when the bug gets fixed.
144 : */
145 : extern int core_yyget_column(yyscan_t yyscanner);
146 : extern void core_yyset_column(int column_no, yyscan_t yyscanner);
147 :
148 : %}
149 :
150 : %option reentrant
151 : %option bison-bridge
152 : %option bison-locations
153 : %option 8bit
154 : %option never-interactive
155 : %option nodefault
156 : %option noinput
157 : %option nounput
158 : %option noyywrap
159 : %option noyyalloc
160 : %option noyyrealloc
161 : %option noyyfree
162 : %option warn
163 : %option prefix="core_yy"
164 :
165 : /*
166 : * OK, here is a short description of lex/flex rules behavior.
167 : * The longest pattern which matches an input string is always chosen.
168 : * For equal-length patterns, the first occurring in the rules list is chosen.
169 : * INITIAL is the starting state, to which all non-conditional rules apply.
170 : * Exclusive states change parsing rules while the state is active. When in
171 : * an exclusive state, only those rules defined for that state apply.
172 : *
173 : * We use exclusive states for quoted strings, extended comments,
174 : * and to eliminate parsing troubles for numeric strings.
175 : * Exclusive states:
176 : * <xb> bit string literal
177 : * <xc> extended C-style comments
178 : * <xd> delimited identifiers (double-quoted identifiers)
179 : * <xh> hexadecimal byte string
180 : * <xq> standard quoted strings
181 : * <xqs> quote stop (detect continued strings)
182 : * <xe> extended quoted strings (support backslash escape sequences)
183 : * <xdolq> $foo$ quoted strings
184 : * <xui> quoted identifier with Unicode escapes
185 : * <xus> quoted string with Unicode escapes
186 : * <xeu> Unicode surrogate pair in extended quoted string
187 : *
188 : * Remember to add an <<EOF>> case whenever you add a new exclusive state!
189 : * The default one is probably not the right thing.
190 : */
191 :
192 : %x xb
193 : %x xc
194 : %x xd
195 : %x xh
196 : %x xq
197 : %x xqs
198 : %x xe
199 : %x xdolq
200 : %x xui
201 : %x xus
202 : %x xeu
203 :
204 : /*
205 : * In order to make the world safe for Windows and Mac clients as well as
206 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
207 : * sequence will be seen as two successive newlines, but that doesn't cause
208 : * any problems. Comments that start with -- and extend to the next
209 : * newline are treated as equivalent to a single whitespace character.
210 : *
211 : * NOTE a fine point: if there is no newline following --, we will absorb
212 : * everything to the end of the input as a comment. This is correct. Older
213 : * versions of Postgres failed to recognize -- as a comment if the input
214 : * did not end with a newline.
215 : *
216 : * non_newline_space tracks all the other space characters except newlines.
217 : *
218 : * XXX if you change the set of whitespace characters, fix scanner_isspace()
219 : * to agree.
220 : */
221 :
222 : space [ \t\n\r\f\v]
223 : non_newline_space [ \t\f\v]
224 : newline [\n\r]
225 : non_newline [^\n\r]
226 :
227 : comment ("--"{non_newline}*)
228 :
229 : whitespace ({space}+|{comment})
230 :
231 : /*
232 : * SQL requires at least one newline in the whitespace separating
233 : * string literals that are to be concatenated. Silly, but who are we
234 : * to argue? Note that {whitespace_with_newline} should not have * after
235 : * it, whereas {whitespace} should generally have a * after it...
236 : */
237 :
238 : special_whitespace ({space}+|{comment}{newline})
239 : non_newline_whitespace ({non_newline_space}|{comment})
240 : whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
241 :
242 : quote '
243 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
244 : quotecontinue {whitespace_with_newline}{quote}
245 :
246 : /*
247 : * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
248 : * {quotecontinue}. It might seem that this could just be {whitespace}*,
249 : * but if there's a dash after {whitespace_with_newline}, it must be consumed
250 : * to see if there's another dash --- which would start a {comment} and thus
251 : * allow continuation of the {quotecontinue} token.
252 : */
253 : quotecontinuefail {whitespace}*"-"?
254 :
255 : /* Bit string
256 : * It is tempting to scan the string for only those characters
257 : * which are allowed. However, this leads to silently swallowed
258 : * characters if illegal characters are included in the string.
259 : * For example, if xbinside is [01] then B'ABCD' is interpreted
260 : * as a zero-length string, and the ABCD' is lost!
261 : * Better to pass the string forward and let the input routines
262 : * validate the contents.
263 : */
264 : xbstart [bB]{quote}
265 : xbinside [^']*
266 :
267 : /* Hexadecimal byte string */
268 : xhstart [xX]{quote}
269 : xhinside [^']*
270 :
271 : /* National character */
272 : xnstart [nN]{quote}
273 :
274 : /* Quoted string that allows backslash escapes */
275 : xestart [eE]{quote}
276 : xeinside [^\\']+
277 : xeescape [\\][^0-7]
278 : xeoctesc [\\][0-7]{1,3}
279 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
280 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
281 : xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
282 :
283 : /* Extended quote
284 : * xqdouble implements embedded quote, ''''
285 : */
286 : xqstart {quote}
287 : xqdouble {quote}{quote}
288 : xqinside [^']+
289 :
290 : /* $foo$ style quotes ("dollar quoting")
291 : * The quoted string starts with $foo$ where "foo" is an optional string
292 : * in the form of an identifier, except that it may not contain "$",
293 : * and extends to the first occurrence of an identical string.
294 : * There is *no* processing of the quoted text.
295 : *
296 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
297 : * fails to match its trailing "$".
298 : */
299 : dolq_start [A-Za-z\200-\377_]
300 : dolq_cont [A-Za-z\200-\377_0-9]
301 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
302 : dolqfailed \${dolq_start}{dolq_cont}*
303 : dolqinside [^$]+
304 :
305 : /* Double quote
306 : * Allows embedded spaces and other special characters into identifiers.
307 : */
308 : dquote \"
309 : xdstart {dquote}
310 : xdstop {dquote}
311 : xddouble {dquote}{dquote}
312 : xdinside [^"]+
313 :
314 : /* Quoted identifier with Unicode escapes */
315 : xuistart [uU]&{dquote}
316 :
317 : /* Quoted string with Unicode escapes */
318 : xusstart [uU]&{quote}
319 :
320 : /* error rule to avoid backup */
321 : xufailed [uU]&
322 :
323 :
324 : /* C-style comments
325 : *
326 : * The "extended comment" syntax closely resembles allowable operator syntax.
327 : * The tricky part here is to get lex to recognize a string starting with
328 : * slash-star as a comment, when interpreting it as an operator would produce
329 : * a longer match --- remember lex will prefer a longer match! Also, if we
330 : * have something like plus-slash-star, lex will think this is a 3-character
331 : * operator whereas we want to see it as a + operator and a comment start.
332 : * The solution is two-fold:
333 : * 1. append {op_chars}* to xcstart so that it matches as much text as
334 : * {operator} would. Then the tie-breaker (first matching rule of same
335 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
336 : * in case it contains a star-slash that should terminate the comment.
337 : * 2. In the operator rule, check for slash-star within the operator, and
338 : * if found throw it back with yyless(). This handles the plus-slash-star
339 : * problem.
340 : * Dash-dash comments have similar interactions with the operator rule.
341 : */
342 : xcstart \/\*{op_chars}*
343 : xcstop \*+\/
344 : xcinside [^*/]+
345 :
346 : ident_start [A-Za-z\200-\377_]
347 : ident_cont [A-Za-z\200-\377_0-9\$]
348 :
349 : identifier {ident_start}{ident_cont}*
350 :
351 : /* Assorted special-case operators and operator-like tokens */
352 : typecast "::"
353 : dot_dot \.\.
354 : colon_equals ":="
355 :
356 : /*
357 : * These operator-like tokens (unlike the above ones) also match the {operator}
358 : * rule, which means that they might be overridden by a longer match if they
359 : * are followed by a comment start or a + or - character. Accordingly, if you
360 : * add to this list, you must also add corresponding code to the {operator}
361 : * block to return the correct token in such cases. (This is not needed in
362 : * psqlscan.l since the token value is ignored there.)
363 : */
364 : equals_greater "=>"
365 : less_equals "<="
366 : greater_equals ">="
367 : less_greater "<>"
368 : not_equals "!="
369 :
370 : /*
371 : * "self" is the set of chars that should be returned as single-character
372 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
373 : * which can be one or more characters long (but if a single-char token
374 : * appears in the "self" set, it is not to be returned as an Op). Note
375 : * that the sets overlap, but each has some chars that are not in the other.
376 : *
377 : * If you change either set, adjust the character lists appearing in the
378 : * rule for "operator"!
379 : */
380 : self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
381 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
382 : operator {op_chars}+
383 :
384 : /*
385 : * Numbers
386 : *
387 : * Unary minus is not part of a number here. Instead we pass it separately to
388 : * the parser, and there it gets coerced via doNegate().
389 : *
390 : * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
391 : *
392 : * {realfail} is added to prevent the need for scanner
393 : * backup when the {real} rule fails to match completely.
394 : */
395 : decdigit [0-9]
396 : hexdigit [0-9A-Fa-f]
397 : octdigit [0-7]
398 : bindigit [0-1]
399 :
400 : decinteger {decdigit}(_?{decdigit})*
401 : hexinteger 0[xX](_?{hexdigit})+
402 : octinteger 0[oO](_?{octdigit})+
403 : bininteger 0[bB](_?{bindigit})+
404 :
405 : hexfail 0[xX]_?
406 : octfail 0[oO]_?
407 : binfail 0[bB]_?
408 :
409 : numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
410 : numericfail {decinteger}\.\.
411 :
412 : real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
413 : realfail ({decinteger}|{numeric})[Ee][-+]
414 :
415 : /* Positional parameters don't accept underscores. */
416 : param \${decdigit}+
417 :
418 : /*
419 : * An identifier immediately following an integer literal is disallowed because
420 : * in some cases it's ambiguous what is meant: for example, 0x1234 could be
421 : * either a hexinteger or a decinteger "0" and an identifier "x1234". We can
422 : * detect such problems by seeing if integer_junk matches a longer substring
423 : * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
424 : * bininteger). One "junk" pattern is sufficient because
425 : * {decinteger}{identifier} will match all the same strings we'd match with
426 : * {hexinteger}{identifier} etc.
427 : *
428 : * Note that the rule for integer_junk must appear after the ones for
429 : * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
430 : * and integer_junk, and we need hexinteger to be chosen in that case.
431 : *
432 : * Also disallow strings matched by numeric_junk, real_junk and param_junk
433 : * for consistency.
434 : */
435 : integer_junk {decinteger}{identifier}
436 : numeric_junk {numeric}{identifier}
437 : real_junk {real}{identifier}
438 : param_junk \${decdigit}+{identifier}
439 :
440 : other .
441 :
442 : /*
443 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
444 : * Other quoted strings must allow some special characters such as single-quote
445 : * and newline.
446 : * Embedded single-quotes are implemented both in the SQL standard
447 : * style of two adjacent single quotes "''" and in the Postgres/Java style
448 : * of escaped-quote "\'".
449 : * Other embedded escaped characters are matched explicitly and the leading
450 : * backslash is dropped from the string.
451 : * Note that xcstart must appear before operator, as explained above!
452 : * Also whitespace (comment) must appear before operator.
453 : */
454 :
455 : %%
456 :
457 : {whitespace} {
458 : /* ignore: runs of space characters and "--" comments produce no token */
459 : }
460 :
461 : {xcstart} {
462 : /* Set location in case of syntax error in comment */
463 : SET_YYLLOC();
464 : yyextra->xcdepth = 0; /* outermost comment: no nested comment is open yet */
465 : BEGIN(xc); /* switch to the exclusive comment state */
466 : /* Put back any characters past slash-star; see above */
467 : yyless(2);
468 : }
469 :
470 : <xc>{
471 : {xcstart} {
472 : (yyextra->xcdepth)++; /* nested comment start: go one level deeper */
473 : /* Put back any characters past slash-star; see above */
474 : yyless(2);
475 : }
476 :
477 : {xcstop} {
478 : if (yyextra->xcdepth <= 0)
479 : BEGIN(INITIAL); /* closed the outermost comment: resume normal scanning */
480 : else
481 : (yyextra->xcdepth)--; /* closed a nested comment: remain in this state */
482 : }
483 :
484 : {xcinside} {
485 : /* ignore: run of comment text containing neither '*' nor '/' */
486 : }
487 :
488 : {op_chars} {
489 : /* ignore: a single operator character inside the comment */
490 : }
491 :
492 : \*+ {
493 : /* ignore: run of '*'; when a '/' follows, the longer xcstop match is chosen instead */
494 : }
495 :
496 : <<EOF>> {
497 : yyerror("unterminated /* comment");
498 : }
499 : } /* <xc> */
500 :
501 : {xbstart} {
502 : /* Binary bit type.
503 : * At some point we should simply pass the string
504 : * forward to the parser and label it there.
505 : * In the meantime, place a leading "b" on the string
506 : * to mark it for the input routine as a binary string.
507 : */
508 : SET_YYLLOC(); /* token location is the start of the b/B prefix */
509 : BEGIN(xb); /* scan the body in the bit-string state */
510 : startlit(); /* reset the literal length to zero */
511 : addlitchar('b', yyscanner); /* the marker described above */
512 : }
513 : <xh>{xhinside} |
514 : <xb>{xbinside} {
515 : addlit(yytext, yyleng, yyscanner); /* pass the body (any text up to the next quote) to addlit() without validating it here */
516 : }
517 : <xb><<EOF>> { yyerror("unterminated bit string literal"); /* input ended inside the bit string */ }
518 :
519 : {xhstart} {
520 : /* Hexadecimal bit type.
521 : * At some point we should simply pass the string
522 : * forward to the parser and label it there.
523 : * In the meantime, place a leading "x" on the string
524 : * to mark it for the input routine as a hex string.
525 : */
526 : SET_YYLLOC(); /* token location is the start of the x/X prefix */
527 : BEGIN(xh); /* scan the body in the hex-string state */
528 : startlit(); /* reset the literal length to zero */
529 : addlitchar('x', yyscanner); /* the marker described above */
530 : }
531 : <xh><<EOF>> { yyerror("unterminated hexadecimal string literal"); /* input ended inside the hex string */ }
532 :
533 : {xnstart} {
534 : /* National character.
535 : * We will pass this along as a normal character string,
536 : * but preceded with an internally-generated "NCHAR".
537 : */
538 : int kwnum; /* keyword number of "nchar"; negative when it is not in the keyword list */
539 :
540 : SET_YYLLOC();
541 : yyless(1); /* eat only 'n' this time; the quote is left in the input to be scanned next */
542 :
543 : kwnum = ScanKeywordLookup("nchar",
544 : yyextra->keywordlist);
545 : if (kwnum >= 0)
546 : {
547 : yylval->keyword = GetScanKeyword(kwnum,
548 : yyextra->keywordlist);
549 : return yyextra->keyword_tokens[kwnum]; /* token number registered for that keyword */
550 : }
551 : else
552 : {
553 : /* If NCHAR isn't a keyword, just return "n" */
554 : yylval->str = pstrdup("n");
555 : return IDENT;
556 : }
557 : }
558 :
559 : {xqstart} {
560 : yyextra->warn_on_first_escape = true; /* presumably read by the escape-warning helpers -- confirm against their definitions */
561 : yyextra->saw_non_ascii = false; /* set later if an octal or hex escape yields NUL or a high-bit byte */
562 : SET_YYLLOC();
563 : if (yyextra->standard_conforming_strings)
564 : BEGIN(xq); /* plain string: backslashes are ordinary characters */
565 : else
566 : BEGIN(xe); /* otherwise scan it with the backslash-escape rules */
567 : startlit(); /* reset the literal length to zero */
568 : }
569 : {xestart} {
570 : yyextra->warn_on_first_escape = false; /* differs from the plain-quote rule, which sets this to true */
571 : yyextra->saw_non_ascii = false; /* set later if an octal or hex escape yields NUL or a high-bit byte */
572 : SET_YYLLOC();
573 : BEGIN(xe); /* always use the backslash-escape rules for e/E strings */
574 : startlit(); /* reset the literal length to zero */
575 : }
576 : {xusstart} {
577 : SET_YYLLOC(); /* done first so the error below points at this token */
578 : if (!yyextra->standard_conforming_strings) /* Unicode-escape strings are rejected outright in this mode */
579 : ereport(ERROR,
580 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
581 : errmsg("unsafe use of string constant with Unicode escapes"),
582 : errdetail("String constants with Unicode escapes cannot be used when \"standard_conforming_strings\" is off."),
583 : lexer_errposition()));
584 : BEGIN(xus); /* body is collected by the same rules as a plain quoted string */
585 : startlit(); /* reset the literal length to zero */
586 : }
587 :
588 : <xb,xh,xq,xe,xus>{quote} {
589 : /*
590 : * When we are scanning a quoted string and see an end
591 : * quote, we must look ahead for a possible continuation.
592 : * If we don't see one, we know the end quote was in fact
593 : * the end of the string. To reduce the lexer table size,
594 : * we use a single "xqs" state to do the lookahead for all
595 : * types of strings.
596 : */
597 : yyextra->state_before_str_stop = YYSTATE; /* remember which string state to resume or finish */
598 : BEGIN(xqs); /* enter the shared lookahead state */
599 : }
600 : <xqs>{quotecontinue} {
601 : /*
602 : * Found a quote continuation, so return to the in-quote
603 : * state and continue scanning the literal. Nothing is
604 : * added to the literal's contents.
605 : */
606 : BEGIN(yyextra->state_before_str_stop); /* resume the string state saved at the end quote */
607 : }
608 : <xqs>{quotecontinuefail} |
609 : <xqs>{other} |
610 : <xqs><<EOF>> {
611 : /*
612 : * Failed to see a quote continuation. Throw back
613 : * everything after the end quote, and handle the string
614 : * according to the state we were in previously.
615 : */
616 : yyless(0); /* none of the lookahead text belongs to the string token */
617 : BEGIN(INITIAL);
618 :
619 : switch (yyextra->state_before_str_stop)
620 : {
621 : case xb:
622 : yylval->str = litbufdup(yyscanner);
623 : return BCONST; /* bit string literal */
624 : case xh:
625 : yylval->str = litbufdup(yyscanner);
626 : return XCONST; /* hexadecimal string literal */
627 : case xq:
628 : case xe:
629 : /*
630 : * Check that the data remains valid, if it might
631 : * have been made invalid by unescaping any chars.
632 : */
633 : if (yyextra->saw_non_ascii)
634 : pg_verifymbstr(yyextra->literalbuf,
635 : yyextra->literallen,
636 : false);
637 : yylval->str = litbufdup(yyscanner);
638 : return SCONST; /* ordinary or escape string literal */
639 : case xus:
640 : yylval->str = litbufdup(yyscanner);
641 : return USCONST; /* string with Unicode escapes, returned under its own token type */
642 : default:
643 : yyerror("unhandled previous state in xqs"); /* unexpected: only the five states above switch to xqs */
644 : }
645 : }
646 :
647 : <xq,xe,xus>{xqdouble} {
648 : addlitchar('\'', yyscanner); /* two adjacent quotes stand for one quote character */
649 : }
650 : <xq,xus>{xqinside} {
651 : addlit(yytext, yyleng, yyscanner); /* any run of non-quote characters, backslashes included */
652 : }
653 : <xe>{xeinside} {
654 : addlit(yytext, yyleng, yyscanner); /* run containing neither a quote nor a backslash */
655 : }
656 : <xe>{xeunicode} {
657 : pg_wchar c = strtoul(yytext + 2, NULL, 16); /* skip the backslash and the u/U, then parse the hex digits */
658 :
659 : /*
660 : * For consistency with other productions, issue any
661 : * escape warning with cursor pointing to start of string.
662 : * We might want to change that, someday.
663 : */
664 : check_escape_warning(yyscanner);
665 :
666 : /* Remember start of overall string token ... */
667 : PUSH_YYLLOC();
668 : /* ... and set the error cursor to point at this esc seq */
669 : SET_YYLLOC();
670 :
671 : if (is_utf16_surrogate_first(c))
672 : {
673 : yyextra->utf16_first_part = c; /* held until the second half of the pair arrives */
674 : BEGIN(xeu); /* the next input must be the second surrogate escape */
675 : }
676 : else if (is_utf16_surrogate_second(c))
677 : yyerror("invalid Unicode surrogate pair"); /* second half seen without a first half */
678 : else
679 : addunicode(c, yyscanner); /* not a surrogate: hand the code point to addunicode() */
680 :
681 : /* Restore yylloc to be start of string token */
682 : POP_YYLLOC();
683 : }
684 : <xeu>{xeunicode} {
685 : pg_wchar c = strtoul(yytext + 2, NULL, 16); /* skip the backslash and the u/U, then parse the hex digits */
686 :
687 : /* Remember start of overall string token ... */
688 : PUSH_YYLLOC();
689 : /* ... and set the error cursor to point at this esc seq */
690 : SET_YYLLOC();
691 :
692 : if (!is_utf16_surrogate_second(c))
693 : yyerror("invalid Unicode surrogate pair"); /* the escape after a first half must be a second half */
694 :
695 : c = surrogate_pair_to_codepoint(yyextra->utf16_first_part, c); /* combine the saved first half with this one */
696 :
697 : addunicode(c, yyscanner);
698 :
699 : /* Restore yylloc to be start of string token */
700 : POP_YYLLOC();
701 :
702 : BEGIN(xe); /* pair complete: back to ordinary escape-string scanning */
703 : }
704 : <xeu>. |
705 : <xeu>\n |
706 : <xeu><<EOF>> {
707 : /* Set the error cursor to point at missing esc seq */
708 : SET_YYLLOC();
709 : yyerror("invalid Unicode surrogate pair"); /* a first surrogate half was not followed by another Unicode escape */
710 : }
711 : <xe,xeu>{xeunicodefail} {
712 : /* Set the error cursor to point at malformed esc seq */
713 : SET_YYLLOC();
714 : ereport(ERROR, /* reached when the u/U escape has fewer hex digits than required */
715 : (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
716 : errmsg("invalid Unicode escape"),
717 : errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
718 : lexer_errposition()));
719 : }
720 : <xe>{xeescape} {
721 : if (yytext[1] == '\'')
722 : {
723 : if (yyextra->backslash_quote == BACKSLASH_QUOTE_OFF || /* backslash-quote is either disabled outright ... */
724 : (yyextra->backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING && /* ... or allowed only when the client encoding is not client-only */
725 : PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding())))
726 : ereport(ERROR,
727 : (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
728 : errmsg("unsafe use of \\' in a string literal"),
729 : errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."),
730 : lexer_errposition()));
731 : }
732 : check_string_escape_warning(yytext[1], yyscanner); /* yytext[1] is the character that follows the backslash */
733 : addlitchar(unescape_single_char(yytext[1], yyscanner), /* append whatever the escape translates to */
734 : yyscanner);
735 : }
736 : <xe>{xeoctesc} {
737 : unsigned char c = strtoul(yytext + 1, NULL, 8); /* octal digits after the backslash, narrowed to one byte */
738 :
739 : check_escape_warning(yyscanner);
740 : addlitchar(c, yyscanner);
741 : if (c == '\0' || IS_HIGHBIT_SET(c))
742 : yyextra->saw_non_ascii = true; /* makes the end-of-string code run pg_verifymbstr() */
743 : }
744 : <xe>{xehexesc} {
745 : unsigned char c = strtoul(yytext + 2, NULL, 16); /* hex digits after the backslash and the 'x' */
746 :
747 : check_escape_warning(yyscanner);
748 : addlitchar(c, yyscanner);
749 : if (c == '\0' || IS_HIGHBIT_SET(c))
750 : yyextra->saw_non_ascii = true; /* makes the end-of-string code run pg_verifymbstr() */
751 : }
752 : <xe>. {
753 : /* This is only needed for \ just before EOF */
754 : addlitchar(yytext[0], yyscanner); /* keep the lone character as literal text */
755 : }
756 : <xq,xe,xus><<EOF>> { yyerror("unterminated quoted string"); /* input ended before the closing quote */ }
757 :
758 : {dolqdelim} {
759 : SET_YYLLOC();
760 : yyextra->dolqstart = pstrdup(yytext); /* keep a copy of the opening delimiter to compare against candidate closers */
761 : BEGIN(xdolq); /* collect the body in the dollar-quote state */
762 : startlit(); /* reset the literal length to zero */
763 : }
764 : {dolqfailed} {
765 : SET_YYLLOC();
766 : /* throw back all but the initial "$" */
767 : yyless(1);
768 : /* and treat it as {other} */
769 : return yytext[0]; /* the "$" itself is returned as a single-character token */
770 : }
771 : <xdolq>{dolqdelim} {
772 : if (strcmp(yytext, yyextra->dolqstart) == 0) /* closes the string only if identical to the opening delimiter */
773 : {
774 : pfree(yyextra->dolqstart);
775 : yyextra->dolqstart = NULL; /* no dollar-quoted string is open any more */
776 : BEGIN(INITIAL);
777 : yylval->str = litbufdup(yyscanner);
778 : return SCONST;
779 : }
780 : else
781 : {
782 : /*
783 : * When we fail to match $...$ to dolqstart, transfer
784 : * the $... part to the output, but put back the final
785 : * $ for rescanning. Consider $delim$...$junk$delim$
786 : */
787 : addlit(yytext, yyleng - 1, yyscanner); /* everything except the trailing "$" */
788 : yyless(yyleng - 1); /* the trailing "$" may begin the real closing delimiter */
789 : }
790 : }
791 : <xdolq>{dolqinside} {
792 : addlit(yytext, yyleng, yyscanner); /* run of text containing no "$" */
793 : }
794 : <xdolq>{dolqfailed} {
795 : addlit(yytext, yyleng, yyscanner); /* "$" plus identifier-like text with no closing "$": plain body text */
796 : }
797 : <xdolq>. {
798 : /* This is only needed for $ inside the quoted text */
799 : addlitchar(yytext[0], yyscanner);
800 : }
801 : <xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); /* input ended before the matching delimiter */ }
802 :
803 : {xdstart} {
804 : SET_YYLLOC(); /* token location is the opening double quote */
805 : BEGIN(xd); /* collect the delimited identifier */
806 : startlit(); /* reset the literal length to zero */
807 : }
808 : {xuistart} {
809 : SET_YYLLOC(); /* token location is the u/U of the prefix */
810 : BEGIN(xui); /* collect the identifier in the Unicode-escape identifier state */
811 : startlit(); /* reset the literal length to zero */
812 : }
813 : <xd>{xdstop} {
814 : char *ident; /* copy of the collected identifier text */
815 :
816 : BEGIN(INITIAL);
817 : if (yyextra->literallen == 0)
818 : yyerror("zero-length delimited identifier"); /* nothing was collected between the double quotes */
819 : ident = litbufdup(yyscanner);
820 : if (yyextra->literallen >= NAMEDATALEN)
821 : truncate_identifier(ident, yyextra->literallen, true); /* called only for text of NAMEDATALEN bytes or more */
822 : yylval->str = ident;
823 : return IDENT;
824 : }
825 : <xui>{dquote} {
826 : BEGIN(INITIAL);
827 : if (yyextra->literallen == 0)
828 : yyerror("zero-length delimited identifier"); /* nothing was collected between the double quotes */
829 : /* can't truncate till after we de-escape the ident */
830 : yylval->str = litbufdup(yyscanner);
831 : return UIDENT; /* returned under its own token type, with the text as collected */
832 : }
833 : <xd,xui>{xddouble} {
834 : addlitchar('"', yyscanner); /* two adjacent double quotes stand for one double-quote character */
835 : }
836 : <xd,xui>{xdinside} {
837 : addlit(yytext, yyleng, yyscanner); /* any run of characters other than a double quote */
838 : }
839 : <xd,xui><<EOF>> { yyerror("unterminated quoted identifier"); /* input ended before the closing double quote */ }
840 :
841 : {xufailed} {
842 : char *ident; /* result of processing the one-character identifier */
843 :
844 : SET_YYLLOC();
845 : /* throw back all but the initial u/U */
846 : yyless(1);
847 : /* and treat it as {identifier} */
848 : ident = downcase_truncate_identifier(yytext, yyleng, true); /* after yyless(1) only the u/U remains in yytext */
849 : yylval->str = ident;
850 : return IDENT;
851 : }
852 :
853 : {typecast} {
854 : SET_YYLLOC();
855 : return TYPECAST; /* the "::" token */
856 : }
857 :
858 : {dot_dot} {
859 : SET_YYLLOC();
860 : return DOT_DOT; /* the ".." token */
861 : }
862 :
863 : {colon_equals} {
864 : SET_YYLLOC();
865 : return COLON_EQUALS; /* the ":=" token */
866 : }
867 :
868 : {equals_greater} {
869 : SET_YYLLOC();
870 : return EQUALS_GREATER; /* the "=>" token */
871 : }
872 :
873 : {less_equals} {
874 : SET_YYLLOC();
875 : return LESS_EQUALS; /* the "<=" token */
876 : }
877 :
878 : {greater_equals} {
879 : SET_YYLLOC();
880 : return GREATER_EQUALS; /* the ">=" token */
881 : }
882 :
883 : {less_greater} {
884 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
885 : SET_YYLLOC();
886 : return NOT_EQUALS; /* this rule handles the "<>" spelling */
887 : }
888 :
889 : {not_equals} {
890 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
891 : SET_YYLLOC();
892 : return NOT_EQUALS; /* this rule handles the "!=" spelling */
893 : }
894 :
895 : {self} {
896 : SET_YYLLOC();
897 : return yytext[0]; /* the character itself serves as the token code */
898 : }
899 :
900 : {operator} {
901 : /*
902 : * Check for embedded slash-star or dash-dash; those
903 : * are comment starts, so operator must stop there.
904 : * Note that slash-star or dash-dash at the first
905 : * character will match a prior rule, not this one.
906 : */
907 : int nchars = yyleng; /* how many leading characters will be kept as the operator */
908 : char *slashstar = strstr(yytext, "/*");
909 : char *dashdash = strstr(yytext, "--");
910 :
911 : if (slashstar && dashdash)
912 : {
913 : /* if both appear, take the first one */
914 : if (slashstar > dashdash)
915 : slashstar = dashdash;
916 : }
917 : else if (!slashstar)
918 : slashstar = dashdash;
919 : if (slashstar)
920 : nchars = slashstar - yytext;
921 :
922 : /*
923 : * For SQL compatibility, '+' and '-' cannot be the
924 : * last char of a multi-char operator unless the operator
925 : * contains chars that are not in SQL operators.
926 : * The idea is to lex '=-' as two operators, but not
927 : * to forbid operator names like '?-' that could not be
928 : * sequences of SQL operators.
929 : */
930 : if (nchars > 1 &&
931 : (yytext[nchars - 1] == '+' ||
932 : yytext[nchars - 1] == '-'))
933 : {
934 : int ic; /* scan index; ends below zero when no qualifying character exists */
935 :
936 : for (ic = nchars - 2; ic >= 0; ic--) /* look leftwards from the char before the trailing sign */
937 : {
938 : char c = yytext[ic];
939 : if (c == '~' || c == '!' || c == '@' ||
940 : c == '#' || c == '^' || c == '&' ||
941 : c == '|' || c == '`' || c == '?' ||
942 : c == '%')
943 : break;
944 : }
945 : if (ic < 0)
946 : {
947 : /*
948 : * didn't find a qualifying character, so remove
949 : * all trailing [+-]
950 : */
951 : do {
952 : nchars--;
953 : } while (nchars > 1 &&
954 : (yytext[nchars - 1] == '+' ||
955 : yytext[nchars - 1] == '-'));
956 : }
957 : }
958 :
959 : SET_YYLLOC();
960 :
961 : if (nchars < yyleng) /* true when a comment start or trailing signs were cut off above */
962 : {
963 : /* Strip the unwanted chars from the token */
964 : yyless(nchars);
965 : /*
966 : * If what we have left is only one char, and it's
967 : * one of the characters matching "self", then
968 : * return it as a character token the same way
969 : * that the "self" rule would have.
970 : */
971 : if (nchars == 1 &&
972 : strchr(",()[].;:+-*/%^<>=", yytext[0]))
973 : return yytext[0];
974 : /*
975 : * Likewise, if what we have left is two chars, and
976 : * those match the tokens ">=", "<=", "=>", "<>" or
977 : * "!=", then we must return the appropriate token
978 : * rather than the generic Op.
979 : */
980 : if (nchars == 2)
981 : {
982 : if (yytext[0] == '=' && yytext[1] == '>')
983 : return EQUALS_GREATER;
984 : if (yytext[0] == '>' && yytext[1] == '=')
985 : return GREATER_EQUALS;
986 : if (yytext[0] == '<' && yytext[1] == '=')
987 : return LESS_EQUALS;
988 : if (yytext[0] == '<' && yytext[1] == '>')
989 : return NOT_EQUALS;
990 : if (yytext[0] == '!' && yytext[1] == '=')
991 : return NOT_EQUALS;
992 : }
993 : }
994 :
995 : /*
996 : * Complain if operator is too long. Unlike the case
997 : * for identifiers, we make this an error not a notice-
998 : * and-truncate, because the odds are we are looking at
999 : * a syntactic mistake anyway.
1000 : */
1001 : if (nchars >= NAMEDATALEN)
1002 : yyerror("operator too long");
1003 :
1004 : yylval->str = pstrdup(yytext); /* copy of the operator text, returned as a generic Op */
1005 : return Op;
1006 : }
1007 :
{param}			{
					/*
					 * Positional parameter reference.  The first character
					 * of the token is skipped (presumably the marker that
					 * introduces a parameter; the {param} pattern itself is
					 * defined elsewhere) and the rest is parsed as an int32.
					 */
					ErrorSaveContext range_check = {T_ErrorSaveContext};
					int32		param_number;

					SET_YYLLOC();
					param_number = pg_strtoint32_safe(yytext + 1,
													  (Node *) &range_check);
					/* A soft conversion error is reported as a lexer error. */
					if (range_check.error_occurred)
						yyerror("parameter number too large");
					yylval->ival = param_number;
					return PARAM;
				}
{param_junk}	{
					/* A parameter reference followed by junk: always an error. */
					SET_YYLLOC();
					yyerror("trailing junk after parameter");
				}
1023 :
1024 : {decinteger} {
	/*
	 * Integer literals in each radix are handed to
	 * process_integer_literal(), which yields ICONST when the value fits
	 * in an int32 and FCONST (with the token text) otherwise.
	 */
1025 : SET_YYLLOC();
1026 : return process_integer_literal(yytext, yylval, 10);
1027 : }
1028 : {hexinteger} {
1029 : SET_YYLLOC();
1030 : return process_integer_literal(yytext, yylval, 16);
1031 : }
1032 : {octinteger} {
1033 : SET_YYLLOC();
1034 : return process_integer_literal(yytext, yylval, 8);
1035 : }
1036 : {bininteger} {
1037 : SET_YYLLOC();
1038 : return process_integer_literal(yytext, yylval, 2);
1039 : }
1040 : {hexfail} {
	/* The three *fail rules below reject malformed non-decimal integers. */
1041 : SET_YYLLOC();
1042 : yyerror("invalid hexadecimal integer");
1043 : }
1044 : {octfail} {
1045 : SET_YYLLOC();
1046 : yyerror("invalid octal integer");
1047 : }
1048 : {binfail} {
1049 : SET_YYLLOC();
1050 : yyerror("invalid binary integer");
1051 : }
1052 : {numeric} {
	/* Returned as FCONST carrying a palloc'd copy of the token text. */
1053 : SET_YYLLOC();
1054 : yylval->str = pstrdup(yytext);
1055 : return FCONST;
1056 : }
1057 : {numericfail} {
1058 : /* throw back the .., and treat as integer */
	/* yyless() runs first, so yytext holds only the integer part below. */
1059 : yyless(yyleng - 2);
1060 : SET_YYLLOC();
1061 : return process_integer_literal(yytext, yylval, 10);
1062 : }
1063 : {real} {
	/* Same treatment as {numeric}: FCONST with a copy of the token text. */
1064 : SET_YYLLOC();
1065 : yylval->str = pstrdup(yytext);
1066 : return FCONST;
1067 : }
1068 : {realfail} {
	/*
	 * This rule and the *_junk rules that follow all raise the same
	 * "trailing junk" error at the start of the offending token.
	 */
1069 : SET_YYLLOC();
1070 : yyerror("trailing junk after numeric literal");
1071 : }
1072 : {integer_junk} {
1073 : SET_YYLLOC();
1074 : yyerror("trailing junk after numeric literal");
1075 : }
1076 : {numeric_junk} {
1077 : SET_YYLLOC();
1078 : yyerror("trailing junk after numeric literal");
1079 : }
1080 : {real_junk} {
1081 : SET_YYLLOC();
1082 : yyerror("trailing junk after numeric literal");
1083 : }
1084 :
1085 :
{identifier}	{
					/*
					 * An identifier-shaped token is either a keyword from
					 * the scanner's keyword list or a plain identifier.
					 */
					int			kw_index;

					SET_YYLLOC();

					kw_index = ScanKeywordLookup(yytext, yyextra->keywordlist);
					if (kw_index < 0)
					{
						/*
						 * Not a keyword: hand back the lower-cased (and, if
						 * necessary, truncated) form of the identifier.
						 */
						yylval->str = downcase_truncate_identifier(yytext,
																   yyleng,
																   true);
						return IDENT;
					}

					/* Keyword: return its name and its grammar token code. */
					yylval->keyword = GetScanKeyword(kw_index,
													 yyextra->keywordlist);
					return yyextra->keyword_tokens[kw_index];
				}
1110 :
1111 : {other} {
	/* Return the first byte of the matched text as its own token code. */
1112 : SET_YYLLOC();
1113 : return yytext[0];
1114 : }
1115 :
1116 : <<EOF>> {
	/* Record the location at end of input, then stop the scanner. */
1117 : SET_YYLLOC();
1118 : yyterminate();
1119 : }
1120 :
1121 : %%
1122 :
1123 : /* LCOV_EXCL_STOP */
1124 :
1125 : /*
1126 : * Arrange access to yyextra for subroutines of the main yylex() function.
1127 : * We expect each subroutine to have a yyscanner parameter. Rather than
1128 : * use the yyget_xxx functions, which might or might not get inlined by the
1129 : * compiler, we cheat just a bit and cast yyscanner to the right type.
1130 : */
/* yyextra: the yyextra_r member reached by casting yyscanner to struct yyguts_t *. */
1131 : #undef yyextra
1132 : #define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r)
1133 :
1134 : /* Likewise for a couple of other things we need. */
/* yylloc: the yylloc_r member; the code below dereferences it as a pointer. */
1135 : #undef yylloc
1136 : #define yylloc (((struct yyguts_t *) yyscanner)->yylloc_r)
/* yyleng: the yyleng_r member of the same struct. */
1137 : #undef yyleng
1138 : #define yyleng (((struct yyguts_t *) yyscanner)->yyleng_r)
1139 :
1140 :
1141 : /*
1142 : * scanner_errposition
1143 : * Report a lexer or grammar error cursor position, if possible.
1144 : *
1145 : * This is expected to be used within an ereport() call, or via an error
1146 : * callback such as setup_scanner_errposition_callback(). The return value
1147 : * is a dummy (always 0, in fact).
1148 : *
1149 : * Note that this can only be used for messages emitted during raw parsing
1150 : * (essentially, scan.l, parser.c, and gram.y), since it requires the
1151 : * yyscanner struct to still be available.
1152 : */
1153 : int
1154 1258 : scanner_errposition(int location, core_yyscan_t yyscanner)
1155 : {
1156 : int pos;
1157 :
1158 1258 : if (location < 0)
1159 0 : return 0; /* no-op if location is unknown */
1160 :
1161 : /* Convert byte offset to character number */
1162 1258 : pos = pg_mbstrlen_with_len(yyextra->scanbuf, location) + 1;
1163 : /* And pass it to the ereport mechanism */
1164 1258 : return errposition(pos);
1165 : }
1166 :
1167 : /*
1168 : * Error context callback for inserting scanner error location.
1169 : *
1170 : * Note that this will be called for *any* error occurring while the
1171 : * callback is installed. We avoid inserting an irrelevant error location
1172 : * if the error is a query cancel --- are there any other important cases?
1173 : */
1174 : static void
1175 36 : scb_error_callback(void *arg)
1176 : {
1177 36 : ScannerCallbackState *scbstate = (ScannerCallbackState *) arg;
1178 :
1179 36 : if (geterrcode() != ERRCODE_QUERY_CANCELED)
1180 36 : (void) scanner_errposition(scbstate->location, scbstate->yyscanner);
1181 36 : }
1182 :
1183 : /*
1184 : * setup_scanner_errposition_callback
1185 : * Arrange for non-scanner errors to report an error position
1186 : *
1187 : * Sometimes the scanner calls functions that aren't part of the scanner
1188 : * subsystem and can't reasonably be passed the yyscanner pointer; yet
1189 : * we would like any errors thrown in those functions to be tagged with an
1190 : * error location. Use this function to set up an error context stack
1191 : * entry that will accomplish that. Usage pattern:
1192 : *
1193 : * declare a local variable "ScannerCallbackState scbstate"
1194 : * ...
1195 : * setup_scanner_errposition_callback(&scbstate, yyscanner, location);
1196 : * call function that might throw error;
1197 : * cancel_scanner_errposition_callback(&scbstate);
1198 : */
1199 : void
1200 586 : setup_scanner_errposition_callback(ScannerCallbackState *scbstate,
1201 : core_yyscan_t yyscanner,
1202 : int location)
1203 : {
1204 : /* Setup error traceback support for ereport() */
1205 586 : scbstate->yyscanner = yyscanner;
1206 586 : scbstate->location = location;
1207 586 : scbstate->errcallback.callback = scb_error_callback;
1208 586 : scbstate->errcallback.arg = (void *) scbstate;
1209 586 : scbstate->errcallback.previous = error_context_stack;
1210 586 : error_context_stack = &scbstate->errcallback;
1211 586 : }
1212 :
1213 : /*
1214 : * Cancel a previously-set-up errposition callback.
1215 : */
1216 : void
1217 550 : cancel_scanner_errposition_callback(ScannerCallbackState *scbstate)
1218 : {
1219 : /* Pop the error context stack */
1220 550 : error_context_stack = scbstate->errcallback.previous;
1221 550 : }
1222 :
1223 : /*
1224 : * scanner_yyerror
1225 : * Report a lexer or grammar error.
1226 : *
1227 : * The message's cursor position is whatever YYLLOC was last set to,
1228 : * ie, the start of the current token if called within yylex(), or the
1229 : * most recently lexed token if called from the grammar.
1230 : * This is OK for syntax error messages from the Bison parser, because Bison
1231 : * parsers report error as soon as the first unparsable token is reached.
1232 : * Beware of using yyerror for other purposes, as the cursor position might
1233 : * be misleading!
1234 : */
1235 : void
1236 912 : scanner_yyerror(const char *message, core_yyscan_t yyscanner)
1237 : {
1238 912 : const char *loc = yyextra->scanbuf + *yylloc;
1239 :
1240 912 : if (*loc == YY_END_OF_BUFFER_CHAR)
1241 : {
1242 18 : ereport(ERROR,
1243 : (errcode(ERRCODE_SYNTAX_ERROR),
1244 : /* translator: %s is typically the translation of "syntax error" */
1245 : errmsg("%s at end of input", _(message)),
1246 : lexer_errposition()));
1247 : }
1248 : else
1249 : {
1250 894 : ereport(ERROR,
1251 : (errcode(ERRCODE_SYNTAX_ERROR),
1252 : /* translator: first %s is typically the translation of "syntax error" */
1253 : errmsg("%s at or near \"%s\"", _(message), loc),
1254 : lexer_errposition()));
1255 : }
1256 : }
1257 :
1258 :
1259 : /*
1260 : * Called before any actual parsing is done
1261 : */
1262 : core_yyscan_t
1263 792636 : scanner_init(const char *str,
1264 : core_yy_extra_type *yyext,
1265 : const ScanKeywordList *keywordlist,
1266 : const uint16 *keyword_tokens)
1267 : {
1268 792636 : Size slen = strlen(str);
1269 : yyscan_t scanner;
1270 :
1271 792636 : if (yylex_init(&scanner) != 0)
1272 0 : elog(ERROR, "yylex_init() failed: %m");
1273 :
1274 792636 : core_yyset_extra(yyext, scanner);
1275 :
1276 792636 : yyext->keywordlist = keywordlist;
1277 792636 : yyext->keyword_tokens = keyword_tokens;
1278 :
1279 792636 : yyext->backslash_quote = backslash_quote;
1280 792636 : yyext->escape_string_warning = escape_string_warning;
1281 792636 : yyext->standard_conforming_strings = standard_conforming_strings;
1282 :
1283 : /*
1284 : * Make a scan buffer with special termination needed by flex.
1285 : */
1286 792636 : yyext->scanbuf = (char *) palloc(slen + 2);
1287 792636 : yyext->scanbuflen = slen;
1288 792636 : memcpy(yyext->scanbuf, str, slen);
1289 792636 : yyext->scanbuf[slen] = yyext->scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
1290 792636 : yy_scan_buffer(yyext->scanbuf, slen + 2, scanner);
1291 :
1292 : /* initialize literal buffer to a reasonable but expansible size */
1293 792636 : yyext->literalalloc = 1024;
1294 792636 : yyext->literalbuf = (char *) palloc(yyext->literalalloc);
1295 792636 : yyext->literallen = 0;
1296 :
1297 792636 : return scanner;
1298 : }
1299 :
1300 :
1301 : /*
1302 : * Called after parsing is done to clean up after scanner_init()
1303 : */
1304 : void
1305 791282 : scanner_finish(core_yyscan_t yyscanner)
1306 : {
1307 : /*
1308 : * We don't bother to call yylex_destroy(), because all it would do is
1309 : * pfree a small amount of control storage. It's cheaper to leak the
1310 : * storage until the parsing context is destroyed. The amount of space
1311 : * involved is usually negligible compared to the output parse tree
1312 : * anyway.
1313 : *
1314 : * We do bother to pfree the scanbuf and literal buffer, but only if they
1315 : * represent a nontrivial amount of space. The 8K cutoff is arbitrary.
1316 : */
1317 791282 : if (yyextra->scanbuflen >= 8192)
1318 88 : pfree(yyextra->scanbuf);
1319 791282 : if (yyextra->literalalloc >= 8192)
1320 46 : pfree(yyextra->literalbuf);
1321 791282 : }
1322 :
1323 :
1324 : static void
1325 817228 : addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
1326 : {
1327 : /* enlarge buffer if needed */
1328 817228 : if ((yyextra->literallen + yleng) >= yyextra->literalalloc)
1329 : {
1330 232 : yyextra->literalalloc = pg_nextpower2_32(yyextra->literallen + yleng + 1);
1331 232 : yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
1332 232 : yyextra->literalalloc);
1333 : }
1334 : /* append new data */
1335 817228 : memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng);
1336 817228 : yyextra->literallen += yleng;
1337 817228 : }
1338 :
1339 :
1340 : static void
1341 22064 : addlitchar(unsigned char ychar, core_yyscan_t yyscanner)
1342 : {
1343 : /* enlarge buffer if needed */
1344 22064 : if ((yyextra->literallen + 1) >= yyextra->literalalloc)
1345 : {
1346 0 : yyextra->literalalloc *= 2;
1347 0 : yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
1348 0 : yyextra->literalalloc);
1349 : }
1350 : /* append new data */
1351 22064 : yyextra->literalbuf[yyextra->literallen] = ychar;
1352 22064 : yyextra->literallen += 1;
1353 22064 : }
1354 :
1355 :
1356 : /*
1357 : * Create a palloc'd copy of literalbuf, adding a trailing null.
1358 : */
1359 : static char *
1360 818948 : litbufdup(core_yyscan_t yyscanner)
1361 : {
1362 818948 : int llen = yyextra->literallen;
1363 : char *new;
1364 :
1365 818948 : new = palloc(llen + 1);
1366 818948 : memcpy(new, yyextra->literalbuf, llen);
1367 818948 : new[llen] = '\0';
1368 818948 : return new;
1369 : }
1370 :
1371 : /*
1372 : * Process {decinteger}, {hexinteger}, etc. Note this will also do the right
1373 : * thing with {numeric}, ie digits and a decimal point.
1374 : */
1375 : static int
1376 509972 : process_integer_literal(const char *token, YYSTYPE *lval, int base)
1377 : {
1378 509972 : ErrorSaveContext escontext = {T_ErrorSaveContext};
1379 : int32 val;
1380 :
1381 509972 : val = pg_strtoint32_safe(token, (Node *) &escontext);
1382 509972 : if (escontext.error_occurred)
1383 : {
1384 : /* integer too large (or contains decimal pt), treat it as a float */
1385 1678 : lval->str = pstrdup(token);
1386 1678 : return FCONST;
1387 : }
1388 508294 : lval->ival = val;
1389 508294 : return ICONST;
1390 : }
1391 :
1392 : static void
1393 138 : addunicode(pg_wchar c, core_yyscan_t yyscanner)
1394 : {
1395 : ScannerCallbackState scbstate;
1396 : char buf[MAX_UNICODE_EQUIVALENT_STRING + 1];
1397 :
1398 138 : if (!is_valid_unicode_codepoint(c))
1399 6 : yyerror("invalid Unicode escape value");
1400 :
1401 : /*
1402 : * We expect that pg_unicode_to_server() will complain about any
1403 : * unconvertible code point, so we don't have to set saw_non_ascii.
1404 : */
1405 132 : setup_scanner_errposition_callback(&scbstate, yyscanner, *(yylloc));
1406 132 : pg_unicode_to_server(c, (unsigned char *) buf);
1407 132 : cancel_scanner_errposition_callback(&scbstate);
1408 132 : addlit(buf, strlen(buf), yyscanner);
1409 132 : }
1410 :
1411 : static unsigned char
1412 6148 : unescape_single_char(unsigned char c, core_yyscan_t yyscanner)
1413 : {
1414 6148 : switch (c)
1415 : {
1416 26 : case 'b':
1417 26 : return '\b';
1418 2 : case 'f':
1419 2 : return '\f';
1420 1262 : case 'n':
1421 1262 : return '\n';
1422 72 : case 'r':
1423 72 : return '\r';
1424 28 : case 't':
1425 28 : return '\t';
1426 0 : case 'v':
1427 0 : return '\v';
1428 4758 : default:
1429 : /* check for backslash followed by non-7-bit-ASCII */
1430 4758 : if (c == '\0' || IS_HIGHBIT_SET(c))
1431 0 : yyextra->saw_non_ascii = true;
1432 :
1433 4758 : return c;
1434 : }
1435 : }
1436 :
1437 : static void
1438 6148 : check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner)
1439 : {
1440 6148 : if (ychar == '\'')
1441 : {
1442 36 : if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1443 0 : ereport(WARNING,
1444 : (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1445 : errmsg("nonstandard use of \\' in a string literal"),
1446 : errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."),
1447 : lexer_errposition()));
1448 36 : yyextra->warn_on_first_escape = false; /* warn only once per string */
1449 : }
1450 6112 : else if (ychar == '\\')
1451 : {
1452 4722 : if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1453 64 : ereport(WARNING,
1454 : (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1455 : errmsg("nonstandard use of \\\\ in a string literal"),
1456 : errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."),
1457 : lexer_errposition()));
1458 4722 : yyextra->warn_on_first_escape = false; /* warn only once per string */
1459 : }
1460 : else
1461 1390 : check_escape_warning(yyscanner);
1462 6148 : }
1463 :
1464 : static void
1465 1666 : check_escape_warning(core_yyscan_t yyscanner)
1466 : {
1467 1666 : if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1468 0 : ereport(WARNING,
1469 : (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1470 : errmsg("nonstandard use of escape in a string literal"),
1471 : errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."),
1472 : lexer_errposition()));
1473 1666 : yyextra->warn_on_first_escape = false; /* warn only once per string */
1474 1666 : }
1475 :
1476 : /*
1477 : * Interface functions to make flex use palloc() instead of malloc().
1478 : * It'd be better to make these static, but flex insists otherwise.
1479 : */
1480 :
1481 : void *
1482 2377908 : core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner)
1483 : {
1484 2377908 : return palloc(bytes);
1485 : }
1486 :
1487 : void *
1488 0 : core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner)
1489 : {
1490 0 : if (ptr)
1491 0 : return repalloc(ptr, bytes);
1492 : else
1493 0 : return palloc(bytes);
1494 : }
1495 :
1496 : void
1497 0 : core_yyfree(void *ptr, core_yyscan_t yyscanner)
1498 : {
1499 0 : if (ptr)
1500 0 : pfree(ptr);
1501 0 : }
|