Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * exprscan.l
5 : * lexical scanner for pgbench backslash commands
6 : *
7 : * This lexer supports two operating modes:
8 : *
9 : * In INITIAL state, just parse off whitespace-separated words (this mode
10 : * is basically equivalent to strtok(), which is what we used to use).
11 : *
12 : * In EXPR state, lex for the simple expression syntax of exprparse.y.
13 : *
14 : * In either mode, stop upon hitting newline or end of string.
15 : *
 * Note that this lexer operates within the framework created by psqlscan.l,
 * sharing its PsqlScanState and the psqlscan_* helper routines.
17 : *
18 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
19 : * Portions Copyright (c) 1994, Regents of the University of California
20 : *
21 : * src/bin/pgbench/exprscan.l
22 : *
23 : *-------------------------------------------------------------------------
24 : */
25 : #include "postgres_fe.h"
26 :
27 : /*
28 : * NB: include exprparse.h only AFTER including pgbench.h, because pgbench.h
29 : * contains definitions needed for YYSTYPE. Likewise, pgbench.h must come after
30 : * psqlscan_int.h for yyscan_t.
31 : */
32 : #include "fe_utils/psqlscan_int.h"
33 : #include "pgbench.h"
34 : #include "exprparse.h"
35 : }
36 :
%{
/*
 * Context information for reporting errors in expressions.
 *
 * These are stored by expr_scanner_init() and consumed by
 * expr_yyerror_more().  Note that they are file-level statics: they are
 * shared by every scanner instance, even though the scanner itself is
 * generated with %option reentrant.
 */
/* passed through to syntax_error() as-is */
static const char *expr_source = NULL;
/* passed through to syntax_error() as-is */
static int	expr_lineno = 0;
/* scan-buffer offset from which the reported line is extracted */
static int	expr_start_offset = 0;
/* passed through to syntax_error() as-is */
static const char *expr_command = NULL;

/*
 * Indicates whether last yylex() call read a newline.  It is cleared at the
 * top of every yylex() call and set only by the {newline} rules.
 */
static bool last_was_newline = false;

/* LCOV_EXCL_START */

%}
50 :
/* Except for the prefix, these options should match psqlscan.l */
/*
 * reentrant:    scanner state travels in a yyscan_t, which is why yylex() and
 *               yyget_extra() below take a scanner argument.
 * bison-bridge: yylex() receives a YYSTYPE pointer, used as yylval in the
 *               rule actions.
 * nodefault:    there is no implicit catch-all rule, so the rules of each
 *               state must between them match every possible character.
 */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="expr_yy"
62 :
/* Character classes */
/* alpha and alnum also accept every byte in the range \200-\377 */
alpha			[a-zA-Z\200-\377_]
digit			[0-9]
alnum			[A-Za-z\200-\377_0-9]
/* {space} + {nonspace} + {newline} should cover all characters */
space			[ \t\r\f\v]
nonspace		[^ \t\r\f\v\n]
newline			[\n]

/* Line continuation marker */
/* i.e. a backslash, an optional carriage return, then a newline */
continuation	\\\r?{newline}

/* case insensitive keywords */
/* each letter is spelled out as a two-case character class */
and				[Aa][Nn][Dd]
or				[Oo][Rr]
not				[Nn][Oo][Tt]
case			[Cc][Aa][Ss][Ee]
when			[Ww][Hh][Ee][Nn]
then			[Tt][Hh][Ee][Nn]
else			[Ee][Ll][Ss][Ee]
end				[Ee][Nn][Dd]
true			[Tt][Rr][Uu][Ee]
false			[Ff][Aa][Ll][Ss][Ee]
null			[Nn][Uu][Ll][Ll]
is				[Ii][Ss]
isnull			[Ii][Ss][Nn][Uu][Ll][Ll]
notnull			[Nn][Oo][Tt][Nn][Uu][Ll][Ll]

/* Exclusive states */
/* exclusive: rules not tagged <EXPR> are inactive while in EXPR state */
%x EXPR
93 :
94 : %%
95 :
%{
		/*
		 * This code is placed at the top of the generated yylex() and so
		 * runs on every call, before any rule is tried.
		 */

		/* Declare some local variables inside yylex(), for convenience */
		PsqlScanState cur_state = yyextra;

		/*
		 * Force flex into the state indicated by start_state.  This has a
		 * couple of purposes: it lets some of the functions below set a new
		 * starting state without ugly direct access to flex variables, and it
		 * allows us to transition from one flex lexer to another so that we
		 * can lex different parts of the source string using separate lexers.
		 */
		BEGIN(cur_state->start_state);

		/*
		 * Reset was-newline flag.  Only the {newline} rules set it again, so
		 * once yylex() returns it tells whether this call ended on a newline.
		 */
		last_was_newline = false;
%}
112 :
113 : /* INITIAL state */
114 :
115 : {nonspace}+ {
116 : /* Found a word, emit and return it */
117 : psqlscan_emit(cur_state, yytext, yyleng);
118 1185 : return 1;
119 1185 : }
120 :
121 : /*
122 : * We need this rule to avoid returning "word\" instead of recognizing
123 : * a continuation marker just after a word:
124 : */
125 : {nonspace}+{continuation} {
126 1 : /* Found "word\\\r?\n", emit and return just "word" */
127 : int wordlen = yyleng - 2;
128 1 : if (yytext[wordlen] == '\r')
129 1 : wordlen--;
130 0 : Assert(yytext[wordlen] == '\\');
131 : psqlscan_emit(cur_state, yytext, wordlen);
132 1 : return 1;
133 1 : }
134 :
135 : {space}+ { /* ignore */ }
136 655 :
137 655 : {continuation} { /* ignore */ }
138 1 :
139 1 : {newline} {
140 108 : /* report end of command */
141 : last_was_newline = true;
142 108 : return 0;
143 108 : }
144 :
145 : /* EXPR state */
146 :
147 : <EXPR>{
148 53 :
149 : "+" { return '+'; }
150 53 : "-" { return '-'; }
151 65 : "*" { return '*'; }
152 65 : "/" { return '/'; }
153 204 : "%" { return '%'; } /* C version, also in Pg SQL */
154 13 : "=" { return '='; }
155 2 : "<>" { return NE_OP; }
156 32 : "!=" { return NE_OP; } /* C version, also in Pg SQL */
157 5 : "<=" { return LE_OP; }
158 3 : ">=" { return GE_OP; }
159 4 : "<<" { return LS_OP; }
160 3 : ">>" { return RS_OP; }
161 7 : "<" { return '<'; }
162 1 : ">" { return '>'; }
163 10 : "|" { return '|'; }
164 6 : "&" { return '&'; }
165 2 : "#" { return '#'; }
166 1 : "~" { return '~'; }
167 1 :
168 2 : "(" { return '('; }
169 451 : ")" { return ')'; }
170 453 : "," { return ','; }
171 453 :
172 343 : {and} { return AND_OP; }
173 44 : {or} { return OR_OP; }
174 5 : {not} { return NOT_OP; }
175 5 : {is} { return IS_OP; }
176 15 : {isnull} { return ISNULL_OP; }
177 8 : {notnull} { return NOTNULL_OP; }
178 1 :
179 1 : {case} { return CASE_KW; }
180 15 : {when} { return WHEN_KW; }
181 16 : {then} { return THEN_KW; }
182 16 : {else} { return ELSE_KW; }
183 17 : {end} { return END_KW; }
184 11 :
185 15 : :{alnum}+ {
186 272 : yylval->str = pg_strdup(yytext + 1);
187 272 : return VARIABLE;
188 272 : }
189 :
190 : {null} { return NULL_CONST; }
191 12 : {true} {
192 : yylval->bval = true;
193 21 : return BOOLEAN_CONST;
194 21 : }
195 : {false} {
196 : yylval->bval = false;
197 13 : return BOOLEAN_CONST;
198 13 : }
199 : "9223372036854775808" {
200 : /*
201 1 : * Special handling for PG_INT64_MIN, which can't
202 : * accurately be represented here, as the minus sign is
203 : * lexed separately and INT64_MIN can't be represented as
204 : * a positive integer.
205 : */
206 : return MAXINT_PLUS_ONE_CONST;
207 1 : }
208 : {digit}+ {
209 : if (!strtoint64(yytext, true, &yylval->ival))
210 797 : expr_yyerror_more(yyscanner, "bigint constant overflow",
211 1 : strdup(yytext));
212 1 : return INTEGER_CONST;
213 796 : }
214 : {digit}+(\.{digit}*)?([eE][-+]?{digit}+)? {
215 : if (!strtodouble(yytext, true, &yylval->dval))
216 61 : expr_yyerror_more(yyscanner, "double constant overflow",
217 1 : strdup(yytext));
218 1 : return DOUBLE_CONST;
219 60 : }
220 : \.{digit}+([eE][-+]?{digit}+)? {
221 : if (!strtodouble(yytext, true, &yylval->dval))
222 2 : expr_yyerror_more(yyscanner, "double constant overflow",
223 1 : strdup(yytext));
224 1 : return DOUBLE_CONST;
225 1 : }
226 : {alpha}{alnum}* {
227 : yylval->str = pg_strdup(yytext);
228 399 : return FUNCTION;
229 399 : }
230 :
231 : {space}+ { /* ignore */ }
232 1656 :
233 1656 : {continuation} { /* ignore */ }
234 17 :
235 17 : {newline} {
236 368 : /* report end of command */
237 : last_was_newline = true;
238 368 : return 0;
239 368 : }
240 :
241 : . {
242 1 : /*
243 : * must strdup yytext so that expr_yyerror_more doesn't
244 : * change it while finding end of line
245 : */
246 : expr_yyerror_more(yyscanner, "unexpected character",
247 1 : pg_strdup(yytext));
248 1 : /* NOTREACHED, syntax_error calls exit() */
249 : return 0;
250 : }
251 :
252 : }
253 :
<<EOF>>			{
					/*
					 * This rule carries no start-condition prefix, so it
					 * serves both INITIAL and EXPR.
					 */
					if (cur_state->buffer_stack != NULL)
					{
						/*
						 * A variable expansion buffer just ran out: pop the
						 * inclusion stack and carry on lexing whatever is
						 * now on top.
						 */
						psqlscan_pop_buffer_stack(cur_state);
						psqlscan_select_top_buffer(cur_state);
					}
					else
						return 0;	/* genuine end of input */
				}
265 :
266 0 : %%
267 0 :
268 : /* LCOV_EXCL_STOP */
269 :
270 : void
271 : expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
272 19 : {
273 : PsqlScanState state = yyget_extra(yyscanner);
274 19 : int lineno;
275 : int error_detection_offset;
276 : YYSTYPE lval;
277 : char *full_line;
278 :
279 : psql_scan_get_location(state, &lineno, &error_detection_offset);
280 19 : error_detection_offset--;
281 19 :
282 : /*
283 : * While parsing an expression, we may not have collected the whole line
284 : * yet from the input source. Lex till EOL so we can report whole line.
285 : * (If we're at EOF, it's okay to call yylex() an extra time.)
286 : */
287 : if (!last_was_newline)
288 19 : {
289 : while (yylex(&lval, yyscanner))
290 25 : /* skip */ ;
291 : }
292 :
293 : /* Extract the line, trimming trailing newline if any */
294 : full_line = expr_scanner_get_substring(state,
295 19 : expr_start_offset,
296 : true);
297 :
298 : syntax_error(expr_source, expr_lineno, full_line, expr_command,
299 19 : message, more, error_detection_offset - expr_start_offset);
300 : }
301 :
/*
 * Report an error with no additional detail text: simply forwards "message"
 * to expr_yyerror_more() with a NULL "more" argument.
 *
 * (The first argument is enforced by Bison to match the first argument of
 * yyparse(), but it is not used here.)
 */
void
expr_yyerror(PgBenchExpr **expr_parse_result_p, yyscan_t yyscanner, const char *message)
{
	expr_yyerror_more(yyscanner, message, NULL);
}
311 :
312 : /*
313 : * Collect a space-separated word from a backslash command and return it
314 : * in word_buf, along with its starting string offset in *offset.
315 : * Returns true if successful, false if at end of command.
316 : */
317 : bool
318 : expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
319 1318 : {
320 : int lexresult;
321 : YYSTYPE lval;
322 :
323 : /* Must be scanning already */
324 : Assert(state->scanbufhandle != NULL);
325 :
326 : /* Set current output target */
327 : state->output_buf = word_buf;
328 1318 : resetPQExpBuffer(word_buf);
329 1318 :
330 : /* Set input source */
331 : if (state->buffer_stack != NULL)
332 1318 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
333 0 : else
334 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
335 1318 :
336 : /* Set start state */
337 : state->start_state = INITIAL;
338 1318 :
339 : /* And lex. */
340 : lexresult = yylex(&lval, state->scanner);
341 1318 :
342 : /* Save start offset of word, if any. */
343 : if (lexresult)
344 1318 : {
345 : int lineno;
346 : int end_offset;
347 :
348 : psql_scan_get_location(state, &lineno, &end_offset);
349 1186 : *offset = end_offset - word_buf->len;
350 1186 : }
351 : else
352 : *offset = -1;
353 132 :
354 : /*
355 : * In case the caller returns to using the regular SQL lexer, reselect the
356 : * appropriate initial state.
357 : */
358 : psql_scan_reselect_sql_lexer(state);
359 1318 :
360 : return (bool) lexresult;
361 1318 : }
362 :
363 : /*
364 : * Prepare to lex an expression via expr_yyparse().
365 : *
366 : * Returns the yyscan_t that is to be passed to expr_yyparse().
367 : * (This is just state->scanner, but callers don't need to know that.)
368 : */
369 : yyscan_t
370 : expr_scanner_init(PsqlScanState state,
371 399 : const char *source, int lineno, int start_offset,
372 : const char *command)
373 : {
374 : /* Save error context info */
375 : expr_source = source;
376 399 : expr_lineno = lineno;
377 399 : expr_start_offset = start_offset;
378 399 : expr_command = command;
379 399 :
380 : /* Must be scanning already */
381 : Assert(state->scanbufhandle != NULL);
382 :
383 : /* Set current output target */
384 : state->output_buf = NULL;
385 399 :
386 : /* Set input source */
387 : if (state->buffer_stack != NULL)
388 399 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
389 0 : else
390 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
391 399 :
392 : /* Set start state */
393 : state->start_state = EXPR;
394 399 :
395 : return state->scanner;
396 399 : }
397 :
398 : /*
399 : * Finish lexing an expression.
400 : */
401 : void
402 : expr_scanner_finish(yyscan_t yyscanner)
403 380 : {
404 : PsqlScanState state = yyget_extra(yyscanner);
405 380 :
406 : /*
407 : * Reselect appropriate initial state for SQL lexer.
408 : */
409 : psql_scan_reselect_sql_lexer(state);
410 380 : }
411 380 :
412 : /*
413 : * Get a malloc'd copy of the lexer input string from start_offset
414 : * to end of current lexer token. If chomp is true, drop any trailing
415 : * newline(s).
416 : *
417 : * We rely on the knowledge that flex modifies the scan buffer by storing
418 : * a NUL at the end of the current token (yytext). Note that this might
419 : * not work quite right if we were parsing a sub-buffer, but since pgbench
420 : * never invokes that functionality, it doesn't matter. Also, this will
421 : * give the wrong answer (the whole remainder of the input) if called
422 : * before any yylex() call has been done.
423 : */
424 : char *
425 : expr_scanner_get_substring(PsqlScanState state,
426 530 : int start_offset,
427 : bool chomp)
428 : {
429 : char *result;
430 : const char *scanptr = state->scanbuf + start_offset;
431 530 : size_t slen = strlen(scanptr);
432 530 :
433 : if (chomp)
434 530 : {
435 : while (slen > 0 &&
436 1006 : (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
437 1006 : slen--;
438 476 : }
439 :
440 : result = (char *) pg_malloc(slen + 1);
441 530 : memcpy(result, scanptr, slen);
442 530 : result[slen] = '\0';
443 530 :
444 : return result;
445 530 : }
446 : /* END: function "expr_scanner_get_substring" */
|