Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * exprscan.l
5 : * lexical scanner for pgbench backslash commands
6 : *
7 : * This lexer supports two operating modes:
8 : *
9 : * In INITIAL state, just parse off whitespace-separated words (this mode
10 : * is basically equivalent to strtok(), which is what we used to use).
11 : *
12 : * In EXPR state, lex for the simple expression syntax of exprparse.y.
13 : *
14 : * In either mode, stop upon hitting newline or end of string.
15 : *
16 : * Note that this lexer operates within the framework created by psqlscan.l.
17 : *
18 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
19 : * Portions Copyright (c) 1994, Regents of the University of California
20 : *
21 : * src/bin/pgbench/exprscan.l
22 : *
23 : *-------------------------------------------------------------------------
24 : */
25 : #include "postgres_fe.h"
26 :
27 : /*
28 : * NB: include exprparse.h only AFTER including pgbench.h, because pgbench.h
29 : * contains definitions needed for YYSTYPE. Likewise, pgbench.h must come after
30 : * psqlscan_int.h for yyscan_t.
31 : */
32 : #include "fe_utils/psqlscan_int.h"
33 : #include "pgbench.h"
34 : #include "exprparse.h"
35 : }
36 :
37 : %{
38 : /* context information for reporting errors in expressions; saved by expr_scanner_init() and read back by expr_yyerror_more() */
39 : static const char *expr_source = NULL; /* "source" argument of expr_scanner_init() */
40 : static int expr_lineno = 0; /* "lineno" argument of expr_scanner_init() */
41 : static int expr_start_offset = 0; /* "start_offset" argument; error offsets are reported relative to it */
42 : static const char *expr_command = NULL; /* "command" argument of expr_scanner_init() */
43 :
44 : /* indicates whether last yylex() call read a newline; cleared on every entry to yylex() and set by the {newline} rules */
45 : static bool last_was_newline = false;
46 :
47 : /* LCOV_EXCL_START */
48 :
49 : %}
50 :
51 : /* Except for the prefix, these options should match psqlscan.l */
52 : %option reentrant
53 : %option bison-bridge
54 : %option 8bit
55 : %option never-interactive
56 : %option nodefault
57 : %option noinput
58 : %option nounput
59 : %option noyywrap
60 : %option warn
61 : %option prefix="expr_yy"
62 :
63 : /* Character classes; bytes \200-\377 (high bit set) count as letters, so they may appear in variable and function names */
64 : alpha [a-zA-Z\200-\377_]
65 : digit [0-9]
66 : alnum [A-Za-z\200-\377_0-9]
67 : /* {space} + {nonspace} + {newline} should cover all characters */
68 : space [ \t\r\f\v]
69 : nonspace [^ \t\r\f\v\n]
70 : newline [\n]
71 :
72 : /* Line continuation marker: a backslash, an optional carriage return, then a newline */
73 : continuation \\\r?{newline}
74 :
75 : /* case insensitive keywords, written as one two-letter class per character */
76 : and [Aa][Nn][Dd]
77 : or [Oo][Rr]
78 : not [Nn][Oo][Tt]
79 : case [Cc][Aa][Ss][Ee]
80 : when [Ww][Hh][Ee][Nn]
81 : then [Tt][Hh][Ee][Nn]
82 : else [Ee][Ll][Ss][Ee]
83 : end [Ee][Nn][Dd]
84 : true [Tt][Rr][Uu][Ee]
85 : false [Ff][Aa][Ll][Ss][Ee]
86 : null [Nn][Uu][Ll][Ll]
87 : is [Ii][Ss]
88 : isnull [Ii][Ss][Nn][Uu][Ll][Ll]
89 : notnull [Nn][Oo][Tt][Nn][Uu][Ll][Ll]
90 :
91 : /* Exclusive states: EXPR is entered through start_state, which expr_scanner_init() sets; unqualified rules are inactive while in it */
92 : %x EXPR
93 :
94 : %%
95 :
96 : %{
97 : /* Declare some local variables inside yylex(), for convenience; this block is executed on each entry to yylex() */
98 : PsqlScanState cur_state = yyextra; /* the PsqlScanState attached to this scanner */
99 :
100 : /*
101 : * Force flex into the state indicated by start_state. This has a
102 : * couple of purposes: it lets some of the functions below set a new
103 : * starting state without ugly direct access to flex variables, and it
104 : * allows us to transition from one flex lexer to another so that we
105 : * can lex different parts of the source string using separate lexers.
106 : */
107 : BEGIN(cur_state->start_state);
108 :
109 : /* Reset was-newline flag; the {newline} rules set it again just before they return 0 */
110 : last_was_newline = false;
111 : %}
112 :
113 : /* INITIAL state: split the input into whitespace-separated words, one per yylex() call */
114 :
115 : {nonspace}+ {
116 : /* Found a word, emit and return it */
117 : psqlscan_emit(cur_state, yytext, yyleng);
118 : return 1; /* nonzero is what expr_lex_one_word() takes as "got a word" */
119 : }
120 :
121 : /*
122 : * We need this rule to avoid returning "word\" instead of recognizing
123 : * a continuation marker just after a word:
124 : */
125 : {nonspace}+{continuation} {
126 : /* Found "word\\\r?\n", emit and return just "word" */
127 : int wordlen = yyleng - 2; /* index of the character before the final newline: '\\' or '\r' */
128 : if (yytext[wordlen] == '\r')
129 : wordlen--; /* step back over the optional carriage return */
130 : Assert(yytext[wordlen] == '\\');
131 : psqlscan_emit(cur_state, yytext, wordlen); /* wordlen bytes: everything before the backslash */
132 : return 1;
133 : }
134 :
135 : {space}+ { /* ignore */ }
136 :
137 : {continuation} { /* ignore */ }
138 :
139 : {newline} {
140 : /* report end of command */
141 : last_was_newline = true;
142 : return 0; /* zero is what expr_lex_one_word() takes as "end of command" */
143 : }
144 :
145 : /* EXPR state: produce the tokens of the expression syntax parsed by exprparse.y */
146 :
147 : <EXPR>{
148 :
149 : "+" { return '+'; }
150 : "-" { return '-'; }
151 : "*" { return '*'; }
152 : "/" { return '/'; }
153 : "%" { return '%'; } /* C version, also in Pg SQL */
154 : "=" { return '='; }
155 : "<>" { return NE_OP; }
156 : "!=" { return NE_OP; } /* C version, also in Pg SQL */
157 : "<=" { return LE_OP; }
158 : ">=" { return GE_OP; }
159 : "<<" { return LS_OP; }
160 : ">>" { return RS_OP; }
161 : "<" { return '<'; }
162 : ">" { return '>'; }
163 : "|" { return '|'; }
164 : "&" { return '&'; }
165 : "#" { return '#'; }
166 : "~" { return '~'; }
167 :
168 : "(" { return '('; }
169 : ")" { return ')'; }
170 : "," { return ','; }
171 :
172 : {and} { return AND_OP; }
173 : {or} { return OR_OP; }
174 : {not} { return NOT_OP; }
175 : {is} { return IS_OP; }
176 : {isnull} { return ISNULL_OP; }
177 : {notnull} { return NOTNULL_OP; }
178 :
179 : {case} { return CASE_KW; }
180 : {when} { return WHEN_KW; }
181 : {then} { return THEN_KW; }
182 : {else} { return ELSE_KW; }
183 : {end} { return END_KW; }
184 :
185 : :{alnum}+ {
186 : yylval->str = pg_strdup(yytext + 1); /* variable name without the leading ':' */
187 : return VARIABLE;
188 : }
189 :
190 : {null} { return NULL_CONST; }
191 : {true} {
192 : yylval->bval = true;
193 : return BOOLEAN_CONST;
194 : }
195 : {false} {
196 : yylval->bval = false;
197 : return BOOLEAN_CONST;
198 : }
199 : "9223372036854775808" {
200 : /*
201 : * Special handling for PG_INT64_MIN, which can't
202 : * accurately be represented here, as the minus sign is
203 : * lexed separately and INT64_MIN can't be represented as
204 : * a positive integer. This rule must stay ahead of {digit}+,
 * which matches the same text with the same length.
205 : */
206 : return MAXINT_PLUS_ONE_CONST;
207 : }
208 : {digit}+ {
209 : if (!strtoint64(yytext, true, &yylval->ival))
210 : expr_yyerror_more(yyscanner, "bigint constant overflow",
211 : pg_strdup(yytext)); /* pass a copy: expr_yyerror_more() keeps lexing to end of line, which changes yytext */
212 : return INTEGER_CONST;
213 : }
214 : {digit}+(\.{digit}*)?([eE][-+]?{digit}+)? {
215 : if (!strtodouble(yytext, true, &yylval->dval))
216 : expr_yyerror_more(yyscanner, "double constant overflow",
217 : pg_strdup(yytext)); /* copy of yytext, as in the integer rule */
218 : return DOUBLE_CONST;
219 : }
220 : \.{digit}+([eE][-+]?{digit}+)? {
221 : if (!strtodouble(yytext, true, &yylval->dval))
222 : expr_yyerror_more(yyscanner, "double constant overflow",
223 : pg_strdup(yytext)); /* copy of yytext, as in the integer rule */
224 : return DOUBLE_CONST;
225 : }
226 : {alpha}{alnum}* {
227 : yylval->str = pg_strdup(yytext);
228 : return FUNCTION; /* any other identifier; the keyword rules above take precedence on equal-length matches because they come first */
229 : }
230 :
231 : {space}+ { /* ignore */ }
232 :
233 : {continuation} { /* ignore */ }
234 :
235 : {newline} {
236 : /* report end of command */
237 : last_was_newline = true;
238 : return 0;
239 : }
240 :
241 : . {
242 : /*
243 : * must copy yytext so that expr_yyerror_more doesn't
244 : * change it while finding end of line
245 : */
246 : expr_yyerror_more(yyscanner, "unexpected character",
247 : pg_strdup(yytext));
248 : /* NOTREACHED, syntax_error calls exit() */
249 : return 0;
250 : }
251 :
252 : }
253 :
254 : <<EOF>> { /* not qualified with a start condition, so it serves both INITIAL and EXPR */
255 : if (cur_state->buffer_stack == NULL)
256 : return 0; /* end of input reached */
257 :
258 : /*
259 : * We were expanding a variable, so pop the inclusion
260 : * stack and keep lexing
261 : */
262 : psqlscan_pop_buffer_stack(cur_state);
263 : psqlscan_select_top_buffer(cur_state);
264 : } /* no return on this path: scanning continues in the buffer just selected */
265 :
266 : %%
267 :
268 : /* LCOV_EXCL_STOP */
269 :
/*
 * Report an error detected while lexing or parsing an expression.
 *
 * "message" is the main error text; "more" is optional extra detail
 * (expr_yyerror() passes NULL).  The rest of the current line is lexed
 * first so that the whole line can be handed to syntax_error(), together
 * with the error position expressed relative to expr_start_offset.
 */
270 : void
271 : expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
272 38 : {
273 : PsqlScanState state = yyget_extra(yyscanner);
274 38 : int error_detection_offset = expr_scanner_offset(state) - 1; /* captured now, before the loop below moves the scan position */
275 38 : YYSTYPE lval; /* scratch target for the yylex() calls below; its contents are not used */
276 : char *full_line;
277 :
278 : /*
279 : * While parsing an expression, we may not have collected the whole line
280 : * yet from the input source. Lex till EOL so we can report whole line.
281 : * (If we're at EOF, it's okay to call yylex() an extra time.)
282 : */
283 : if (!last_was_newline)
284 38 : {
285 : while (yylex(&lval, yyscanner))
286 50 : /* skip */ ;
287 : }
288 :
289 : /* Extract the line, trimming trailing newline if any */
290 : full_line = expr_scanner_get_substring(state,
291 38 : expr_start_offset,
292 : expr_scanner_offset(state),
293 : true);
294 :
295 : syntax_error(expr_source, expr_lineno, full_line, expr_command,
296 38 : message, more, error_detection_offset - expr_start_offset);
297 : }
298 :
299 : /* Error callback in the form Bison requires: forwards "message" to expr_yyerror_more() with no extra detail.
300 : * (The first argument is enforced by Bison to match the first argument of
301 : * yyparse(), but it is not used here.)
302 : */
303 : void
304 : expr_yyerror(PgBenchExpr **expr_parse_result_p, yyscan_t yyscanner, const char *message)
305 12 : {
306 : expr_yyerror_more(yyscanner, message, NULL);
307 12 : }
308 :
309 : /*
310 : * Collect a space-separated word from a backslash command and return it
311 : * in word_buf, along with its starting string offset in *offset.
312 : * Returns true if successful, false if at end of command.
 *
 * word_buf is reset before lexing, and *offset is set to -1 when no word
 * was found.
313 : */
314 : bool
315 : expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
316 2616 : {
317 : int lexresult;
318 : YYSTYPE lval; /* required by yylex()'s signature; not examined here */
319 :
320 : /* Must be scanning already */
321 : Assert(state->scanbufhandle != NULL);
322 :
323 : /* Set current output target */
324 : state->output_buf = word_buf;
325 2616 : resetPQExpBuffer(word_buf);
326 2616 :
327 : /* Set input source: the top of the buffer stack if there is one, else the main scan buffer */
328 : if (state->buffer_stack != NULL)
329 2616 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
330 0 : else
331 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
332 2616 :
333 : /* Set start state; applied by BEGIN() at the top of yylex() */
334 : state->start_state = INITIAL;
335 2616 :
336 : /* And lex. */
337 : lexresult = yylex(&lval, state->scanner);
338 2616 :
339 : /*
340 : * Save start offset of word, if any. We could do this more efficiently,
341 : * but for now this seems fine.
342 : */
343 : if (lexresult)
344 2616 : *offset = expr_scanner_offset(state) - word_buf->len; /* end offset of the token just lexed, minus the word's length */
345 2360 : else
346 : *offset = -1;
347 256 :
348 : /*
349 : * In case the caller returns to using the regular SQL lexer, reselect the
350 : * appropriate initial state.
351 : */
352 : psql_scan_reselect_sql_lexer(state);
353 2616 :
354 : return (bool) lexresult;
355 2616 : }
356 :
357 : /*
358 : * Prepare to lex an expression via expr_yyparse().
359 : *
 * source, lineno, start_offset and command are only stored in file-level
 * variables here; expr_yyerror_more() reads them back when it reports an
 * error.
 *
360 : * Returns the yyscan_t that is to be passed to expr_yyparse().
361 : * (This is just state->scanner, but callers don't need to know that.)
362 : */
363 : yyscan_t
364 : expr_scanner_init(PsqlScanState state,
365 794 : const char *source, int lineno, int start_offset,
366 : const char *command)
367 : {
368 : /* Save error context info */
369 : expr_source = source;
370 794 : expr_lineno = lineno;
371 794 : expr_start_offset = start_offset;
372 794 : expr_command = command;
373 794 :
374 : /* Must be scanning already */
375 : Assert(state->scanbufhandle != NULL);
376 :
377 : /* Set current output target; none is needed, since the EXPR rules return tokens and never call psqlscan_emit() */
378 : state->output_buf = NULL;
379 794 :
380 : /* Set input source: the top of the buffer stack if there is one, else the main scan buffer */
381 : if (state->buffer_stack != NULL)
382 794 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
383 0 : else
384 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
385 794 :
386 : /* Set start state; applied by BEGIN() at the top of yylex() */
387 : state->start_state = EXPR;
388 794 :
389 : return state->scanner;
390 794 : }
391 :
392 : /*
393 : * Finish lexing an expression: hand the scanner back to the SQL lexer.
 *
 * yyscanner is the handle that expr_scanner_init() returned.
394 : */
395 : void
396 : expr_scanner_finish(yyscan_t yyscanner)
397 756 : {
398 : PsqlScanState state = yyget_extra(yyscanner);
399 756 :
400 : /*
401 : * Reselect appropriate initial state for SQL lexer.
402 : */
403 : psql_scan_reselect_sql_lexer(state);
404 756 : }
405 756 :
406 : /*
407 : * Get offset from start of string to end of current lexer token.
408 : *
409 : * We rely on the knowledge that flex modifies the scan buffer by storing
410 : * a NUL at the end of the current token (yytext). Note that this might
411 : * not work quite right if we were parsing a sub-buffer, but since pgbench
412 : * never invokes that functionality, it doesn't matter.
 *
 * NOTE(review): strlen() yields a size_t that is narrowed to int by the
 * return type; presumably scan buffers are always far smaller than INT_MAX.
413 : */
414 : int
415 : expr_scanner_offset(PsqlScanState state)
416 5060 : {
417 : return strlen(state->scanbuf); /* stops at the NUL flex stored after the current token */
418 5060 : }
419 :
420 : /*
421 : * Get a malloc'd copy of the lexer input string from start_offset
422 : * to just before end_offset. If chomp is true, drop any trailing
423 : * newline(s).
 *
 * Both '\n' and '\r' count as trailing newline characters.  The result is
 * NUL-terminated and allocated with pg_malloc().
424 : */
425 : char *
426 : expr_scanner_get_substring(PsqlScanState state,
427 1048 : int start_offset, int end_offset,
428 : bool chomp)
429 : {
430 : char *result;
431 : const char *scanptr = state->scanbuf + start_offset; /* first byte to copy */
432 1048 : int slen = end_offset - start_offset; /* number of bytes to copy, before chomping */
433 1048 :
434 : Assert(slen >= 0);
435 : Assert(end_offset <= strlen(state->scanbuf));
436 :
437 : if (chomp)
438 1048 : {
439 : while (slen > 0 &&
440 1988 : (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
441 1988 : slen--;
442 940 : }
443 :
444 : result = (char *) pg_malloc(slen + 1); /* one extra byte for the terminating NUL */
445 1048 : memcpy(result, scanptr, slen);
446 1048 : result[slen] = '\0';
447 1048 :
448 : return result;
449 1048 : }
450 :
451 : /*
452 : * Get the line number associated with the given string offset
453 : * (which must not be past the end of where we've lexed to).
 *
 * Numbering starts at 1; every '\n' found in the first "offset" bytes of
 * the scan buffer adds one.
454 : */
455 : int
456 : expr_scanner_get_lineno(PsqlScanState state, int offset)
457 3358 : {
458 : int lineno = 1;
459 3358 : const char *p = state->scanbuf;
460 3358 :
461 : while (*p && offset > 0) /* also stops at a NUL in the buffer, whichever comes first */
462 1597282 : {
463 : if (*p == '\n')
464 1593924 : lineno++;
465 43458 : p++, offset--;
466 1593924 : }
467 : return lineno;
468 3358 : }
|