Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * exprscan.l
5 : * lexical scanner for pgbench backslash commands
6 : *
7 : * This lexer supports two operating modes:
8 : *
9 : * In INITIAL state, just parse off whitespace-separated words (this mode
10 : * is basically equivalent to strtok(), which is what we used to use).
11 : *
12 : * In EXPR state, lex for the simple expression syntax of exprparse.y.
13 : *
14 : * In either mode, stop upon hitting newline or end of string.
15 : *
16 : * Note that this lexer operates within the framework created by psqlscan.l.
17 : *
18 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
19 : * Portions Copyright (c) 1994, Regents of the University of California
20 : *
21 : * src/bin/pgbench/exprscan.l
22 : *
23 : *-------------------------------------------------------------------------
24 : */
25 : #include "postgres_fe.h"
26 :
27 : /*
28 : * NB: include exprparse.h only AFTER including pgbench.h, because pgbench.h
29 : * contains definitions needed for YYSTYPE. Likewise, pgbench.h must come after
30 : * psqlscan_int.h for yyscan_t.
31 : */
32 : #include "fe_utils/psqlscan_int.h"
33 : #include "pgbench.h"
34 : #include "exprparse.h"
35 : }
36 :
%{
/*
 * Error-reporting context.  These are filled in by expr_scanner_init() and
 * read back by expr_yyerror_more() when an expression error is reported.
 */
static const char *expr_source = NULL;
static const char *expr_command = NULL;
static int	expr_lineno = 0;
static int	expr_start_offset = 0;

/* true when the most recent yylex() call stopped because it read a newline */
static bool last_was_newline = false;

/* LCOV_EXCL_START */

%}
50 :
/* Apart from the prefix, keep these options in sync with psqlscan.l */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="expr_yy"

/* Character classes (high-bit bytes count as letters) */
alpha			[A-Za-z_\200-\377]
digit			[0-9]
alnum			[A-Za-z0-9_\200-\377]
/* Every character must belong to one of {space}, {nonspace}, {newline} */
space			[ \t\r\f\v]
nonspace		[^ \t\r\f\v\n]
newline			[\n]

/* A backslash right before the (possibly CR-prefixed) newline continues the line */
continuation	\\\r?{newline}

/* Keywords, matched without regard to case */
and				[Aa][Nn][Dd]
case			[Cc][Aa][Ss][Ee]
else			[Ee][Ll][Ss][Ee]
end				[Ee][Nn][Dd]
false			[Ff][Aa][Ll][Ss][Ee]
is				[Ii][Ss]
isnull			[Ii][Ss][Nn][Uu][Ll][Ll]
not				[Nn][Oo][Tt]
notnull			[Nn][Oo][Tt][Nn][Uu][Ll][Ll]
null			[Nn][Uu][Ll][Ll]
or				[Oo][Rr]
then			[Tt][Hh][Ee][Nn]
true			[Tt][Rr][Uu][Ee]
when			[Ww][Hh][Ee][Nn]

/* Exclusive start condition used while lexing expressions */
%x EXPR
93 :
94 : %%
95 :
%{
		/* Local shorthand, visible to every rule action inside yylex() */
		PsqlScanState cur_state = yyextra;

		/* Each call starts out not having seen a newline */
		last_was_newline = false;

		/*
		 * Put flex into the start condition recorded in start_state.  That
		 * lets the support functions below choose the lexing mode without
		 * poking at flex internals, and it is also what makes it possible
		 * to hand the same source string from one flex lexer to another.
		 */
		BEGIN(cur_state->start_state);
%}
112 :
113 : /* INITIAL state */
114 :
115 : {nonspace}+ {
116 : /* Found a word, emit and return it */
117 : psqlscan_emit(cur_state, yytext, yyleng);
118 : return 1;
119 : }
120 :
121 : /*
122 : * We need this rule to avoid returning "word\" instead of recognizing
123 : * a continuation marker just after a word:
124 : */
125 : {nonspace}+{continuation} {
126 : /* Found "word\\\r?\n", emit and return just "word" */
127 : int wordlen = yyleng - 2;
128 : if (yytext[wordlen] == '\r')
129 : wordlen--;
130 : Assert(yytext[wordlen] == '\\');
131 : psqlscan_emit(cur_state, yytext, wordlen);
132 : return 1;
133 : }
134 :
135 : {space}+ { /* ignore */ }
136 :
137 : {continuation} { /* ignore */ }
138 :
139 : {newline} {
140 : /* report end of command */
141 : last_was_newline = true;
142 : return 0;
143 : }
144 :
145 : /* EXPR state */
146 :
147 : <EXPR>{
148 :
149 : "+" { return '+'; }
150 : "-" { return '-'; }
151 : "*" { return '*'; }
152 : "/" { return '/'; }
153 : "%" { return '%'; } /* C version, also in Pg SQL */
154 : "=" { return '='; }
155 : "<>" { return NE_OP; }
156 : "!=" { return NE_OP; } /* C version, also in Pg SQL */
157 : "<=" { return LE_OP; }
158 : ">=" { return GE_OP; }
159 : "<<" { return LS_OP; }
160 : ">>" { return RS_OP; }
161 : "<" { return '<'; }
162 : ">" { return '>'; }
163 : "|" { return '|'; }
164 : "&" { return '&'; }
165 : "#" { return '#'; }
166 : "~" { return '~'; }
167 :
168 : "(" { return '('; }
169 : ")" { return ')'; }
170 : "," { return ','; }
171 :
172 : {and} { return AND_OP; }
173 : {or} { return OR_OP; }
174 : {not} { return NOT_OP; }
175 : {is} { return IS_OP; }
176 : {isnull} { return ISNULL_OP; }
177 : {notnull} { return NOTNULL_OP; }
178 :
179 : {case} { return CASE_KW; }
180 : {when} { return WHEN_KW; }
181 : {then} { return THEN_KW; }
182 : {else} { return ELSE_KW; }
183 : {end} { return END_KW; }
184 :
185 : :{alnum}+ {
186 : yylval->str = pg_strdup(yytext + 1);
187 : return VARIABLE;
188 : }
189 :
190 : {null} { return NULL_CONST; }
191 : {true} {
192 : yylval->bval = true;
193 : return BOOLEAN_CONST;
194 : }
195 : {false} {
196 : yylval->bval = false;
197 : return BOOLEAN_CONST;
198 : }
199 : "9223372036854775808" {
200 : /*
201 : * Special handling for PG_INT64_MIN, which can't
202 : * accurately be represented here, as the minus sign is
203 : * lexed separately and INT64_MIN can't be represented as
204 : * a positive integer.
205 : */
206 : return MAXINT_PLUS_ONE_CONST;
207 : }
208 : {digit}+ {
209 : if (!strtoint64(yytext, true, &yylval->ival))
210 : expr_yyerror_more(yyscanner, "bigint constant overflow",
211 : strdup(yytext));
212 : return INTEGER_CONST;
213 : }
214 : {digit}+(\.{digit}*)?([eE][-+]?{digit}+)? {
215 : if (!strtodouble(yytext, true, &yylval->dval))
216 : expr_yyerror_more(yyscanner, "double constant overflow",
217 : strdup(yytext));
218 : return DOUBLE_CONST;
219 : }
220 : \.{digit}+([eE][-+]?{digit}+)? {
221 : if (!strtodouble(yytext, true, &yylval->dval))
222 : expr_yyerror_more(yyscanner, "double constant overflow",
223 : strdup(yytext));
224 : return DOUBLE_CONST;
225 : }
226 : {alpha}{alnum}* {
227 : yylval->str = pg_strdup(yytext);
228 : return FUNCTION;
229 : }
230 :
231 : {space}+ { /* ignore */ }
232 :
233 : {continuation} { /* ignore */ }
234 :
235 : {newline} {
236 : /* report end of command */
237 : last_was_newline = true;
238 : return 0;
239 : }
240 :
241 : . {
242 : /*
243 : * must strdup yytext so that expr_yyerror_more doesn't
244 : * change it while finding end of line
245 : */
246 : expr_yyerror_more(yyscanner, "unexpected character",
247 : pg_strdup(yytext));
248 : /* NOTREACHED, syntax_error calls exit() */
249 : return 0;
250 : }
251 :
252 : }
253 :
<<EOF>>			{
					/* With nothing stacked, this really is the end of input */
					if (!cur_state->buffer_stack)
						return 0;

					/*
					 * Otherwise we just ran off the end of an expanded
					 * variable's text: drop that buffer, go back to the one
					 * underneath it, and carry on lexing.
					 */
					psqlscan_pop_buffer_stack(cur_state);
					psqlscan_select_top_buffer(cur_state);
				}
265 :
266 : %%
267 :
268 : /* LCOV_EXCL_STOP */
269 :
270 : void
271 : expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
272 38 : {
273 : PsqlScanState state = yyget_extra(yyscanner);
274 38 : int error_detection_offset = expr_scanner_offset(state) - 1;
275 38 : YYSTYPE lval;
276 : char *full_line;
277 :
278 : /*
279 : * While parsing an expression, we may not have collected the whole line
280 : * yet from the input source. Lex till EOL so we can report whole line.
281 : * (If we're at EOF, it's okay to call yylex() an extra time.)
282 : */
283 : if (!last_was_newline)
284 38 : {
285 : while (yylex(&lval, yyscanner))
286 50 : /* skip */ ;
287 : }
288 :
289 : /* Extract the line, trimming trailing newline if any */
290 : full_line = expr_scanner_get_substring(state,
291 38 : expr_start_offset,
292 : expr_scanner_offset(state),
293 : true);
294 :
295 : syntax_error(expr_source, expr_lineno, full_line, expr_command,
296 38 : message, more, error_detection_offset - expr_start_offset);
297 : }
298 :
299 : void
300 : expr_yyerror(yyscan_t yyscanner, const char *message)
301 12 : {
302 : expr_yyerror_more(yyscanner, message, NULL);
303 12 : }
304 :
305 : /*
306 : * Collect a space-separated word from a backslash command and return it
307 : * in word_buf, along with its starting string offset in *offset.
308 : * Returns true if successful, false if at end of command.
309 : */
310 : bool
311 : expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
312 2616 : {
313 : int lexresult;
314 : YYSTYPE lval;
315 :
316 : /* Must be scanning already */
317 : Assert(state->scanbufhandle != NULL);
318 :
319 : /* Set current output target */
320 : state->output_buf = word_buf;
321 2616 : resetPQExpBuffer(word_buf);
322 2616 :
323 : /* Set input source */
324 : if (state->buffer_stack != NULL)
325 2616 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
326 0 : else
327 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
328 2616 :
329 : /* Set start state */
330 : state->start_state = INITIAL;
331 2616 :
332 : /* And lex. */
333 : lexresult = yylex(&lval, state->scanner);
334 2616 :
335 : /*
336 : * Save start offset of word, if any. We could do this more efficiently,
337 : * but for now this seems fine.
338 : */
339 : if (lexresult)
340 2616 : *offset = expr_scanner_offset(state) - word_buf->len;
341 2360 : else
342 : *offset = -1;
343 256 :
344 : /*
345 : * In case the caller returns to using the regular SQL lexer, reselect the
346 : * appropriate initial state.
347 : */
348 : psql_scan_reselect_sql_lexer(state);
349 2616 :
350 : return (bool) lexresult;
351 2616 : }
352 :
353 : /*
354 : * Prepare to lex an expression via expr_yyparse().
355 : *
356 : * Returns the yyscan_t that is to be passed to expr_yyparse().
357 : * (This is just state->scanner, but callers don't need to know that.)
358 : */
359 : yyscan_t
360 : expr_scanner_init(PsqlScanState state,
361 794 : const char *source, int lineno, int start_offset,
362 : const char *command)
363 : {
364 : /* Save error context info */
365 : expr_source = source;
366 794 : expr_lineno = lineno;
367 794 : expr_start_offset = start_offset;
368 794 : expr_command = command;
369 794 :
370 : /* Must be scanning already */
371 : Assert(state->scanbufhandle != NULL);
372 :
373 : /* Set current output target */
374 : state->output_buf = NULL;
375 794 :
376 : /* Set input source */
377 : if (state->buffer_stack != NULL)
378 794 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
379 0 : else
380 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
381 794 :
382 : /* Set start state */
383 : state->start_state = EXPR;
384 794 :
385 : return state->scanner;
386 794 : }
387 :
388 : /*
389 : * Finish lexing an expression.
390 : */
391 : void
392 : expr_scanner_finish(yyscan_t yyscanner)
393 756 : {
394 : PsqlScanState state = yyget_extra(yyscanner);
395 756 :
396 : /*
397 : * Reselect appropriate initial state for SQL lexer.
398 : */
399 : psql_scan_reselect_sql_lexer(state);
400 756 : }
401 756 :
402 : /*
403 : * Get offset from start of string to end of current lexer token.
404 : *
405 : * We rely on the knowledge that flex modifies the scan buffer by storing
406 : * a NUL at the end of the current token (yytext). Note that this might
407 : * not work quite right if we were parsing a sub-buffer, but since pgbench
408 : * never invokes that functionality, it doesn't matter.
409 : */
410 : int
411 : expr_scanner_offset(PsqlScanState state)
412 5060 : {
413 : return strlen(state->scanbuf);
414 5060 : }
415 :
416 : /*
417 : * Get a malloc'd copy of the lexer input string from start_offset
418 : * to just before end_offset. If chomp is true, drop any trailing
419 : * newline(s).
420 : */
421 : char *
422 : expr_scanner_get_substring(PsqlScanState state,
423 1048 : int start_offset, int end_offset,
424 : bool chomp)
425 : {
426 : char *result;
427 : const char *scanptr = state->scanbuf + start_offset;
428 1048 : int slen = end_offset - start_offset;
429 1048 :
430 : Assert(slen >= 0);
431 : Assert(end_offset <= strlen(state->scanbuf));
432 :
433 : if (chomp)
434 1048 : {
435 : while (slen > 0 &&
436 1988 : (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
437 1988 : slen--;
438 940 : }
439 :
440 : result = (char *) pg_malloc(slen + 1);
441 1048 : memcpy(result, scanptr, slen);
442 1048 : result[slen] = '\0';
443 1048 :
444 : return result;
445 1048 : }
446 :
447 : /*
448 : * Get the line number associated with the given string offset
449 : * (which must not be past the end of where we've lexed to).
450 : */
451 : int
452 : expr_scanner_get_lineno(PsqlScanState state, int offset)
453 3358 : {
454 : int lineno = 1;
455 3358 : const char *p = state->scanbuf;
456 3358 :
457 : while (*p && offset > 0)
458 1597282 : {
459 : if (*p == '\n')
460 1593924 : lineno++;
461 43458 : p++, offset--;
462 1593924 : }
463 : return lineno;
464 3358 : }
|