Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * exprscan.l
5 : * lexical scanner for pgbench backslash commands
6 : *
7 : * This lexer supports two operating modes:
8 : *
9 : * In INITIAL state, just parse off whitespace-separated words (this mode
10 : * is basically equivalent to strtok(), which is what we used to use).
11 : *
12 : * In EXPR state, lex for the simple expression syntax of exprparse.y.
13 : *
14 : * In either mode, stop upon hitting newline or end of string.
15 : *
16 : * Note that this lexer operates within the framework created by psqlscan.l.
17 : *
18 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
19 : * Portions Copyright (c) 1994, Regents of the University of California
20 : *
21 : * src/bin/pgbench/exprscan.l
22 : *
23 : *-------------------------------------------------------------------------
24 : */
25 : #include "postgres_fe.h"
26 :
27 : /*
28 : * NB: include exprparse.h only AFTER including pgbench.h, because pgbench.h
29 : * contains definitions needed for YYSTYPE. Likewise, pgbench.h must come after
30 : * psqlscan_int.h for yyscan_t.
31 : */
32 : #include "fe_utils/psqlscan_int.h"
33 : #include "pgbench.h"
34 : #include "exprparse.h"
35 : }
36 :
37 : %{
/*
 * File-scope state for this scanner.  The four expr_* variables are filled
 * in by expr_scanner_init() and read back by expr_yyerror_more() when it
 * assembles its syntax_error() report.
 */
38 : /* context information for reporting errors in expressions */
39 : static const char *expr_source = NULL;
40 : static int expr_lineno = 0;
41 : static int expr_start_offset = 0;
42 : static const char *expr_command = NULL;
43 :
44 : /* indicates whether last yylex() call read a newline */
/*
 * Cleared at the top of every yylex() call and set only by the {newline}
 * rules; expr_yyerror_more() tests it to decide whether the remainder of
 * the line still has to be lexed before reporting.
 */
45 : static bool last_was_newline = false;
46 :
47 : /*
48 : * Work around a bug in flex 2.5.35: it emits a couple of functions that
49 : * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
50 : * this would cause warnings. Providing our own declarations should be
51 : * harmless even when the bug gets fixed.
52 : */
53 : extern int expr_yyget_column(yyscan_t yyscanner);
54 : extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
55 :
/* Coverage-exclusion marker; the matching LCOV_EXCL_STOP follows the rules section. */
56 : /* LCOV_EXCL_START */
57 :
58 : %}
59 :
60 : /* Except for the prefix, these options should match psqlscan.l */
/*
 * "reentrant" and "bison-bridge" are why yylex() is called below with a
 * YYSTYPE pointer and a yyscan_t, and why actions write through yylval->.
 * The prefix makes the generated symbols expr_yy* (see the declarations of
 * expr_yyget_column/expr_yyset_column above).
 */
61 : %option reentrant
62 : %option bison-bridge
63 : %option 8bit
64 : %option never-interactive
65 : %option nodefault
66 : %option noinput
67 : %option nounput
68 : %option noyywrap
69 : %option warn
70 : %option prefix="expr_yy"
71 :
72 : /* Character classes */
/* Bytes \200-\377 (high bit set) count as letters in both {alpha} and {alnum}. */
73 : alpha [a-zA-Z\200-\377_]
74 : digit [0-9]
75 : alnum [A-Za-z\200-\377_0-9]
76 : /* {space} + {nonspace} + {newline} should cover all characters */
77 : space [ \t\r\f\v]
78 : nonspace [^ \t\r\f\v\n]
79 : newline [\n]
80 :
81 : /* Line continuation marker */
/* That is: a backslash, an optional carriage return, then a newline. */
82 : continuation \\\r?{newline}
83 :
84 : /* case insensitive keywords */
/* Each keyword is spelled as one two-letter character class per letter, so any mix of case matches. */
85 : and [Aa][Nn][Dd]
86 : or [Oo][Rr]
87 : not [Nn][Oo][Tt]
88 : case [Cc][Aa][Ss][Ee]
89 : when [Ww][Hh][Ee][Nn]
90 : then [Tt][Hh][Ee][Nn]
91 : else [Ee][Ll][Ss][Ee]
92 : end [Ee][Nn][Dd]
93 : true [Tt][Rr][Uu][Ee]
94 : false [Ff][Aa][Ll][Ss][Ee]
95 : null [Nn][Uu][Ll][Ll]
96 : is [Ii][Ss]
97 : isnull [Ii][Ss][Nn][Uu][Ll][Ll]
98 : notnull [Nn][Oo][Tt][Nn][Uu][Ll][Ll]
99 :
100 : /* Exclusive states */
/* Declared with %x: while in EXPR, only rules tagged <EXPR> (and the unqualified <<EOF>> rule) are active. */
101 : %x EXPR
102 :
103 : %%
104 :
105 : %{
/*
 * Code placed here, ahead of the first rule, becomes part of yylex() and
 * runs on every entry to it, before any input is matched.
 */
106 : /* Declare some local variables inside yylex(), for convenience */
107 : PsqlScanState cur_state = yyextra;
108 :
109 : /*
110 : * Force flex into the state indicated by start_state. This has a
111 : * couple of purposes: it lets some of the functions below set a new
112 : * starting state without ugly direct access to flex variables, and it
113 : * allows us to transition from one flex lexer to another so that we
114 : * can lex different parts of the source string using separate lexers.
 *
 * In this file start_state is set to INITIAL by expr_lex_one_word() and
 * to EXPR by expr_scanner_init().
115 : */
116 : BEGIN(cur_state->start_state);
117 :
118 : /* Reset was-newline flag */
/* Only the {newline} rules set it again, so after yylex() returns it describes that call alone. */
119 : last_was_newline = false;
120 : %}
121 :
122 : /* INITIAL state */
/*
 * Word-splitting mode, selected by expr_lex_one_word().  A rule returns 1
 * after emitting a word into the current output buffer and 0 when a newline
 * ends the command; whitespace and continuation markers are consumed
 * without returning.
 */
123 :
124 : {nonspace}+ {
125 : /* Found a word, emit and return it */
126 : psqlscan_emit(cur_state, yytext, yyleng);
127 : return 1;
128 : }
129 :
130 : /*
131 : * We need this rule to avoid returning "word\" instead of recognizing
132 : * a continuation marker just after a word:
 *
 * Backslash is itself a {nonspace} character, so the rule above would also
 * match "word\"; this pattern produces a longer match when a newline
 * follows, and flex prefers the longest match.
133 : */
134 : {nonspace}+{continuation} {
135 : /* Found "word\\\r?\n", emit and return just "word" */
/*
 * yyleng - 2 indexes the character just before the newline: either the
 * backslash itself, or a CR, in which case step back one more position.
 * wordlen then equals the number of characters preceding the backslash.
 */
136 : int wordlen = yyleng - 2;
137 : if (yytext[wordlen] == '\r')
138 : wordlen--;
139 : Assert(yytext[wordlen] == '\\');
140 : psqlscan_emit(cur_state, yytext, wordlen);
141 : return 1;
142 : }
143 :
144 : {space}+ { /* ignore */ }
145 :
146 : {continuation} { /* ignore */ }
147 :
148 : {newline} {
149 : /* report end of command */
/* the flag lets expr_yyerror_more() know the whole line has already been consumed */
150 : last_was_newline = true;
151 : return 0;
152 : }
153 :
154 : /* EXPR state */
155 :
<EXPR>{

	/*
	 * Tokens of the expression language parsed by exprparse.y.
	 *
	 * flex picks the longest match and, among equally long matches, the
	 * rule listed first.  The ordering below relies on that: two-character
	 * operators beat their one-character prefixes by length, keywords
	 * precede the generic {alpha}{alnum}* rule, the PG_INT64_MIN literal
	 * precedes {digit}+, and {digit}+ precedes the floating-point patterns
	 * so that a bare digit string is lexed as an integer.
	 *
	 * Every rule that reports an error hands expr_yyerror_more() a private
	 * copy of yytext made with pg_strdup(): the error routine keeps lexing
	 * to end of line, which would overwrite yytext, and a plain strdup()
	 * could return NULL and silently drop the detail from the message.
	 */

"+"				{ return '+'; }
"-"				{ return '-'; }
"*"				{ return '*'; }
"/"				{ return '/'; }
"%"				{ return '%'; } /* C version, also in Pg SQL */
"="				{ return '='; }
"<>"			{ return NE_OP; }
"!="			{ return NE_OP; } /* C version, also in Pg SQL */
"<="			{ return LE_OP; }
">="			{ return GE_OP; }
"<<"			{ return LS_OP; }
">>"			{ return RS_OP; }
"<"				{ return '<'; }
">"				{ return '>'; }
"|"				{ return '|'; }
"&"				{ return '&'; }
"#"				{ return '#'; }
"~"				{ return '~'; }

"("				{ return '('; }
")"				{ return ')'; }
","				{ return ','; }

{and}			{ return AND_OP; }
{or}			{ return OR_OP; }
{not}			{ return NOT_OP; }
{is}			{ return IS_OP; }
{isnull}		{ return ISNULL_OP; }
{notnull}		{ return NOTNULL_OP; }

{case}			{ return CASE_KW; }
{when}			{ return WHEN_KW; }
{then}			{ return THEN_KW; }
{else}			{ return ELSE_KW; }
{end}			{ return END_KW; }

:{alnum}+		{
					/* variable reference: hand back the name without the colon */
					yylval->str = pg_strdup(yytext + 1);
					return VARIABLE;
				}

{null}			{ return NULL_CONST; }
{true}			{
					yylval->bval = true;
					return BOOLEAN_CONST;
				}
{false}			{
					yylval->bval = false;
					return BOOLEAN_CONST;
				}
"9223372036854775808"	{
					/*
					 * Special handling for PG_INT64_MIN, which can't
					 * accurately be represented here, as the minus sign is
					 * lexed separately and INT64_MIN can't be represented as
					 * a positive integer.
					 */
					return MAXINT_PLUS_ONE_CONST;
				}
{digit}+		{
					if (!strtoint64(yytext, true, &yylval->ival))
						expr_yyerror_more(yyscanner, "bigint constant overflow",
										  pg_strdup(yytext));
					return INTEGER_CONST;
				}
{digit}+(\.{digit}*)?([eE][-+]?{digit}+)?	{
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  pg_strdup(yytext));
					return DOUBLE_CONST;
				}
\.{digit}+([eE][-+]?{digit}+)?	{
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  pg_strdup(yytext));
					return DOUBLE_CONST;
				}
{alpha}{alnum}*	{
					/* any other identifier is taken to be a function name */
					yylval->str = pg_strdup(yytext);
					return FUNCTION;
				}

{space}+		{ /* ignore */ }

{continuation}	{ /* ignore */ }

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}

.				{
					/*
					 * must strdup yytext so that expr_yyerror_more doesn't
					 * change it while finding end of line
					 */
					expr_yyerror_more(yyscanner, "unexpected character",
									  pg_strdup(yytext));
					/* NOTREACHED, syntax_error calls exit() */
					return 0;
				}

}
262 :
263 : <<EOF>> {
/*
 * This rule carries no start-condition prefix; under flex's <<EOF>>
 * semantics it therefore serves both INITIAL and EXPR.
 */
264 : if (cur_state->buffer_stack == NULL)
265 : return 0; /* end of input reached */
266 :
267 : /*
268 : * We were expanding a variable, so pop the inclusion
269 : * stack and keep lexing
270 : */
271 : psqlscan_pop_buffer_stack(cur_state);
272 : psqlscan_select_top_buffer(cur_state);
/* deliberately no return: the action falls through and yylex() keeps scanning */
273 : }
274 :
275 : %%
276 :
277 : /* LCOV_EXCL_STOP */
278 :
279 : void
280 : expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
281 38 : {
282 : PsqlScanState state = yyget_extra(yyscanner);
283 38 : int error_detection_offset = expr_scanner_offset(state) - 1;
284 38 : YYSTYPE lval;
285 : char *full_line;
286 :
287 : /*
288 : * While parsing an expression, we may not have collected the whole line
289 : * yet from the input source. Lex till EOL so we can report whole line.
290 : * (If we're at EOF, it's okay to call yylex() an extra time.)
291 : */
292 : if (!last_was_newline)
293 38 : {
294 : while (yylex(&lval, yyscanner))
295 50 : /* skip */ ;
296 : }
297 :
298 : /* Extract the line, trimming trailing newline if any */
299 : full_line = expr_scanner_get_substring(state,
300 38 : expr_start_offset,
301 : expr_scanner_offset(state),
302 : true);
303 :
304 : syntax_error(expr_source, expr_lineno, full_line, expr_command,
305 38 : message, more, error_detection_offset - expr_start_offset);
306 : }
307 :
308 : void
309 : expr_yyerror(yyscan_t yyscanner, const char *message)
310 12 : {
311 : expr_yyerror_more(yyscanner, message, NULL);
312 12 : }
313 :
314 : /*
315 : * Collect a space-separated word from a backslash command and return it
316 : * in word_buf, along with its starting string offset in *offset.
317 : * Returns true if successful, false if at end of command.
318 : */
319 : bool
320 : expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
321 2462 : {
322 : int lexresult;
323 : YYSTYPE lval;
324 :
325 : /* Must be scanning already */
326 : Assert(state->scanbufhandle != NULL);
327 :
328 : /* Set current output target */
329 : state->output_buf = word_buf;
330 2462 : resetPQExpBuffer(word_buf);
331 2462 :
332 : /* Set input source */
333 : if (state->buffer_stack != NULL)
334 2462 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
335 0 : else
336 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
337 2462 :
338 : /* Set start state */
339 : state->start_state = INITIAL;
340 2462 :
341 : /* And lex. */
342 : lexresult = yylex(&lval, state->scanner);
343 2462 :
344 : /*
345 : * Save start offset of word, if any. We could do this more efficiently,
346 : * but for now this seems fine.
347 : */
348 : if (lexresult)
349 2462 : *offset = expr_scanner_offset(state) - word_buf->len;
350 2272 : else
351 : *offset = -1;
352 190 :
353 : /*
354 : * In case the caller returns to using the regular SQL lexer, reselect the
355 : * appropriate initial state.
356 : */
357 : psql_scan_reselect_sql_lexer(state);
358 2462 :
359 : return (bool) lexresult;
360 2462 : }
361 :
362 : /*
363 : * Prepare to lex an expression via expr_yyparse().
364 : *
365 : * Returns the yyscan_t that is to be passed to expr_yyparse().
366 : * (This is just state->scanner, but callers don't need to know that.)
367 : */
368 : yyscan_t
369 : expr_scanner_init(PsqlScanState state,
370 772 : const char *source, int lineno, int start_offset,
371 : const char *command)
372 : {
373 : /* Save error context info */
374 : expr_source = source;
375 772 : expr_lineno = lineno;
376 772 : expr_start_offset = start_offset;
377 772 : expr_command = command;
378 772 :
379 : /* Must be scanning already */
380 : Assert(state->scanbufhandle != NULL);
381 :
382 : /* Set current output target */
383 : state->output_buf = NULL;
384 772 :
385 : /* Set input source */
386 : if (state->buffer_stack != NULL)
387 772 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
388 0 : else
389 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
390 772 :
391 : /* Set start state */
392 : state->start_state = EXPR;
393 772 :
394 : return state->scanner;
395 772 : }
396 :
397 : /*
398 : * Finish lexing an expression.
399 : */
400 : void
401 : expr_scanner_finish(yyscan_t yyscanner)
402 734 : {
403 : PsqlScanState state = yyget_extra(yyscanner);
404 734 :
405 : /*
406 : * Reselect appropriate initial state for SQL lexer.
407 : */
408 : psql_scan_reselect_sql_lexer(state);
409 734 : }
410 734 :
411 : /*
412 : * Get offset from start of string to end of current lexer token.
413 : *
414 : * We rely on the knowledge that flex modifies the scan buffer by storing
415 : * a NUL at the end of the current token (yytext). Note that this might
416 : * not work quite right if we were parsing a sub-buffer, but since pgbench
417 : * never invokes that functionality, it doesn't matter.
418 : */
419 : int
420 : expr_scanner_offset(PsqlScanState state)
421 4774 : {
422 : return strlen(state->scanbuf);
423 4774 : }
424 :
425 : /*
426 : * Get a malloc'd copy of the lexer input string from start_offset
427 : * to just before end_offset. If chomp is true, drop any trailing
428 : * newline(s).
429 : */
430 : char *
431 : expr_scanner_get_substring(PsqlScanState state,
432 960 : int start_offset, int end_offset,
433 : bool chomp)
434 : {
435 : char *result;
436 : const char *scanptr = state->scanbuf + start_offset;
437 960 : int slen = end_offset - start_offset;
438 960 :
439 : Assert(slen >= 0);
440 : Assert(end_offset <= strlen(state->scanbuf));
441 :
442 : if (chomp)
443 960 : {
444 : while (slen > 0 &&
445 1812 : (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
446 1812 : slen--;
447 852 : }
448 :
449 : result = (char *) pg_malloc(slen + 1);
450 960 : memcpy(result, scanptr, slen);
451 960 : result[slen] = '\0';
452 960 :
453 : return result;
454 960 : }
455 :
456 : /*
457 : * Get the line number associated with the given string offset
458 : * (which must not be past the end of where we've lexed to).
459 : */
460 : int
461 : expr_scanner_get_lineno(PsqlScanState state, int offset)
462 3090 : {
463 : int lineno = 1;
464 3090 : const char *p = state->scanbuf;
465 3090 :
466 : while (*p && offset > 0)
467 1553248 : {
468 : if (*p == '\n')
469 1550158 : lineno++;
470 40492 : p++, offset--;
471 1550158 : }
472 : return lineno;
473 3090 : }
|