Line data Source code
1 : %{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * exprscan.l
5 : * lexical scanner for pgbench backslash commands
6 : *
7 : * This lexer supports two operating modes:
8 : *
9 : * In INITIAL state, just parse off whitespace-separated words (this mode
10 : * is basically equivalent to strtok(), which is what we used to use).
11 : *
12 : * In EXPR state, lex for the simple expression syntax of exprparse.y.
13 : *
14 : * In either mode, stop upon hitting newline or end of string.
15 : *
16 : * Note that this lexer operates within the framework created by psqlscan.l
16 : * (see fe_utils/psqlscan_int.h, included below).
17 : *
18 : * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
19 : * Portions Copyright (c) 1994, Regents of the University of California
20 : *
21 : * src/bin/pgbench/exprscan.l
22 : *
23 : *-------------------------------------------------------------------------
24 : */
25 :
26 : #include "fe_utils/psqlscan_int.h"
27 :
28 : /* context information for reporting errors in expressions */
29 : static const char *expr_source = NULL;
30 : static int expr_lineno = 0;
31 : static int expr_start_offset = 0;
32 : static const char *expr_command = NULL;
33 :
34 : /* indicates whether last yylex() call read a newline */
35 : static bool last_was_newline = false;
36 :
37 : /*
38 : * Work around a bug in flex 2.5.35: it emits a couple of functions that
39 : * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
40 : * this would cause warnings. Providing our own declarations should be
41 : * harmless even when the bug gets fixed.
42 : */
43 : extern int expr_yyget_column(yyscan_t yyscanner);
44 : extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
45 :
46 : /* LCOV_EXCL_START */
47 :
48 : %}
49 :
50 : /* Except for the prefix, these options should match psqlscan.l */
51 : %option reentrant
52 : %option bison-bridge
53 : %option 8bit
54 : %option never-interactive
55 : %option nodefault
56 : %option noinput
57 : %option nounput
58 : %option noyywrap
59 : %option warn
60 : %option prefix="expr_yy"
61 :
62 : /* Character classes */
63 : alpha [a-zA-Z\200-\377_]
64 : digit [0-9]
65 : alnum [A-Za-z\200-\377_0-9]
66 : /* {space} + {nonspace} + {newline} should cover all characters */
67 : space [ \t\r\f\v]
68 : nonspace [^ \t\r\f\v\n]
69 : newline [\n]
70 :
71 : /* Line continuation marker */
72 : continuation \\\r?{newline}
73 :
74 : /* case insensitive keywords */
75 : and [Aa][Nn][Dd]
76 : or [Oo][Rr]
77 : not [Nn][Oo][Tt]
78 : case [Cc][Aa][Ss][Ee]
79 : when [Ww][Hh][Ee][Nn]
80 : then [Tt][Hh][Ee][Nn]
81 : else [Ee][Ll][Ss][Ee]
82 : end [Ee][Nn][Dd]
83 : true [Tt][Rr][Uu][Ee]
84 : false [Ff][Aa][Ll][Ss][Ee]
85 : null [Nn][Uu][Ll][Ll]
86 : is [Ii][Ss]
87 : isnull [Ii][Ss][Nn][Uu][Ll][Ll]
88 : notnull [Nn][Oo][Tt][Nn][Uu][Ll][Ll]
89 :
90 : /* Exclusive states */
91 : %x EXPR
92 :
93 : %%
94 :
95 : %{
96 : /* Declare some local variables inside yylex(), for convenience */
97 : PsqlScanState cur_state = yyextra;
98 :
99 : /*
100 : * Force flex into the state indicated by start_state. This has a
101 : * couple of purposes: it lets some of the functions below set a new
102 : * starting state without ugly direct access to flex variables, and it
103 : * allows us to transition from one flex lexer to another so that we
104 : * can lex different parts of the source string using separate lexers.
105 : */
106 : BEGIN(cur_state->start_state);
107 :
108 : /* Reset was-newline flag */
109 : last_was_newline = false;
110 : %}
111 :
112 : /* INITIAL state */
113 :
114 : {nonspace}+ {
115 : /* Found a word, emit and return it */
116 : psqlscan_emit(cur_state, yytext, yyleng);
117 : return 1;
118 : }
119 :
120 : /*
121 : * We need this rule to avoid returning "word\" instead of recognizing
122 : * a continuation marker just after a word:
123 : */
124 : {nonspace}+{continuation} {
125 : /* Found "word\\\r?\n", emit and return just "word" */
126 : int wordlen = yyleng - 2;
127 : if (yytext[wordlen] == '\r')
128 : wordlen--;
129 : Assert(yytext[wordlen] == '\\');
130 : psqlscan_emit(cur_state, yytext, wordlen);
131 : return 1;
132 : }
133 :
134 : {space}+ { /* ignore */ }
135 :
136 : {continuation} { /* ignore */ }
137 :
138 : {newline} {
139 : /* report end of command */
140 : last_was_newline = true;
141 : return 0;
142 : }
143 :
144 : /* EXPR state */
145 :
146 : <EXPR>{
147 :
148 : "+" { return '+'; }
149 : "-" { return '-'; }
150 : "*" { return '*'; }
151 : "/" { return '/'; }
152 : "%" { return '%'; } /* C version, also in Pg SQL */
153 : "=" { return '='; }
154 : "<>" { return NE_OP; }
155 : "!=" { return NE_OP; } /* C version, also in Pg SQL */
156 : "<=" { return LE_OP; }
157 : ">=" { return GE_OP; }
158 : "<<" { return LS_OP; }
159 : ">>" { return RS_OP; }
160 : "<" { return '<'; }
161 : ">" { return '>'; }
162 : "|" { return '|'; }
163 : "&" { return '&'; }
164 : "#" { return '#'; }
165 : "~" { return '~'; }
166 :
167 : "(" { return '('; }
168 : ")" { return ')'; }
169 : "," { return ','; }
170 :
171 : {and} { return AND_OP; }
172 : {or} { return OR_OP; }
173 : {not} { return NOT_OP; }
174 : {is} { return IS_OP; }
175 : {isnull} { return ISNULL_OP; }
176 : {notnull} { return NOTNULL_OP; }
177 :
178 : {case} { return CASE_KW; }
179 : {when} { return WHEN_KW; }
180 : {then} { return THEN_KW; }
181 : {else} { return ELSE_KW; }
182 : {end} { return END_KW; }
183 :
184 : :{alnum}+ {
185 : yylval->str = pg_strdup(yytext + 1);
186 : return VARIABLE;
187 : }
188 :
189 : {null} { return NULL_CONST; }
190 : {true} {
191 : yylval->bval = true;
192 : return BOOLEAN_CONST;
193 : }
194 : {false} {
195 : yylval->bval = false;
196 : return BOOLEAN_CONST;
197 : }
198 : "9223372036854775808" {
199 : /*
200 : * Special handling for PG_INT64_MIN, which can't
201 : * accurately be represented here, as the minus sign is
202 : * lexed separately and INT64_MIN can't be represented as
203 : * a positive integer.
204 : */
205 : return MAXINT_PLUS_ONE_CONST;
206 : }
207 : {digit}+ {
208 : if (!strtoint64(yytext, true, &yylval->ival))
209 : expr_yyerror_more(yyscanner, "bigint constant overflow",
210 : strdup(yytext));
211 : return INTEGER_CONST;
212 : }
213 : {digit}+(\.{digit}*)?([eE][-+]?{digit}+)? {
214 : if (!strtodouble(yytext, true, &yylval->dval))
215 : expr_yyerror_more(yyscanner, "double constant overflow",
216 : strdup(yytext));
217 : return DOUBLE_CONST;
218 : }
219 : \.{digit}+([eE][-+]?{digit}+)? {
220 : if (!strtodouble(yytext, true, &yylval->dval))
221 : expr_yyerror_more(yyscanner, "double constant overflow",
222 : strdup(yytext));
223 : return DOUBLE_CONST;
224 : }
225 : {alpha}{alnum}* {
226 : yylval->str = pg_strdup(yytext);
227 : return FUNCTION;
228 : }
229 :
230 : {space}+ { /* ignore */ }
231 :
232 : {continuation} { /* ignore */ }
233 :
234 : {newline} {
235 : /* report end of command */
236 : last_was_newline = true;
237 : return 0;
238 : }
239 :
240 : . {
241 : /*
242 : * must strdup yytext so that expr_yyerror_more doesn't
243 : * change it while finding end of line
244 : */
245 : expr_yyerror_more(yyscanner, "unexpected character",
246 : pg_strdup(yytext));
247 : /* NOTREACHED, syntax_error calls exit() */
248 : return 0;
249 : }
250 :
251 : }
252 :
253 : <<EOF>> {
254 : if (cur_state->buffer_stack == NULL)
255 : return 0; /* end of input reached */
256 :
257 : /*
258 : * We were expanding a variable, so pop the inclusion
259 : * stack and keep lexing
260 : */
261 : psqlscan_pop_buffer_stack(cur_state);
262 : psqlscan_select_top_buffer(cur_state);
263 : }
264 :
265 : %%
266 :
267 : /* LCOV_EXCL_STOP */
268 :
269 : void
270 : expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
271 38 : {
272 : PsqlScanState state = yyget_extra(yyscanner);
273 38 : int error_detection_offset = expr_scanner_offset(state) - 1;
274 38 : YYSTYPE lval;
275 : char *full_line;
276 :
277 : /*
278 : * While parsing an expression, we may not have collected the whole line
279 : * yet from the input source. Lex till EOL so we can report whole line.
280 : * (If we're at EOF, it's okay to call yylex() an extra time.)
281 : */
282 : if (!last_was_newline)
283 38 : {
284 : while (yylex(&lval, yyscanner))
285 50 : /* skip */ ;
286 : }
287 :
288 : /* Extract the line, trimming trailing newline if any */
289 : full_line = expr_scanner_get_substring(state,
290 38 : expr_start_offset,
291 : expr_scanner_offset(state),
292 : true);
293 :
294 : syntax_error(expr_source, expr_lineno, full_line, expr_command,
295 38 : message, more, error_detection_offset - expr_start_offset);
296 : }
297 :
298 : void
299 : expr_yyerror(yyscan_t yyscanner, const char *message)
300 12 : {
301 : expr_yyerror_more(yyscanner, message, NULL);
302 12 : }
303 :
304 : /*
305 : * Collect a space-separated word from a backslash command and return it
306 : * in word_buf, along with its starting string offset in *offset.
307 : * Returns true if successful, false if at end of command.
308 : */
309 : bool
310 : expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
311 2396 : {
312 : int lexresult;
313 : YYSTYPE lval;
314 :
315 : /* Must be scanning already */
316 : Assert(state->scanbufhandle != NULL);
317 :
318 : /* Set current output target */
319 : state->output_buf = word_buf;
320 2396 : resetPQExpBuffer(word_buf);
321 2396 :
322 : /* Set input source */
323 : if (state->buffer_stack != NULL)
324 2396 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
325 0 : else
326 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
327 2396 :
328 : /* Set start state */
329 : state->start_state = INITIAL;
330 2396 :
331 : /* And lex. */
332 : lexresult = yylex(&lval, state->scanner);
333 2396 :
334 : /*
335 : * Save start offset of word, if any. We could do this more efficiently,
336 : * but for now this seems fine.
337 : */
338 : if (lexresult)
339 2396 : *offset = expr_scanner_offset(state) - word_buf->len;
340 2236 : else
341 : *offset = -1;
342 160 :
343 : /*
344 : * In case the caller returns to using the regular SQL lexer, reselect the
345 : * appropriate initial state.
346 : */
347 : psql_scan_reselect_sql_lexer(state);
348 2396 :
349 : return (bool) lexresult;
350 2396 : }
351 :
352 : /*
353 : * Prepare to lex an expression via expr_yyparse().
354 : *
355 : * Returns the yyscan_t that is to be passed to expr_yyparse().
356 : * (This is just state->scanner, but callers don't need to know that.)
357 : */
358 : yyscan_t
359 : expr_scanner_init(PsqlScanState state,
360 768 : const char *source, int lineno, int start_offset,
361 : const char *command)
362 : {
363 : /* Save error context info */
364 : expr_source = source;
365 768 : expr_lineno = lineno;
366 768 : expr_start_offset = start_offset;
367 768 : expr_command = command;
368 768 :
369 : /* Must be scanning already */
370 : Assert(state->scanbufhandle != NULL);
371 :
372 : /* Set current output target */
373 : state->output_buf = NULL;
374 768 :
375 : /* Set input source */
376 : if (state->buffer_stack != NULL)
377 768 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
378 0 : else
379 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
380 768 :
381 : /* Set start state */
382 : state->start_state = EXPR;
383 768 :
384 : return state->scanner;
385 768 : }
386 :
387 : /*
388 : * Finish lexing an expression.
389 : */
390 : void
391 : expr_scanner_finish(yyscan_t yyscanner)
392 730 : {
393 : PsqlScanState state = yyget_extra(yyscanner);
394 730 :
395 : /*
396 : * Reselect appropriate initial state for SQL lexer.
397 : */
398 : psql_scan_reselect_sql_lexer(state);
399 730 : }
400 730 :
401 : /*
402 : * Get offset from start of string to end of current lexer token.
403 : *
404 : * We rely on the knowledge that flex modifies the scan buffer by storing
405 : * a NUL at the end of the current token (yytext). Note that this might
406 : * not work quite right if we were parsing a sub-buffer, but since pgbench
407 : * never invokes that functionality, it doesn't matter.
408 : */
409 : int
410 : expr_scanner_offset(PsqlScanState state)
411 4652 : {
412 : return strlen(state->scanbuf);
413 4652 : }
414 :
415 : /*
416 : * Get a malloc'd copy of the lexer input string from start_offset
417 : * to just before end_offset. If chomp is true, drop any trailing
418 : * newline(s).
419 : */
420 : char *
421 : expr_scanner_get_substring(PsqlScanState state,
422 926 : int start_offset, int end_offset,
423 : bool chomp)
424 : {
425 : char *result;
426 : const char *scanptr = state->scanbuf + start_offset;
427 926 : int slen = end_offset - start_offset;
428 926 :
429 : Assert(slen >= 0);
430 : Assert(end_offset <= strlen(state->scanbuf));
431 :
432 : if (chomp)
433 926 : {
434 : while (slen > 0 &&
435 1744 : (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
436 1744 : slen--;
437 818 : }
438 :
439 : result = (char *) pg_malloc(slen + 1);
440 926 : memcpy(result, scanptr, slen);
441 926 : result[slen] = '\0';
442 926 :
443 : return result;
444 926 : }
445 :
446 : /*
447 : * Get the line number associated with the given string offset
448 : * (which must not be past the end of where we've lexed to).
449 : */
450 : int
451 : expr_scanner_get_lineno(PsqlScanState state, int offset)
452 2970 : {
453 : int lineno = 1;
454 2970 : const char *p = state->scanbuf;
455 2970 :
456 : while (*p && offset > 0)
457 1541770 : {
458 : if (*p == '\n')
459 1538800 : lineno++;
460 39786 : p++, offset--;
461 1538800 : }
462 : return lineno;
463 2970 : }
|