Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * pgc.l
5 : * lexical scanner for ecpg
6 : *
7 : * This is a modified version of src/backend/parser/scan.l
8 : *
9 : * The ecpg scanner is not backup-free, so the fail rules are
10 : * only here to simplify syncing this file with scan.l.
11 : *
12 : *
13 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
14 : * Portions Copyright (c) 1994, Regents of the University of California
15 : *
16 : * IDENTIFICATION
17 : * src/interfaces/ecpg/preproc/pgc.l
18 : *
19 : *-------------------------------------------------------------------------
20 : */
21 : #include "postgres_fe.h"
22 :
23 : #include <ctype.h>
24 : #include <limits.h>
25 :
26 : #include "common/string.h"
27 :
28 : #include "preproc_extern.h"
29 : #include "preproc.h"
30 : }
31 :
32 : %{
33 :
34 : /* LCOV_EXCL_START */
35 :
36 : extern YYSTYPE base_yylval;
37 :
38 : static int xcdepth = 0; /* depth of nesting in slash-star comments */
39 : static char *dolqstart = NULL; /* current $foo$ quote start string */
40 :
41 : /*
42 : * literalbuf is used to accumulate literal values when multiple rules
43 : * are needed to parse a single literal. Call startlit to reset buffer
44 : * to empty, addlit to add text. Note that the buffer is permanently
45 : * malloc'd to the largest size needed so far in the current run.
46 : */
47 : static char *literalbuf = NULL; /* expandable buffer */
48 : static int literallen; /* actual current length */
49 : static int literalalloc; /* current allocated buffer size */
50 :
51 : /* Used for detecting global state together with braces_open */
52 : static int parenths_open;
53 :
54 : /* Used to tell parse_include() whether the command was #include or #include_next */
55 : static bool include_next;
56 :
57 : #define startlit() (literalbuf[0] = '\0', literallen = 0)
58 : static void addlit(char *ytext, int yleng);
59 : static void addlitchar(unsigned char ychar);
60 : static int process_integer_literal(const char *token, YYSTYPE *lval, int base);
61 : static void parse_include(void);
62 : static bool ecpg_isspace(char ch);
63 : static bool isdefine(void);
64 : static bool isinformixdefine(void);
65 :
66 : char *token_start;
67 :
68 : /* vars to keep track of start conditions when scanning literals */
69 : static int state_before_str_start;
70 : static int state_before_str_stop;
71 :
72 : /*
73 : * State for handling include files and macro expansion. We use a new
74 : * flex input buffer for each level of include or macro, and create a
75 : * struct _yy_buffer to remember the previous level. There is not a struct
76 : * for the currently active input source; that state is kept in the global
77 : * variables YY_CURRENT_BUFFER, yylineno, and input_filename.
78 : */
79 : static struct _yy_buffer
80 : {
81 : YY_BUFFER_STATE buffer; /* flex input buffer of the remembered (outer) level */
82 : long lineno; /* presumably the yylineno to restore for that level -- TODO confirm */
83 : char *filename; /* presumably the input_filename to restore for that level -- TODO confirm */
84 : struct _yy_buffer *next; /* link to the next remembered level */
85 : } *yy_buffer = NULL; /* head of the list of remembered levels; starts out empty */
86 :
87 : /*
88 : * Vars for handling ifdef/elif/endif constructs. preproc_tos is the current
89 : * nesting depth of such constructs, and stacked_if_value[preproc_tos] is the
90 : * state for the innermost level. (For convenience, stacked_if_value[0] is
91 : * initialized as though we are in the active branch of some outermost IF.)
92 : * The active field is true if the current branch is active (being expanded).
93 : * The saw_active field is true if we have found any successful branch,
94 : * so that all subsequent branches of this level should be skipped.
95 : * The else_branch field is true if we've found an 'else' (so that another
96 : * 'else' or 'elif' at this level is an error.)
97 : * For IFs nested within an inactive branch, all branches always have active
98 : * set to false, but saw_active and else_branch are maintained normally.
99 : * ifcond is valid only while evaluating an if-condition; it's true if we
100 : * are doing ifdef, false if ifndef.
101 : */
102 : #define MAX_NESTED_IF 128
103 : static short preproc_tos; /* nesting depth; indexes the innermost entry of stacked_if_value[] */
104 : static bool ifcond; /* while evaluating a condition: true for ifdef, false for ifndef */
105 : static struct _if_value
106 : {
107 : bool active; /* the current branch is being expanded */
108 : bool saw_active; /* a branch of this level already succeeded; skip later ones */
109 : bool else_branch; /* an else was seen, so another else or elif is an error */
110 : } stacked_if_value[MAX_NESTED_IF];
111 :
112 : %}
113 :
114 : %option 8bit
115 : %option never-interactive
116 : %option nodefault
117 : %option noinput
118 : %option noyywrap
119 : %option warn
120 : %option yylineno
121 : %option prefix="base_yy"
122 :
123 : /*
124 : * OK, here is a short description of lex/flex rules behavior.
125 : * The longest pattern which matches an input string is always chosen.
126 : * For equal-length patterns, the first occurring in the rules list is chosen.
127 : * INITIAL is the starting state, to which all non-conditional rules apply.
128 : * Exclusive states change parsing rules while the state is active. When in
129 : * an exclusive state, only those rules defined for that state apply.
130 : *
131 : * We use exclusive states for quoted strings, extended comments,
132 : * and to eliminate parsing troubles for numeric strings.
133 : * Exclusive states:
134 : * <xb> bit string literal
135 : * <xc> extended C-style comments
136 : * <xd> delimited identifiers (double-quoted identifiers)
137 : * <xdc> double-quoted strings in C
138 : * <xh> hexadecimal byte string
139 : * <xn> national character quoted strings
140 : * <xq> standard quoted strings
141 : * <xqs> quote stop (detect continued strings)
142 : * <xe> extended quoted strings (support backslash escape sequences)
143 : * <xqc> single-quoted strings in C
144 : * <xdolq> $foo$ quoted strings
145 : * <xui> quoted identifier with Unicode escapes
146 : * <xus> quoted string with Unicode escapes
147 : * <xcond> condition of an EXEC SQL IFDEF construct
148 : * <xskip> skipping the inactive part of an EXEC SQL IFDEF construct
149 : *
150 : * Note: we intentionally don't mimic the backend's <xeu> state; we have
151 : * no need to distinguish it from <xe> state.
152 : *
153 : * Remember to add an <<EOF>> case whenever you add a new exclusive state!
154 : * The default one is probably not the right thing.
155 : */
156 :
157 : %x xb
158 : %x xc
159 : %x xd
160 : %x xdc
161 : %x xh
162 : %x xn
163 : %x xq
164 : %x xqs
165 : %x xe
166 : %x xqc
167 : %x xdolq
168 : %x xui
169 : %x xus
170 : %x xcond
171 : %x xskip
172 :
173 : /* Additional exclusive states that are specific to ECPG */
174 : %x C SQL incl def def_ident undef
175 :
176 : /*
177 : * In order to make the world safe for Windows and Mac clients as well as
178 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
179 : * sequence will be seen as two successive newlines, but that doesn't cause
180 : * any problems. SQL-style comments, which start with -- and extend to the
181 : * next newline, are treated as equivalent to a single whitespace character.
182 : *
183 : * NOTE a fine point: if there is no newline following --, we will absorb
184 : * everything to the end of the input as a comment. This is correct. Older
185 : * versions of Postgres failed to recognize -- as a comment if the input
186 : * did not end with a newline.
187 : *
188 : * non_newline_space tracks all space characters except newlines.
189 : *
190 : * XXX if you change the set of whitespace characters, fix ecpg_isspace()
191 : * to agree.
192 : */
193 :
194 : space [ \t\n\r\f\v]
195 : non_newline_space [ \t\f\v]
196 : newline [\n\r]
197 : non_newline [^\n\r]
198 :
199 : comment ("--"{non_newline}*)
200 :
201 : whitespace ({space}+|{comment})
202 :
203 : /*
204 : * SQL requires at least one newline in the whitespace separating
205 : * string literals that are to be concatenated. Silly, but who are we
206 : * to argue? Note that {whitespace_with_newline} should not have * after
207 : * it, whereas {whitespace} should generally have a * after it...
208 : */
209 :
210 : non_newline_whitespace ({non_newline_space}|{comment})
211 : whitespace_with_newline ({non_newline_whitespace}*{newline}{whitespace}*)
212 :
213 : quote '
214 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
215 : quotecontinue {whitespace_with_newline}{quote}
216 :
217 : /*
218 : * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
219 : * {quotecontinue}. It might seem that this could just be {whitespace}*,
220 : * but if there's a dash after {whitespace_with_newline}, it must be consumed
221 : * to see if there's another dash --- which would start a {comment} and thus
222 : * allow continuation of the {quotecontinue} token.
223 : */
224 : quotecontinuefail {whitespace}*"-"?
225 :
226 : /* Bit string
227 : */
228 : xbstart [bB]{quote}
229 : xbinside [^']*
230 :
231 : /* Hexadecimal byte string */
232 : xhstart [xX]{quote}
233 : xhinside [^']*
234 :
235 : /* National character */
236 : xnstart [nN]{quote}
237 :
238 : /* Quoted string that allows backslash escapes */
239 : xestart [eE]{quote}
240 : xeinside [^\\']+
241 : xeescape [\\][^0-7]
242 : xeoctesc [\\][0-7]{1,3}
243 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
244 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
245 :
246 : /* Extended quote
247 : * xqdouble implements embedded quote, ''''
248 : */
249 : xqstart {quote}
250 : xqdouble {quote}{quote}
251 : xqcquote [\\]{quote}
252 : xqinside [^']+
253 :
254 : /* $foo$ style quotes ("dollar quoting")
255 : * The quoted string starts with $foo$ where "foo" is an optional string
256 : * in the form of an identifier, except that it may not contain "$",
257 : * and extends to the first occurrence of an identical string.
258 : * There is *no* processing of the quoted text.
259 : *
260 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
261 : * fails to match its trailing "$".
262 : */
263 : dolq_start [A-Za-z\200-\377_]
264 : dolq_cont [A-Za-z\200-\377_0-9]
265 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
266 : dolqfailed \${dolq_start}{dolq_cont}*
267 : dolqinside [^$]+
268 :
269 : /* Double quote
270 : * Allows embedded spaces and other special characters into identifiers.
271 : */
272 : dquote \"
273 : xdstart {dquote}
274 : xdstop {dquote}
275 : xddouble {dquote}{dquote}
276 : xdinside [^"]+
277 :
278 : /* Quoted identifier with Unicode escapes */
279 : xuistart [uU]&{dquote}
280 :
281 : /* Quoted string with Unicode escapes */
282 : xusstart [uU]&{quote}
283 :
284 : /* special stuff for C strings */
285 : xdcqq \\\\
286 : xdcqdq \\\"
287 : xdcother [^"]
288 : xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
289 :
290 :
291 : /* C-style comments
292 : *
293 : * The "extended comment" syntax closely resembles allowable operator syntax.
294 : * The tricky part here is to get lex to recognize a string starting with
295 : * slash-star as a comment, when interpreting it as an operator would produce
296 : * a longer match --- remember lex will prefer a longer match! Also, if we
297 : * have something like plus-slash-star, lex will think this is a 3-character
298 : * operator whereas we want to see it as a + operator and a comment start.
299 : * The solution is two-fold:
300 : * 1. append {op_chars}* to xcstart so that it matches as much text as
301 : * {operator} would. Then the tie-breaker (first matching rule of same
302 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
303 : * in case it contains a star-slash that should terminate the comment.
304 : * 2. In the operator rule, check for slash-star within the operator, and
305 : * if found throw it back with yyless(). This handles the plus-slash-star
306 : * problem.
307 : * Dash-dash comments have similar interactions with the operator rule.
308 : */
309 : xcstart \/\*{op_chars}*
310 : xcstop \*+\/
311 : xcinside [^*/]+
312 :
313 : ident_start [A-Za-z\200-\377_]
314 : ident_cont [A-Za-z\200-\377_0-9\$]
315 :
316 : identifier {ident_start}{ident_cont}*
317 :
318 : array ({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])*
319 :
320 : /* Assorted special-case operators and operator-like tokens */
321 : typecast "::"
322 : dot_dot \.\.
323 : colon_equals ":="
324 :
325 : /*
326 : * These operator-like tokens (unlike the above ones) also match the {operator}
327 : * rule, which means that they might be overridden by a longer match if they
328 : * are followed by a comment start or a + or - character. Accordingly, if you
329 : * add to this list, you must also add corresponding code to the {operator}
330 : * block to return the correct token in such cases. (This is not needed in
331 : * psqlscan.l since the token value is ignored there.)
332 : */
333 : equals_greater "=>"
334 : less_equals "<="
335 : greater_equals ">="
336 : less_greater "<>"
337 : not_equals "!="
338 : /* Note there is no need for left_arrow, since "<-" is not a single operator. */
339 : right_arrow "->"
340 :
341 : /*
342 : * "self" is the set of chars that should be returned as single-character
343 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
344 : * which can be one or more characters long (but if a single-char token
345 : * appears in the "self" set, it is not to be returned as an Op). Note
346 : * that the sets overlap, but each has some chars that are not in the other.
347 : *
348 : * If you change either set, adjust the character lists appearing in the
349 : * rule for "operator"!
350 : */
351 : self [,()\[\].;\:\|\+\-\*\/\%\^\<\>\=]
352 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
353 : operator {op_chars}+
354 :
355 : /*
356 : * Numbers
357 : *
358 : * Unary minus is not part of a number here. Instead we pass it separately to
359 : * the parser, and there it gets coerced via doNegate().
360 : *
361 : * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
362 : *
363 : * {realfail} is added to prevent the need for scanner
364 : * backup when the {real} rule fails to match completely.
365 : */
366 : decdigit [0-9]
367 : hexdigit [0-9A-Fa-f]
368 : octdigit [0-7]
369 : bindigit [0-1]
370 :
371 : decinteger {decdigit}(_?{decdigit})*
372 : hexinteger 0[xX](_?{hexdigit})+
373 : octinteger 0[oO](_?{octdigit})+
374 : bininteger 0[bB](_?{bindigit})+
375 :
376 : hexfail 0[xX]_?
377 : octfail 0[oO]_?
378 : binfail 0[bB]_?
379 :
380 : numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
381 : numericfail {decinteger}\.\.
382 :
383 : real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
384 : realfail ({decinteger}|{numeric})[Ee][-+]
385 :
386 : /* Positional parameters don't accept underscores. */
387 : param \${decdigit}+
388 :
389 : /*
390 : * An identifier immediately following an integer literal is disallowed because
391 : * in some cases it's ambiguous what is meant: for example, 0x1234 could be
392 : * either a hexinteger or a decinteger "0" and an identifier "x1234". We can
393 : * detect such problems by seeing if integer_junk matches a longer substring
394 : * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
395 : * bininteger). One "junk" pattern is sufficient because
396 : * {decinteger}{identifier} will match all the same strings we'd match with
397 : * {hexinteger}{identifier} etc.
398 : *
399 : * Note that the rule for integer_junk must appear after the ones for
400 : * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
401 : * and integer_junk, and we need hexinteger to be chosen in that case.
402 : *
403 : * Also disallow strings matched by numeric_junk, real_junk and param_junk
404 : * for consistency.
405 : */
406 : integer_junk {decinteger}{identifier}
407 : numeric_junk {numeric}{identifier}
408 : real_junk {real}{identifier}
409 : param_junk \${decdigit}+{identifier}
410 :
411 : /* special characters for other dbms */
412 : /* we have to react differently in compat mode */
413 : informix_special [\$]
414 :
415 : other .
416 :
417 : /*
418 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
419 : * Other quoted strings must allow some special characters such as single-quote
420 : * and newline.
421 : * Embedded single-quotes are implemented both in the SQL standard
422 : * style of two adjacent single quotes "''" and in the Postgres/Java style
423 : * of escaped-quote "\'".
424 : * Other embedded escaped characters are matched explicitly and the leading
425 : * backslash is dropped from the string.
426 : * Note that xcstart must appear before operator, as explained above!
427 : * Also whitespace (comment) must appear before operator.
428 : */
429 :
430 : /* some stuff needed for ecpg */
431 : exec [eE][xX][eE][cC]
432 : sql [sS][qQ][lL]
433 : define [dD][eE][fF][iI][nN][eE]
434 : include [iI][nN][cC][lL][uU][dD][eE]
435 : include_next [iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
436 : import [iI][mM][pP][oO][rR][tT]
437 : undef [uU][nN][dD][eE][fF]
438 :
439 : ccomment "//".*\n
440 :
441 : if [iI][fF]
442 : ifdef [iI][fF][dD][eE][fF]
443 : ifndef [iI][fF][nN][dD][eE][fF]
444 : else [eE][lL][sS][eE]
445 : elif [eE][lL][iI][fF]
446 : endif [eE][nN][dD][iI][fF]
447 :
448 : struct [sS][tT][rR][uU][cC][tT]
449 :
450 : exec_sql {exec}{space}*{sql}{space}*
451 : ipdigit ({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
452 : ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
453 :
454 : /* we might want to parse all cpp include files */
455 : cppinclude {space}*#{include}{space}*
456 : cppinclude_next {space}*#{include_next}{space}*
457 :
458 : /* Take care of cpp lines; they may also be continued with a backslash. */
459 : /* First comes a general pattern for all commands not starting with "i". */
460 : /* The other commands starting with "i" are then listed explicitly; they
461 : * must be added separately because a general pattern for them would make
462 : * the cppline production match "include" too. */
463 : cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline}
464 :
465 : %%
466 :
467 : %{
468 : /* code to execute during start of each call of yylex() */
469 : char *newdefsymbol = NULL;
470 36696 :
471 : token_start = NULL;
472 36696 : %}
473 :
474 : <SQL>{
475 : {whitespace} {
476 : /* ignore */
477 : }
478 : } /* <SQL> */
479 5560 :
480 : <C,SQL>{
481 : {xcstart} {
482 534 : token_start = yytext;
483 534 : state_before_str_start = YYSTATE;
484 534 : xcdepth = 0;
485 534 : BEGIN(xc);
486 534 : /* Put back any characters past slash-star; see above */
487 : yyless(2);
488 686 : fputs("/*", yyout);
489 534 : }
490 : } /* <C,SQL> */
491 534 :
492 : <xc>{
493 : {xcstart} { /* a slash-star seen while already inside a comment */
494 0 : if (state_before_str_start == SQL)
495 0 : {
496 : xcdepth++; /* one more nested level must close before leaving the xc state */
497 0 : /* Keep only the slash-star itself; trailing operator characters are rescanned */
498 : yyless(2);
499 0 : fputs("/_*", yyout); /* written in place of the nested opener, with an underscore inserted */
500 0 : }
501 : else if (state_before_str_start == C)
502 0 : {
503 : ECHO; /* NOTE(review): whole match is echoed, no yyless(); for slash-star-slash the closing star-slash is then not seen by xcstop -- confirm intended */
504 0 : }
505 : }
506 :
507 0 : {xcstop} { /* one or more stars followed by a slash */
508 534 : if (state_before_str_start == SQL)
509 534 : {
510 : if (xcdepth <= 0)
511 0 : {
512 : ECHO; /* outermost level: copy the terminator as matched */
513 0 : BEGIN(SQL);
514 0 : token_start = NULL;
515 0 : }
516 : else
517 : {
518 : xcdepth--; /* closes a nested level only; the scanner stays in the xc state */
519 0 : fputs("*_/", yyout); /* written in place of the nested terminator, with an underscore inserted */
520 0 : }
521 : }
522 : else if (state_before_str_start == C)
523 534 : {
524 : ECHO; /* comments entered from C state ignore xcdepth: this terminator ends the comment */
525 534 : BEGIN(C);
526 534 : token_start = NULL;
527 534 : }
528 : }
529 :
530 534 : {xcinside} {
531 634 : ECHO;
532 634 : }
533 :
534 634 : {op_chars} {
535 100 : ECHO;
536 100 : }
537 :
538 100 : \*+ {
539 1 : ECHO;
540 1 : }
541 :
542 1 : <<EOF>> {
543 0 : mmfatal(PARSE_ERROR, "unterminated /* comment");
544 0 : }
545 : } /* <xc> */
546 :
547 : <SQL>{
548 : {xbstart} {
549 1 : token_start = yytext;
550 1 : state_before_str_start = YYSTATE;
551 1 : BEGIN(xb);
552 1 : startlit();
553 1 : }
554 : } /* <SQL> */
555 1 :
556 : <xh>{xhinside} |
557 2 : <xb>{xbinside} {
558 : addlit(yytext, yyleng);
559 2 : }
560 : <xb><<EOF>> {
561 2 : mmfatal(PARSE_ERROR, "unterminated bit string literal");
562 0 : }
563 :
564 : <SQL>{xhstart} {
565 1 : token_start = yytext;
566 1 : state_before_str_start = YYSTATE;
567 1 : BEGIN(xh);
568 1 : startlit();
569 1 : }
570 : <xh><<EOF>> {
571 1 : mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal");
572 0 : }
573 :
574 : <C>{xqstart} {
575 15 : token_start = yytext;
576 15 : state_before_str_start = YYSTATE;
577 15 : BEGIN(xqc);
578 15 : startlit();
579 15 : }
580 :
581 15 : <SQL>{
582 : {xnstart} {
583 1 : /* National character. Transfer it as-is to the backend. */
584 : token_start = yytext;
585 1 : state_before_str_start = YYSTATE;
586 1 : BEGIN(xn);
587 1 : startlit();
588 1 : }
589 :
590 1 : {xqstart} {
591 164 : token_start = yytext;
592 164 : state_before_str_start = YYSTATE;
593 164 : BEGIN(xq);
594 164 : startlit();
595 164 : }
596 : {xestart} {
597 164 : token_start = yytext;
598 2 : state_before_str_start = YYSTATE;
599 2 : BEGIN(xe);
600 2 : startlit();
601 2 : }
602 : {xusstart} {
603 2 : token_start = yytext;
604 2 : state_before_str_start = YYSTATE;
605 2 : BEGIN(xus);
606 2 : startlit();
607 2 : }
608 : } /* <SQL> */
609 2 :
610 : <xb,xh,xq,xqc,xe,xn,xus>{quote} {
611 186 : /*
612 : * When we are scanning a quoted string and see an end
613 : * quote, we must look ahead for a possible continuation.
614 : * If we don't see one, we know the end quote was in fact
615 : * the end of the string. To reduce the lexer table size,
616 : * we use a single "xqs" state to do the lookahead for all
617 : * types of strings.
618 : */
619 : state_before_str_stop = YYSTATE;
620 186 : BEGIN(xqs);
621 186 : }
622 : <xqs>{quotecontinue} {
623 186 : /*
624 0 : * Found a quote continuation, so return to the in-quote
625 : * state and continue scanning the literal. Nothing is
626 : * added to the literal's contents.
627 : */
628 : BEGIN(state_before_str_stop);
629 0 : }
630 : <xqs>{quotecontinuefail} |
631 0 : <xqs>{other} |
632 186 : <xqs><<EOF>> {
633 : /*
634 : * Failed to see a quote continuation. Throw back
635 : * everything after the end quote, and handle the string
636 : * according to the state we were in previously.
637 : */
638 : yyless(0);
639 390 : BEGIN(state_before_str_start);
640 186 :
641 : switch (state_before_str_stop)
642 186 : {
643 : case xb:
644 1 : if (literalbuf[strspn(literalbuf, "01")] != '\0')
645 1 : mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string literal");
646 0 : base_yylval.str = make3_str("b'", literalbuf, "'");
647 1 : return BCONST;
648 1 : case xh:
649 1 : if (literalbuf[strspn(literalbuf, "0123456789abcdefABCDEF")] != '\0')
650 1 : mmerror(PARSE_ERROR, ET_ERROR, "invalid hexadecimal string literal");
651 0 : base_yylval.str = make3_str("x'", literalbuf, "'");
652 1 : return XCONST;
653 1 : case xq:
654 179 : /* fallthrough */
655 : case xqc:
656 : base_yylval.str = make3_str("'", literalbuf, "'");
657 179 : return SCONST;
658 179 : case xe:
659 2 : base_yylval.str = make3_str("E'", literalbuf, "'");
660 2 : return SCONST;
661 2 : case xn:
662 1 : base_yylval.str = make3_str("N'", literalbuf, "'");
663 1 : return SCONST;
664 1 : case xus:
665 2 : base_yylval.str = make3_str("U&'", literalbuf, "'");
666 2 : return USCONST;
667 2 : default:
668 0 : mmfatal(PARSE_ERROR, "unhandled previous state in xqs\n");
669 0 : }
670 : }
671 :
672 : <xq,xe,xn,xus>{xqdouble} {
673 4 : addlit(yytext, yyleng);
674 4 : }
675 : <xqc>{xqcquote} {
676 4 : addlit(yytext, yyleng);
677 0 : }
678 : <xq,xqc,xn,xus>{xqinside} {
679 0 : addlit(yytext, yyleng);
680 184 : }
681 : <xe>{xeinside} {
682 184 : addlit(yytext, yyleng);
683 5 : }
684 : <xe>{xeunicode} {
685 5 : addlit(yytext, yyleng);
686 0 : }
687 : <xe>{xeescape} {
688 0 : addlit(yytext, yyleng);
689 3 : }
690 : <xe>{xeoctesc} {
691 3 : addlit(yytext, yyleng);
692 0 : }
693 : <xe>{xehexesc} {
694 0 : addlit(yytext, yyleng);
695 0 : }
696 : <xe>. {
697 0 : /* This is only needed for \ just before EOF */
698 0 : addlitchar(yytext[0]);
699 0 : }
700 : <xq,xqc,xe,xn,xus><<EOF>> {
701 0 : mmfatal(PARSE_ERROR, "unterminated quoted string");
702 0 : }
703 :
704 : <SQL>{
705 : {dolqdelim} { /* opening delimiter of a dollar-quoted string */
706 2 : token_start = yytext;
707 2 : if (dolqstart)
708 2 : free(dolqstart); /* release any delimiter still held */
709 0 : dolqstart = mm_strdup(yytext); /* kept so the xdolq rule can strcmp() candidate terminators against it */
710 2 : BEGIN(xdolq);
711 2 : startlit();
712 2 : addlit(yytext, yyleng); /* the accumulated literal begins with the opening delimiter itself */
713 2 : }
714 : {dolqfailed} {
715 2 : /* throw back all but the initial "$" */
716 0 : yyless(1);
717 0 : /* and treat it as {other} */
718 : return yytext[0];
719 0 : }
720 : } /* <SQL> */
721 :
722 : <xdolq>{dolqdelim} {
723 3 : if (strcmp(yytext, dolqstart) == 0)
724 3 : {
725 : addlit(yytext, yyleng);
726 2 : free(dolqstart);
727 2 : dolqstart = NULL;
728 2 : BEGIN(SQL);
729 2 : base_yylval.str = loc_strdup(literalbuf);
730 2 : return SCONST;
731 2 : }
732 : else
733 : {
734 : /*
735 : * When we fail to match $...$ to dolqstart, transfer
736 : * the $... part to the output, but put back the final
737 : * $ for rescanning. Consider $delim$...$junk$delim$
738 : */
739 : addlit(yytext, yyleng - 1);
740 1 : yyless(yyleng - 1);
741 2 : }
742 : }
743 : <xdolq>{dolqinside} {
744 1 : addlit(yytext, yyleng);
745 2 : }
746 : <xdolq>{dolqfailed} {
747 2 : addlit(yytext, yyleng);
748 0 : }
749 : <xdolq>. {
750 0 : /* a lone dollar sign; {dolqinside} takes every other character */
751 0 : addlitchar(yytext[0]);
752 0 : }
753 : <xdolq><<EOF>> {
754 0 : mmfatal(PARSE_ERROR, "unterminated dollar-quoted string");
755 0 : }
756 :
757 : <SQL>{
758 : {xdstart} {
759 62 : state_before_str_start = YYSTATE;
760 62 : BEGIN(xd);
761 62 : startlit();
762 62 : }
763 : {xuistart} {
764 62 : state_before_str_start = YYSTATE;
765 1 : BEGIN(xui);
766 1 : startlit();
767 1 : }
768 : } /* <SQL> */
769 1 :
770 : <xd>{xdstop} {
771 62 : BEGIN(state_before_str_start);
772 62 : if (literallen == 0)
773 62 : mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
774 0 :
775 : /*
776 : * The server will truncate the identifier here. We do
777 : * not, as (1) it does not change the result; (2) we don't
778 : * know what NAMEDATALEN the server might use; (3) this
779 : * code path is also taken for literal query strings in
780 : * PREPARE and EXECUTE IMMEDIATE, which can certainly be
781 : * longer than NAMEDATALEN.
782 : */
783 : base_yylval.str = loc_strdup(literalbuf);
784 62 : return CSTRING;
785 62 : }
786 : <xdc>{xdstop} {
787 : BEGIN(state_before_str_start);
788 1123 : base_yylval.str = loc_strdup(literalbuf);
789 1123 : return CSTRING;
790 1123 : }
791 : <xui>{dquote} {
792 : BEGIN(state_before_str_start);
793 1 : if (literallen == 0)
794 1 : mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
795 0 :
796 : /*
797 : * The backend will truncate the identifier here. We do
798 : * not as it does not change the result.
799 : */
800 : base_yylval.str = make3_str("U&\"", literalbuf, "\"");
801 1 : return UIDENT;
802 1 : }
803 : <xd,xui>{xddouble} {
804 : addlit(yytext, yyleng);
805 2 : }
806 : <xd,xui>{xdinside} {
807 2 : addlit(yytext, yyleng);
808 65 : }
809 : <xd,xui><<EOF>> {
810 65 : mmfatal(PARSE_ERROR, "unterminated quoted identifier");
811 0 : }
812 : <C>{xdstart} {
813 : state_before_str_start = YYSTATE;
814 1123 : BEGIN(xdc);
815 1123 : startlit();
816 1123 : }
817 : <xdc>{xdcinside} {
818 1123 : addlit(yytext, yyleng);
819 19813 : }
820 : <xdc><<EOF>> {
821 19813 : mmfatal(PARSE_ERROR, "unterminated quoted string");
822 0 : }
823 :
824 : <SQL>{
825 : {typecast} {
826 11 : return TYPECAST;
827 11 : }
828 :
829 : {dot_dot} {
830 0 : return DOT_DOT;
831 0 : }
832 :
833 : {colon_equals} {
834 0 : return COLON_EQUALS;
835 0 : }
836 :
837 : {equals_greater} {
838 0 : return EQUALS_GREATER;
839 0 : }
840 :
841 : {less_equals} {
842 1 : return LESS_EQUALS;
843 1 : }
844 :
845 : {greater_equals} {
846 0 : return GREATER_EQUALS;
847 0 : }
848 :
849 : {less_greater} {
850 0 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
851 : return NOT_EQUALS;
852 0 : }
853 :
854 : {not_equals} {
855 0 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
856 : return NOT_EQUALS;
857 0 : }
858 :
859 : {right_arrow} {
860 3 : return RIGHT_ARROW;
861 3 : }
862 :
863 : {informix_special} {
864 0 : /* are we simulating Informix? */
865 : if (INFORMIX_MODE)
866 0 : {
867 : unput(':'); /* push a colon back onto the input in place of the dollar sign; no token is returned from this branch */
868 0 : }
869 : else
870 : return yytext[0]; /* otherwise the dollar sign is returned as a single-character token */
871 0 : }
872 :
873 0 : {self} {
874 3237 : /*
875 : * We may find a ';' inside a structure definition in a
876 : * TYPE or VAR statement. This is not an EOL marker.
877 : */
878 : if (yytext[0] == ';' && struct_level == 0)
879 3237 : BEGIN(C);
880 1391 : return yytext[0];
881 3237 : }
882 :
883 : {operator} {
884 19 : /*
885 : * Check for embedded slash-star or dash-dash; those
886 : * are comment starts, so operator must stop there.
887 : * Note that slash-star or dash-dash at the first
888 : * character will match a prior rule, not this one.
889 : */
890 : int nchars = yyleng;
891 19 : char *slashstar = strstr(yytext, "/*");
892 19 : char *dashdash = strstr(yytext, "--");
893 19 :
894 : if (slashstar && dashdash)
895 19 : {
896 : /* if both appear, take the first one */
897 : if (slashstar > dashdash)
898 0 : slashstar = dashdash;
899 0 : }
900 : else if (!slashstar)
901 19 : slashstar = dashdash;
902 19 : if (slashstar)
903 19 : nchars = slashstar - yytext;
904 0 :
905 : /*
906 : * For SQL compatibility, '+' and '-' cannot be the
907 : * last char of a multi-char operator unless the operator
908 : * contains chars that are not in SQL operators.
909 : * The idea is to lex '=-' as two operators, but not
910 : * to forbid operator names like '?-' that could not be
911 : * sequences of SQL operators.
912 : */
913 : if (nchars > 1 &&
914 19 : (yytext[nchars - 1] == '+' ||
915 14 : yytext[nchars - 1] == '-'))
916 14 : {
917 : int ic;
918 :
919 : for (ic = nchars - 2; ic >= 0; ic--)
920 6 : {
921 : char c = yytext[ic];
922 3 :
923 : if (c == '~' || c == '!' || c == '@' ||
924 3 : c == '#' || c == '^' || c == '&' ||
925 3 : c == '|' || c == '`' || c == '?' ||
926 3 : c == '%')
927 : break;
928 : }
929 : if (ic < 0)
930 3 : {
931 : /*
932 : * didn't find a qualifying character, so remove
933 : * all trailing [+-]
934 : */
935 : do
936 : {
937 : nchars--;
938 3 : } while (nchars > 1 &&
939 3 : (yytext[nchars - 1] == '+' ||
940 0 : yytext[nchars - 1] == '-'));
941 0 : }
942 : }
943 :
944 : if (nchars < yyleng)
945 19 : {
946 : /* Strip the unwanted chars from the token */
947 : yyless(nchars);
948 6 :
949 : /*
950 : * If what we have left is only one char, and it's
951 : * one of the characters matching "self", then
952 : * return it as a character token the same way
953 : * that the "self" rule would have.
954 : */
955 : if (nchars == 1 &&
956 3 : strchr(",()[].;:|+-*/%^<>=", yytext[0]))
957 3 : return yytext[0];
958 3 :
959 : /*
960 : * Likewise, if what we have left is two chars, and
961 : * those match the tokens ">=", "<=", "=>", "<>" or
962 : * "!=", then we must return the appropriate token
963 : * rather than the generic Op.
964 : */
965 : if (nchars == 2)
966 0 : {
967 : if (yytext[0] == '=' && yytext[1] == '>')
968 0 : return EQUALS_GREATER;
969 0 : if (yytext[0] == '>' && yytext[1] == '=')
970 0 : return GREATER_EQUALS;
971 0 : if (yytext[0] == '<' && yytext[1] == '=')
972 0 : return LESS_EQUALS;
973 0 : if (yytext[0] == '<' && yytext[1] == '>')
974 0 : return NOT_EQUALS;
975 0 : if (yytext[0] == '!' && yytext[1] == '=')
976 0 : return NOT_EQUALS;
977 0 : if (yytext[0] == '-' && yytext[1] == '>')
978 0 : return RIGHT_ARROW;
979 0 : }
980 : }
981 :
982 : base_yylval.str = loc_strdup(yytext);
983 16 : return Op;
984 16 : }
985 :
986 : {param} { /* positional parameter: a dollar sign followed by decimal digits */
987 11 : int val;
988 :
989 : errno = 0; /* cleared so the ERANGE test below only reflects this conversion */
990 11 : val = strtoint(yytext + 1, NULL, 10); /* yytext + 1 skips the leading dollar sign */
991 11 : if (errno == ERANGE)
992 11 : mmfatal(PARSE_ERROR, "parameter number too large");
993 0 : base_yylval.ival = val;
994 11 : return PARAM;
995 11 : }
996 : {param_junk} {
997 : mmfatal(PARSE_ERROR, "trailing junk after parameter");
998 0 : }
999 :
1000 : {ip} {
1001 1 : base_yylval.str = loc_strdup(yytext);
1002 1 : return IP;
1003 1 : }
1004 : } /* <SQL> */
1005 :
1006 : <C,SQL>{
1007 : {decinteger} {
1008 1251 : return process_integer_literal(yytext, &base_yylval, 10);
1009 1251 : }
1010 : {hexinteger} {
1011 : return process_integer_literal(yytext, &base_yylval, 16);
1012 3 : }
1013 : {numeric} {
1014 : base_yylval.str = loc_strdup(yytext);
1015 19 : return FCONST;
1016 19 : }
1017 : {numericfail} {
1018 : /* throw back the .., and treat as integer */
1019 0 : yyless(yyleng - 2);
1020 0 : return process_integer_literal(yytext, &base_yylval, 10);
1021 0 : }
1022 : {real} {
1023 : base_yylval.str = loc_strdup(yytext);
1024 0 : return FCONST;
1025 0 : }
1026 : {realfail} {
1027 : /*
1028 0 : * throw back the [Ee][+-], and figure out whether what
1029 : * remains is an {decinteger} or {numeric}.
1030 : */
1031 : yyless(yyleng - 2);
1032 0 : return process_integer_literal(yytext, &base_yylval, 10);
1033 0 : }
1034 : } /* <C,SQL> */
1035 :
1036 : <SQL>{
1037 : {octinteger} {
1038 0 : return process_integer_literal(yytext, &base_yylval, 8);
1039 0 : }
1040 : {bininteger} {
1041 : return process_integer_literal(yytext, &base_yylval, 2);
1042 0 : }
1043 :
1044 : /*
1045 : * Note that some trailing junk is valid in C (such as 100LL), so we
1046 : * contain this to SQL mode.
1047 : */
1048 : {integer_junk} {
1049 0 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1050 0 : }
1051 : {numeric_junk} {
1052 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1053 0 : }
1054 : {real_junk} {
1055 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1056 0 : }
1057 :
1058 : :{identifier}((("->"|\.){identifier})|(\[{array}\]))* {
1059 657 : base_yylval.str = loc_strdup(yytext + 1);
1060 657 : return CVARIABLE;
1061 657 : }
1062 :
1063 : {identifier} {
1064 5943 : /* First check to see if it's a define symbol to expand */
1065 : if (!isdefine())
1066 5943 : {
1067 : int kwvalue;
1068 :
1069 : /*
1070 : * User-defined typedefs override SQL keywords, but
1071 : * not C keywords. Currently, a typedef name is just
1072 : * reported as IDENT, but someday we might need to
1073 : * return a distinct token type.
1074 : */
1075 : if (get_typedef(yytext, true) == NULL)
1076 5864 : {
1077 : /* Is it an SQL/ECPG keyword? */
1078 : kwvalue = ScanECPGKeywordLookup(yytext);
1079 5860 : if (kwvalue >= 0)
1080 5860 : return kwvalue;
1081 4165 : }
1082 :
1083 : /* Is it a C keyword? */
1084 : kwvalue = ScanCKeywordLookup(yytext);
1085 1699 : if (kwvalue >= 0)
1086 1699 : return kwvalue;
1087 8 :
1088 : /*
1089 : * None of the above. Return it as an identifier.
1090 : *
1091 : * The backend will attempt to truncate and case-fold
1092 : * the identifier, but I see no good reason for ecpg
1093 : * to do so; that's just another way that ecpg could
1094 : * get out of step with the backend.
1095 : */
1096 : base_yylval.str = loc_strdup(yytext);
1097 1691 : return IDENT;
1098 1691 : }
1099 : }
1100 :
1101 79 : {other} {
1102 18 : return yytext[0];
1103 18 : }
1104 : } /* <SQL> */
1105 :
1106 : /*
1107 : * Begin ECPG-specific rules
1108 : */
1109 :
1110 1339 : <C>{exec_sql} {
1111 : BEGIN(SQL);
1112 1339 : return SQL_START;
1113 1339 : }
1114 : <C>{informix_special} {
1115 : /* are we simulating Informix? */
1116 52 : if (INFORMIX_MODE)
1117 52 : {
1118 : BEGIN(SQL);
1119 52 : return SQL_START;
1120 52 : }
1121 : else
1122 : return S_ANYTHING;
1123 0 : }
1124 : <C>{ccomment} {
1125 : ECHO;
1126 5 : }
1127 : <C>{cppinclude} {
1128 5 : if (system_includes)
1129 228 : {
1130 : include_next = false;
1131 2 : BEGIN(incl);
1132 2 : }
1133 : else
1134 : {
1135 : base_yylval.str = loc_strdup(yytext);
1136 226 : return CPP_LINE;
1137 226 : }
1138 : }
1139 : <C>{cppinclude_next} {
1140 2 : if (system_includes)
1141 0 : {
1142 : include_next = true;
1143 0 : BEGIN(incl);
1144 0 : }
1145 : else
1146 : {
1147 : base_yylval.str = loc_strdup(yytext);
1148 0 : return CPP_LINE;
1149 0 : }
1150 : }
1151 : <C,SQL>{cppline} {
1152 0 : base_yylval.str = loc_strdup(yytext);
1153 504 : return CPP_LINE;
1154 504 : }
1155 : <C>{identifier} {
1156 : /*
1157 8658 : * Try to detect a function name:
1158 : * look for identifiers at the global scope
1159 : * keep the last identifier before the first '(' and '{'
1160 : */
1161 : if (braces_open == 0 && parenths_open == 0)
1162 8658 : {
1163 : if (current_function)
1164 988 : free(current_function);
1165 840 : current_function = mm_strdup(yytext);
1166 988 : }
1167 : /* Informix uses SQL defines only in SQL space */
1168 : /* however, some defines have to be taken care of for compatibility */
1169 : if ((!INFORMIX_MODE || !isinformixdefine()) && !isdefine())
1170 8658 : {
1171 : int kwvalue;
1172 :
1173 : kwvalue = ScanCKeywordLookup(yytext);
1174 8651 : if (kwvalue >= 0)
1175 8651 : return kwvalue;
1176 941 : else
1177 : {
1178 : base_yylval.str = loc_strdup(yytext);
1179 7710 : return IDENT;
1180 7710 : }
1181 : }
1182 : }
1183 : <C>{xcstop} {
1184 7 : mmerror(PARSE_ERROR, ET_ERROR, "nested /* ... */ comments");
1185 0 : }
1186 : <C>":" { return ':'; }
1187 0 : <C>";" { return ';'; }
1188 97 : <C>"," { return ','; }
1189 2738 : <C>"*" { return '*'; }
1190 1869 : <C>"%" { return '%'; }
1191 401 : <C>"/" { return '/'; }
1192 0 : <C>"+" { return '+'; }
1193 0 : <C>"-" { return '-'; }
1194 13 : <C>"(" { parenths_open++; return '('; }
1195 89 : <C>")" { parenths_open--; return ')'; }
1196 2125 : <C,xskip>{space} { ECHO; }
1197 2125 : <C>\{ { return '{'; }
1198 22816 : <C>\} { return '}'; }
1199 351 : <C>\[ { return '['; }
1200 350 : <C>\] { return ']'; }
1201 544 : <C>\= { return '='; }
1202 544 : <C>"->" { return S_MEMBER; }
1203 662 : <C>">>" { return S_RSHIFT; }
1204 101 : <C>"<<" { return S_LSHIFT; }
1205 1 : <C>"||" { return S_OR; }
1206 0 : <C>"&&" { return S_AND; }
1207 5 : <C>"++" { return S_INC; }
1208 14 : <C>"--" { return S_DEC; }
1209 96 : <C>"==" { return S_EQUAL; }
1210 1 : <C>"!=" { return S_NEQUAL; }
1211 55 : <C>"+=" { return S_ADD; }
1212 25 : <C>"-=" { return S_SUB; }
1213 0 : <C>"*=" { return S_MUL; }
1214 0 : <C>"/=" { return S_DIV; }
1215 0 : <C>"%=" { return S_MOD; }
1216 0 : <C>"->*" { return S_MEMPOINT; }
1217 0 : <C>".*" { return S_DOTPOINT; }
1218 0 : <C>{other} { return S_ANYTHING; }
1219 0 : <C>{exec_sql}{define}{space}* { BEGIN(def_ident); }
1220 1180 : <C>{informix_special}{define}{space}* {
1221 250 : /* are we simulating Informix? */
1222 2 : if (INFORMIX_MODE)
1223 2 : {
1224 : BEGIN(def_ident);
1225 2 : }
1226 : else
1227 : {
1228 : yyless(1);
1229 0 : return S_ANYTHING;
1230 0 : }
1231 : }
1232 : <C>{exec_sql}{undef}{space}* {
1233 2 : BEGIN(undef);
1234 2 : }
1235 : <C>{informix_special}{undef}{space}* {
1236 2 : /* are we simulating Informix? */
1237 0 : if (INFORMIX_MODE)
1238 0 : {
1239 : BEGIN(undef);
1240 0 : }
1241 : else
1242 : {
1243 : yyless(1);
1244 0 : return S_ANYTHING;
1245 0 : }
1246 : }
1247 : <undef>{identifier}{space}*";" {
1248 0 : struct _defines *ptr,
1249 2 : *ptr2 = NULL;
1250 2 : int i;
1251 :
1252 : /*
1253 : * Skip the ";" and trailing whitespace. Note that yytext
1254 : * contains at least one non-space character plus the ";"
1255 : */
1256 : for (i = strlen(yytext) - 2;
1257 2 : i > 0 && ecpg_isspace(yytext[i]);
1258 2 : i--)
1259 0 : ;
1260 : yytext[i + 1] = '\0';
1261 2 :
1262 : /* Find and unset any matching define; should be only 1 */
1263 : for (ptr = defines; ptr; ptr2 = ptr, ptr = ptr->next)
1264 11 : {
1265 : if (strcmp(yytext, ptr->name) == 0)
1266 11 : {
1267 : free(ptr->value);
1268 2 : ptr->value = NULL;
1269 2 : /* We cannot forget it if there's a cmdvalue */
1270 : if (ptr->cmdvalue == NULL)
1271 2 : {
1272 : if (ptr2 == NULL)
1273 1 : defines = ptr->next;
1274 0 : else
1275 : ptr2->next = ptr->next;
1276 1 : free(ptr->name);
1277 1 : free(ptr);
1278 1 : }
1279 : break;
1280 2 : }
1281 : }
1282 :
1283 : BEGIN(C);
1284 2 : }
1285 : <undef>{other}|\n {
1286 2 : mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL UNDEF command");
1287 0 : yyterminate();
1288 : }
1289 : <C>{exec_sql}{include}{space}* {
1290 : BEGIN(incl);
1291 85 : }
1292 : <C>{informix_special}{include}{space}* {
1293 85 : /* are we simulating Informix? */
1294 2 : if (INFORMIX_MODE)
1295 2 : {
1296 : BEGIN(incl);
1297 2 : }
1298 : else
1299 : {
1300 : yyless(1);
1301 0 : return S_ANYTHING;
1302 0 : }
1303 : }
1304 : <C,xskip>{exec_sql}{ifdef}{space}* {
1305 2 : if (preproc_tos >= MAX_NESTED_IF - 1)
1306 5 : mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
1307 0 : preproc_tos++;
1308 5 : stacked_if_value[preproc_tos].active = false;
1309 5 : stacked_if_value[preproc_tos].saw_active = false;
1310 5 : stacked_if_value[preproc_tos].else_branch = false;
1311 5 : ifcond = true;
1312 5 : BEGIN(xcond);
1313 5 : }
1314 : <C,xskip>{informix_special}{ifdef}{space}* {
1315 5 : /* are we simulating Informix? */
1316 0 : if (INFORMIX_MODE)
1317 0 : {
1318 : if (preproc_tos >= MAX_NESTED_IF - 1)
1319 0 : mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
1320 0 : preproc_tos++;
1321 0 : stacked_if_value[preproc_tos].active = false;
1322 0 : stacked_if_value[preproc_tos].saw_active = false;
1323 0 : stacked_if_value[preproc_tos].else_branch = false;
1324 0 : ifcond = true;
1325 0 : BEGIN(xcond);
1326 0 : }
1327 : else
1328 : {
1329 : yyless(1);
1330 0 : return S_ANYTHING;
1331 0 : }
1332 : }
1333 : <C,xskip>{exec_sql}{ifndef}{space}* {
1334 0 : if (preproc_tos >= MAX_NESTED_IF - 1)
1335 4 : mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
1336 0 : preproc_tos++;
1337 4 : stacked_if_value[preproc_tos].active = false;
1338 4 : stacked_if_value[preproc_tos].saw_active = false;
1339 4 : stacked_if_value[preproc_tos].else_branch = false;
1340 4 : ifcond = false;
1341 4 : BEGIN(xcond);
1342 4 : }
1343 : <C,xskip>{informix_special}{ifndef}{space}* {
1344 4 : /* are we simulating Informix? */
1345 0 : if (INFORMIX_MODE)
1346 0 : {
1347 : if (preproc_tos >= MAX_NESTED_IF - 1)
1348 0 : mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
1349 0 : preproc_tos++;
1350 0 : stacked_if_value[preproc_tos].active = false;
1351 0 : stacked_if_value[preproc_tos].saw_active = false;
1352 0 : stacked_if_value[preproc_tos].else_branch = false;
1353 0 : ifcond = false;
1354 0 : BEGIN(xcond);
1355 0 : }
1356 : else
1357 : {
1358 : yyless(1);
1359 0 : return S_ANYTHING;
1360 0 : }
1361 : }
1362 : <C,xskip>{exec_sql}{elif}{space}* {
1363 0 : if (preproc_tos == 0)
1364 5 : mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
1365 0 : if (stacked_if_value[preproc_tos].else_branch)
1366 5 : mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
1367 0 : ifcond = true;
1368 5 : BEGIN(xcond);
1369 5 : }
1370 : <C,xskip>{informix_special}{elif}{space}* {
1371 5 : /* are we simulating Informix? */
1372 0 : if (INFORMIX_MODE)
1373 0 : {
1374 : if (preproc_tos == 0)
1375 0 : mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
1376 0 : if (stacked_if_value[preproc_tos].else_branch)
1377 0 : mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
1378 0 : ifcond = true;
1379 0 : BEGIN(xcond);
1380 0 : }
1381 : else
1382 : {
1383 : yyless(1);
1384 0 : return S_ANYTHING;
1385 0 : }
1386 : }
1387 :
1388 0 : <C,xskip>{exec_sql}{else}{space}*";" {
1389 4 : /* only exec sql endif pops the stack, so take care of duplicated 'else' */
1390 : if (preproc_tos == 0)
1391 4 : mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
1392 0 : else if (stacked_if_value[preproc_tos].else_branch)
1393 4 : mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
1394 0 : else
1395 : {
1396 : stacked_if_value[preproc_tos].else_branch = true;
1397 4 : stacked_if_value[preproc_tos].active =
1398 4 : (stacked_if_value[preproc_tos - 1].active &&
1399 8 : !stacked_if_value[preproc_tos].saw_active);
1400 4 : stacked_if_value[preproc_tos].saw_active = true;
1401 4 :
1402 : if (stacked_if_value[preproc_tos].active)
1403 4 : BEGIN(C);
1404 3 : else
1405 : BEGIN(xskip);
1406 1 : }
1407 : }
1408 : <C,xskip>{informix_special}{else}{space}*";" {
1409 4 : /* are we simulating Informix? */
1410 0 : if (INFORMIX_MODE)
1411 0 : {
1412 : if (preproc_tos == 0)
1413 0 : mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
1414 0 : else if (stacked_if_value[preproc_tos].else_branch)
1415 0 : mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
1416 0 : else
1417 : {
1418 : stacked_if_value[preproc_tos].else_branch = true;
1419 0 : stacked_if_value[preproc_tos].active =
1420 0 : (stacked_if_value[preproc_tos - 1].active &&
1421 0 : !stacked_if_value[preproc_tos].saw_active);
1422 0 : stacked_if_value[preproc_tos].saw_active = true;
1423 0 :
1424 : if (stacked_if_value[preproc_tos].active)
1425 0 : BEGIN(C);
1426 0 : else
1427 : BEGIN(xskip);
1428 0 : }
1429 : }
1430 : else
1431 : {
1432 : yyless(1);
1433 0 : return S_ANYTHING;
1434 0 : }
1435 : }
1436 : <C,xskip>{exec_sql}{endif}{space}*";" {
1437 0 : if (preproc_tos == 0)
1438 9 : mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");
1439 0 : else
1440 : preproc_tos--;
1441 9 :
1442 : if (stacked_if_value[preproc_tos].active)
1443 9 : BEGIN(C);
1444 9 : else
1445 : BEGIN(xskip);
1446 0 : }
1447 : <C,xskip>{informix_special}{endif}{space}*";" {
1448 9 : /* are we simulating Informix? */
1449 0 : if (INFORMIX_MODE)
1450 0 : {
1451 : if (preproc_tos == 0)
1452 0 : mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");
1453 0 : else
1454 : preproc_tos--;
1455 0 :
1456 : if (stacked_if_value[preproc_tos].active)
1457 0 : BEGIN(C);
1458 0 : else
1459 : BEGIN(xskip);
1460 0 : }
1461 : else
1462 : {
1463 : yyless(1);
1464 0 : return S_ANYTHING;
1465 0 : }
1466 : }
1467 :
1468 0 : <xskip>{other} { /* ignore */ }
1469 341 :
1470 341 : <xcond>{identifier}{space}*";" {
1471 14 : {
1472 : struct _defines *defptr;
1473 : unsigned int i;
1474 : bool this_active;
1475 :
1476 : /*
1477 : * Skip the ";" and trailing whitespace. Note that
1478 : * yytext contains at least one non-space character
1479 : * plus the ";"
1480 : */
1481 : for (i = strlen(yytext) - 2;
1482 14 : i > 0 && ecpg_isspace(yytext[i]);
1483 14 : i--)
1484 0 : /* skip */ ;
1485 : yytext[i + 1] = '\0';
1486 14 :
1487 : /* Does a definition exist? */
1488 : for (defptr = defines; defptr; defptr = defptr->next)
1489 63 : {
1490 : if (strcmp(yytext, defptr->name) == 0)
1491 58 : {
1492 : /* Found it, but is it currently undefined? */
1493 : if (defptr->value == NULL)
1494 9 : defptr = NULL; /* pretend it's not found */
1495 1 : break;
1496 9 : }
1497 : }
1498 :
1499 : this_active = (defptr ? ifcond : !ifcond);
1500 14 : stacked_if_value[preproc_tos].active =
1501 14 : (stacked_if_value[preproc_tos - 1].active &&
1502 28 : !stacked_if_value[preproc_tos].saw_active &&
1503 14 : this_active);
1504 14 : stacked_if_value[preproc_tos].saw_active |= this_active;
1505 14 : }
1506 :
1507 : if (stacked_if_value[preproc_tos].active)
1508 14 : BEGIN(C);
1509 4 : else
1510 : BEGIN(xskip);
1511 10 : }
1512 :
1513 14 : <xcond>{other}|\n {
1514 0 : mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL IFDEF command");
1515 0 : yyterminate();
1516 : }
1517 : <def_ident>{identifier} {
1518 : newdefsymbol = mm_strdup(yytext);
1519 252 : BEGIN(def);
1520 252 : startlit();
1521 252 : }
1522 : <def_ident>{other}|\n {
1523 252 : mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL DEFINE command");
1524 0 : yyterminate();
1525 : }
1526 : <def>{space}*";" {
1527 : struct _defines *ptr;
1528 252 :
1529 : /* Does it already exist? */
1530 : for (ptr = defines; ptr != NULL; ptr = ptr->next)
1531 671 : {
1532 : if (strcmp(newdefsymbol, ptr->name) == 0)
1533 424 : {
1534 : free(ptr->value);
1535 5 : ptr->value = mm_strdup(literalbuf);
1536 5 : /* Don't leak newdefsymbol */
1537 : free(newdefsymbol);
1538 5 : break;
1539 5 : }
1540 : }
1541 : if (ptr == NULL)
1542 252 : {
1543 : /* Not present, make a new entry */
1544 : ptr = (struct _defines *) mm_alloc(sizeof(struct _defines));
1545 247 :
1546 : ptr->name = newdefsymbol;
1547 247 : ptr->value = mm_strdup(literalbuf);
1548 247 : ptr->cmdvalue = NULL;
1549 247 : ptr->used = NULL;
1550 247 : ptr->next = defines;
1551 247 : defines = ptr;
1552 247 : }
1553 :
1554 : BEGIN(C);
1555 252 : }
1556 : <def>[^;] { addlit(yytext, yyleng); }
1557 252 : <incl>\<[^\>]+\>{space}*";"? { parse_include(); }
1558 4301 : <incl>{dquote}{xdinside}{dquote}{space}*";"? { parse_include(); }
1559 2 : <incl>[^;\<\>\"]+";" { parse_include(); }
1560 0 : <incl>{other}|\n {
1561 87 : mmfatal(PARSE_ERROR, "syntax error in EXEC SQL INCLUDE command");
1562 0 : yyterminate();
1563 : }
1564 :
1565 : <<EOF>> {
1566 244 : if (yy_buffer == NULL)
1567 244 : {
1568 : /* No more input */
1569 : if (preproc_tos > 0)
1570 69 : {
1571 : preproc_tos = 0;
1572 0 : mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
1573 0 : }
1574 : yyterminate();
1575 69 : }
1576 : else
1577 : {
1578 : /* Revert to previous input source */
1579 : struct _yy_buffer *yb = yy_buffer;
1580 175 : int i;
1581 : struct _defines *ptr;
1582 :
1583 : /* Check to see if we are exiting a macro value */
1584 : for (ptr = defines; ptr; ptr = ptr->next)
1585 695 : {
1586 : if (ptr->used == yy_buffer)
1587 605 : {
1588 : ptr->used = NULL;
1589 85 : break; /* there can't be multiple matches */
1590 85 : }
1591 : }
1592 :
1593 : if (yyin != NULL)
1594 175 : fclose(yyin);
1595 89 :
1596 : yy_delete_buffer(YY_CURRENT_BUFFER);
1597 175 : yy_switch_to_buffer(yy_buffer->buffer);
1598 175 :
1599 : yylineno = yy_buffer->lineno;
1600 175 :
1601 : /* We have to output the filename only if we change files here */
1602 : i = strcmp(input_filename, yy_buffer->filename);
1603 175 :
1604 : free(input_filename);
1605 175 : input_filename = yy_buffer->filename;
1606 175 :
1607 : yy_buffer = yy_buffer->next;
1608 175 : free(yb);
1609 175 :
1610 : if (i != 0)
1611 175 : output_line_number();
1612 89 : }
1613 : }
1614 :
1615 175 : <INITIAL>{other}|\n {
1616 0 : mmfatal(PARSE_ERROR, "internal error: unreachable state; please report this to <%s>", PACKAGE_BUGREPORT);
1617 0 : }
1618 :
1619 : %%
1620 0 :
1621 : /* LCOV_EXCL_STOP */
1622 :
1623 : void
1624 : lex_init(void)
1625 70 : {
1626 : braces_open = 0;
1627 70 : parenths_open = 0;
1628 70 : current_function = NULL;
1629 70 :
1630 : yylineno = 1;
1631 70 :
1632 : /* initialize state for if/else/endif */
1633 : preproc_tos = 0;
1634 70 : stacked_if_value[preproc_tos].active = true;
1635 70 : stacked_if_value[preproc_tos].saw_active = true;
1636 70 : stacked_if_value[preproc_tos].else_branch = false;
1637 70 :
1638 : /* initialize literal buffer to a reasonable but expansible size */
1639 : if (literalbuf == NULL)
1640 70 : {
1641 : literalalloc = 1024;
1642 69 : literalbuf = (char *) mm_alloc(literalalloc);
1643 69 : }
1644 : startlit();
1645 70 :
1646 : BEGIN(C);
1647 70 : }
1648 70 :
1649 : static void
1650 : addlit(char *ytext, int yleng)
1651 24386 : {
1652 : /* enlarge buffer if needed */
1653 : if ((literallen + yleng) >= literalalloc)
1654 24386 : {
1655 : do
1656 : literalalloc *= 2;
1657 0 : while ((literallen + yleng) >= literalalloc);
1658 0 : literalbuf = (char *) realloc(literalbuf, literalalloc);
1659 0 : }
1660 : /* append new data, add trailing null */
1661 : memcpy(literalbuf + literallen, ytext, yleng);
1662 24386 : literallen += yleng;
1663 24386 : literalbuf[literallen] = '\0';
1664 24386 : }
1665 24386 :
1666 : static void
1667 : addlitchar(unsigned char ychar)
1668 0 : {
1669 : /* enlarge buffer if needed */
1670 : if ((literallen + 1) >= literalalloc)
1671 0 : {
1672 : literalalloc *= 2;
1673 0 : literalbuf = (char *) realloc(literalbuf, literalalloc);
1674 0 : }
1675 : /* append new data, add trailing null */
1676 : literalbuf[literallen] = ychar;
1677 0 : literallen += 1;
1678 0 : literalbuf[literallen] = '\0';
1679 0 : }
1680 0 :
1681 : /*
1682 : * Process {decinteger}, {hexinteger}, etc. Note this will also do the right
1683 : * thing with {numeric}, ie digits and a decimal point.
1684 : */
1685 : static int
1686 : process_integer_literal(const char *token, YYSTYPE *lval, int base)
1687 1254 : {
1688 : int val;
1689 : char *endptr;
1690 :
1691 : errno = 0;
1692 1254 : val = strtoint(base == 10 ? token : token + 2, &endptr, base);
1693 1254 : if (*endptr != '\0' || errno == ERANGE)
1694 1254 : {
1695 : /* integer too large (or contains decimal pt), treat it as a float */
1696 : lval->str = loc_strdup(token);
1697 6 : return FCONST;
1698 6 : }
1699 : lval->ival = val;
1700 1248 : return ICONST;
1701 1248 : }
1702 :
1703 : static void
1704 : parse_include(void)
1705 89 : {
1706 : /* got the include file name */
1707 : struct _yy_buffer *yb;
1708 : struct _include_path *ip;
1709 : char inc_file[MAXPGPATH];
1710 : unsigned int i;
1711 :
1712 : yb = mm_alloc(sizeof(struct _yy_buffer));
1713 89 :
1714 : yb->buffer = YY_CURRENT_BUFFER;
1715 89 : yb->lineno = yylineno;
1716 89 : yb->filename = input_filename;
1717 89 : yb->next = yy_buffer;
1718 89 :
1719 : yy_buffer = yb;
1720 89 :
1721 : /*
1722 : * skip the ";" if there is one and trailing whitespace. Note that yytext
1723 : * contains at least one non-space character plus the ";"
1724 : */
1725 : for (i = strlen(yytext) - 2;
1726 89 : i > 0 && ecpg_isspace(yytext[i]);
1727 90 : i--)
1728 1 : ;
1729 :
1730 : if (yytext[i] == ';')
1731 89 : i--;
1732 0 :
1733 : yytext[i + 1] = '\0';
1734 89 :
1735 : yyin = NULL;
1736 89 :
1737 : /* If file name is enclosed in '"' remove these and look only in '.' */
1738 :
1739 : /*
1740 : * Informix does look into all include paths though, except filename
1741 : * starts with '/'
1742 : */
1743 : if (yytext[0] == '"' && yytext[i] == '"' &&
1744 89 : ((compat != ECPG_COMPAT_INFORMIX && compat != ECPG_COMPAT_INFORMIX_SE) || yytext[1] == '/'))
1745 0 : {
1746 : yytext[i] = '\0';
1747 0 : memmove(yytext, yytext + 1, strlen(yytext));
1748 0 :
1749 : strlcpy(inc_file, yytext, sizeof(inc_file));
1750 0 : yyin = fopen(inc_file, "r");
1751 0 : if (!yyin)
1752 0 : {
1753 : if (strlen(inc_file) <= 2 || strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
1754 0 : {
1755 : strcat(inc_file, ".h");
1756 0 : yyin = fopen(inc_file, "r");
1757 0 : }
1758 : }
1759 :
1760 : }
1761 : else
1762 : {
1763 : if ((yytext[0] == '"' && yytext[i] == '"') || (yytext[0] == '<' && yytext[i] == '>'))
1764 89 : {
1765 : yytext[i] = '\0';
1766 2 : memmove(yytext, yytext + 1, strlen(yytext));
1767 2 : }
1768 :
1769 : for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next)
1770 242 : {
1771 : if (strlen(ip->path) + strlen(yytext) + 4 > MAXPGPATH)
1772 153 : {
1773 : fprintf(stderr, _("Error: include path \"%s/%s\" is too long on line %d, skipping\n"), ip->path, yytext, yylineno);
1774 0 : continue;
1775 0 : }
1776 : snprintf(inc_file, sizeof(inc_file), "%s/%s", ip->path, yytext);
1777 153 : yyin = fopen(inc_file, "r");
1778 153 : if (!yyin)
1779 153 : {
1780 : if (strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
1781 141 : {
1782 : strcat(inc_file, ".h");
1783 138 : yyin = fopen(inc_file, "r");
1784 138 : }
1785 : }
1786 :
1787 : /*
1788 : * if the command was "include_next" we have to disregard the
1789 : * first hit
1790 : */
1791 : if (yyin && include_next)
1792 153 : {
1793 : fclose(yyin);
1794 0 : yyin = NULL;
1795 0 : include_next = false;
1796 0 : }
1797 : }
1798 : }
1799 : if (!yyin)
1800 89 : mmfatal(NO_INCLUDE_FILE, "could not open include file \"%s\" on line %d", yytext, yylineno);
1801 0 :
1802 : input_filename = mm_strdup(inc_file);
1803 89 : yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1804 89 : yylineno = 1;
1805 89 : output_line_number();
1806 89 :
1807 : BEGIN(C);
1808 89 : }
1809 89 :
/*
 * ecpg_isspace() --- is ch one of the characters the flex scanner treats
 * as whitespace?
 *
 * Returns true for space, tab, newline, carriage return, form feed and
 * vertical tab; false for every other character.
 */
static bool
ecpg_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\f':
		case '\v':
			return true;
		default:
			return false;
	}
}
1825 :
1826 : /*
1827 : * If yytext matches a define symbol, begin scanning the symbol's value
1828 : * and return true
1829 : */
1830 : static bool
1831 : isdefine(void)
1832 14600 : {
1833 : struct _defines *ptr;
1834 :
1835 : /* is it a define? */
1836 : for (ptr = defines; ptr; ptr = ptr->next)
1837 66353 : {
1838 : /* notice we do not match anything being actively expanded */
1839 : if (strcmp(yytext, ptr->name) == 0 &&
1840 51838 : ptr->value != NULL &&
1841 85 : ptr->used == NULL)
1842 85 : {
1843 : /* Save state associated with the current buffer */
1844 : struct _yy_buffer *yb;
1845 :
1846 : yb = mm_alloc(sizeof(struct _yy_buffer));
1847 85 :
1848 : yb->buffer = YY_CURRENT_BUFFER;
1849 85 : yb->lineno = yylineno;
1850 85 : yb->filename = mm_strdup(input_filename);
1851 85 : yb->next = yy_buffer;
1852 85 : yy_buffer = yb;
1853 85 :
1854 : /* Mark symbol as being actively expanded */
1855 : ptr->used = yb;
1856 85 :
1857 : /*
1858 : * We use yy_scan_string which will copy the value, so there's no
1859 : * need to worry about a possible undef happening while we are
1860 : * still scanning it.
1861 : */
1862 : yy_scan_string(ptr->value);
1863 85 : return true;
1864 85 : }
1865 : }
1866 :
1867 : return false;
1868 14515 : }
1869 :
1870 : /*
1871 : * Handle replacement of INFORMIX built-in defines. This works just
1872 : * like isdefine() except for the source of the string to scan.
1873 : */
1874 : static bool
1875 : isinformixdefine(void)
1876 1773 : {
1877 : const char *new = NULL;
1878 1773 :
1879 : if (strcmp(yytext, "dec_t") == 0)
1880 1773 : new = "decimal";
1881 1 : else if (strcmp(yytext, "intrvl_t") == 0)
1882 1772 : new = "interval";
1883 0 : else if (strcmp(yytext, "dtime_t") == 0)
1884 1772 : new = "timestamp";
1885 0 :
1886 : if (new)
1887 1773 : {
1888 : struct _yy_buffer *yb;
1889 :
1890 : yb = mm_alloc(sizeof(struct _yy_buffer));
1891 1 :
1892 : yb->buffer = YY_CURRENT_BUFFER;
1893 1 : yb->lineno = yylineno;
1894 1 : yb->filename = mm_strdup(input_filename);
1895 1 : yb->next = yy_buffer;
1896 1 : yy_buffer = yb;
1897 1 :
1898 : yy_scan_string(new);
1899 1 : return true;
1900 1 : }
1901 :
1902 : return false;
1903 1772 : }
1904 : /* END: function "isinformixdefine" */
|