Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * pgc.l
5 : * lexical scanner for ecpg
6 : *
7 : * This is a modified version of src/backend/parser/scan.l
8 : *
9 : * The ecpg scanner is not backup-free, so the fail rules are
10 : * only here to simplify syncing this file with scan.l.
11 : *
12 : *
13 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
14 : * Portions Copyright (c) 1994, Regents of the University of California
15 : *
16 : * IDENTIFICATION
17 : * src/interfaces/ecpg/preproc/pgc.l
18 : *
19 : *-------------------------------------------------------------------------
20 : */
21 : #include "postgres_fe.h"
22 :
23 : #include <ctype.h>
24 : #include <limits.h>
25 :
26 : #include "common/string.h"
27 :
28 : #include "preproc_extern.h"
29 : #include "preproc.h"
30 : }
31 :
32 : %{
33 :
34 : /* LCOV_EXCL_START */
35 :
36 : extern YYSTYPE base_yylval;
37 :
38 : static int xcdepth = 0; /* depth of nesting in slash-star comments */
39 : static char *dolqstart = NULL; /* current $foo$ quote start string */
40 :
41 : /*
42 : * literalbuf is used to accumulate literal values when multiple rules
43 : * are needed to parse a single literal. Call startlit to reset buffer
44 : * to empty, addlit to add text. Note that the buffer is permanently
45 : * malloc'd to the largest size needed so far in the current run.
46 : */
47 : static char *literalbuf = NULL; /* expandable buffer */
48 : static int literallen; /* actual current length */
49 : static int literalalloc; /* current allocated buffer size */
50 :
51 : /* Used for detecting global state together with braces_open */
52 : static int parenths_open;
53 :
54 : /* Used to tell parse_include() whether the command was #include or #include_next */
55 : static bool include_next;
56 :
57 : #define startlit() (literalbuf[0] = '\0', literallen = 0)
58 : static void addlit(char *ytext, int yleng);
59 : static void addlitchar(unsigned char ychar);
60 : static int process_integer_literal(const char *token, YYSTYPE *lval, int base);
61 : static void parse_include(void);
62 : static bool ecpg_isspace(char ch);
63 : static bool isdefine(void);
64 : static bool isinformixdefine(void);
65 :
66 : char *token_start;
67 :
68 : /* vars to keep track of start conditions when scanning literals */
69 : static int state_before_str_start;
70 : static int state_before_str_stop;
71 :
72 : /*
73 : * State for handling include files and macro expansion. We use a new
74 : * flex input buffer for each level of include or macro, and create a
75 : * struct _yy_buffer to remember the previous level. There is not a struct
76 : * for the currently active input source; that state is kept in the global
77 : * variables YY_CURRENT_BUFFER, yylineno, and input_filename.
78 : */
79 : static struct _yy_buffer
80 : {
81 : YY_BUFFER_STATE buffer;
82 : long lineno;
83 : char *filename;
84 : struct _yy_buffer *next;
85 : } *yy_buffer = NULL;
86 :
87 : /*
88 : * Vars for handling ifdef/elif/endif constructs. preproc_tos is the current
89 : * nesting depth of such constructs, and stacked_if_value[preproc_tos] is the
90 : * state for the innermost level. (For convenience, stacked_if_value[0] is
91 : * initialized as though we are in the active branch of some outermost IF.)
92 : * The active field is true if the current branch is active (being expanded).
93 : * The saw_active field is true if we have found any successful branch,
94 : * so that all subsequent branches of this level should be skipped.
95 : * The else_branch field is true if we've found an 'else' (so that another
96 : * 'else' or 'elif' at this level is an error.)
97 : * For IFs nested within an inactive branch, all branches always have active
98 : * set to false, but saw_active and else_branch are maintained normally.
99 : * ifcond is valid only while evaluating an if-condition; it's true if we
100 : * are doing ifdef, false if ifndef.
101 : */
102 : #define MAX_NESTED_IF 128
103 : static short preproc_tos;
104 : static bool ifcond;
105 : static struct _if_value
106 : {
107 : bool active;
108 : bool saw_active;
109 : bool else_branch;
110 : } stacked_if_value[MAX_NESTED_IF];
111 :
112 : %}
113 :
114 : %option 8bit
115 : %option never-interactive
116 : %option nodefault
117 : %option noinput
118 : %option noyywrap
119 : %option warn
120 : %option yylineno
121 : %option prefix="base_yy"
122 :
123 : /*
124 : * OK, here is a short description of lex/flex rules behavior.
125 : * The longest pattern which matches an input string is always chosen.
126 : * For equal-length patterns, the first occurring in the rules list is chosen.
127 : * INITIAL is the starting state, to which all non-conditional rules apply.
128 : * Exclusive states change parsing rules while the state is active. When in
129 : * an exclusive state, only those rules defined for that state apply.
130 : *
131 : * We use exclusive states for quoted strings, extended comments,
132 : * and to eliminate parsing troubles for numeric strings.
133 : * Exclusive states:
134 : * <xb> bit string literal
135 : * <xc> extended C-style comments
136 : * <xd> delimited identifiers (double-quoted identifiers)
137 : * <xdc> double-quoted strings in C
138 : * <xh> hexadecimal byte string
139 : * <xn> national character quoted strings
140 : * <xq> standard quoted strings
141 : * <xqs> quote stop (detect continued strings)
142 : * <xe> extended quoted strings (support backslash escape sequences)
143 : * <xqc> single-quoted strings in C
144 : * <xdolq> $foo$ quoted strings
145 : * <xui> quoted identifier with Unicode escapes
146 : * <xus> quoted string with Unicode escapes
147 : * <xcond> condition of an EXEC SQL IFDEF construct
148 : * <xskip> skipping the inactive part of an EXEC SQL IFDEF construct
149 : *
150 : * Note: we intentionally don't mimic the backend's <xeu> state; we have
151 : * no need to distinguish it from <xe> state.
152 : *
153 : * Remember to add an <<EOF>> case whenever you add a new exclusive state!
154 : * The default one is probably not the right thing.
155 : */
156 :
157 : %x xb
158 : %x xc
159 : %x xd
160 : %x xdc
161 : %x xh
162 : %x xn
163 : %x xq
164 : %x xqs
165 : %x xe
166 : %x xqc
167 : %x xdolq
168 : %x xui
169 : %x xus
170 : %x xcond
171 : %x xskip
172 :
173 : /* Additional exclusive states that are specific to ECPG */
174 : %x C SQL incl def def_ident undef
175 :
176 : /*
177 : * In order to make the world safe for Windows and Mac clients as well as
178 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
179 : * sequence will be seen as two successive newlines, but that doesn't cause
180 : * any problems. SQL-style comments, which start with -- and extend to the
181 : * next newline, are treated as equivalent to a single whitespace character.
182 : *
183 : * NOTE a fine point: if there is no newline following --, we will absorb
184 : * everything to the end of the input as a comment. This is correct. Older
185 : * versions of Postgres failed to recognize -- as a comment if the input
186 : * did not end with a newline.
187 : *
188 : * non_newline_space tracks all space characters except newlines.
189 : *
190 : * XXX if you change the set of whitespace characters, fix ecpg_isspace()
191 : * to agree.
192 : */
193 :
194 : space [ \t\n\r\f\v]
195 : non_newline_space [ \t\f\v]
196 : newline [\n\r]
197 : non_newline [^\n\r]
198 :
199 : comment ("--"{non_newline}*)
200 :
201 : whitespace ({space}+|{comment})
202 :
203 : /*
204 : * SQL requires at least one newline in the whitespace separating
205 : * string literals that are to be concatenated. Silly, but who are we
206 : * to argue? Note that {whitespace_with_newline} should not have * after
207 : * it, whereas {whitespace} should generally have a * after it...
208 : */
209 :
210 : non_newline_whitespace ({non_newline_space}|{comment})
211 : whitespace_with_newline ({non_newline_whitespace}*{newline}{whitespace}*)
212 :
213 : quote '
214 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
215 : quotecontinue {whitespace_with_newline}{quote}
216 :
217 : /*
218 : * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
219 : * {quotecontinue}. It might seem that this could just be {whitespace}*,
220 : * but if there's a dash after {whitespace_with_newline}, it must be consumed
221 : * to see if there's another dash --- which would start a {comment} and thus
222 : * allow continuation of the {quotecontinue} token.
223 : */
224 : quotecontinuefail {whitespace}*"-"?
225 :
226 : /* Bit string
227 : */
228 : xbstart [bB]{quote}
229 : xbinside [^']*
230 :
231 : /* Hexadecimal byte string */
232 : xhstart [xX]{quote}
233 : xhinside [^']*
234 :
235 : /* National character */
236 : xnstart [nN]{quote}
237 :
238 : /* Quoted string that allows backslash escapes */
239 : xestart [eE]{quote}
240 : xeinside [^\\']+
241 : xeescape [\\][^0-7]
242 : xeoctesc [\\][0-7]{1,3}
243 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
244 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
245 :
246 : /* Extended quote
247 : * xqdouble implements embedded quote, ''''
248 : */
249 : xqstart {quote}
250 : xqdouble {quote}{quote}
251 : xqcquote [\\]{quote}
252 : xqinside [^']+
253 :
254 : /* $foo$ style quotes ("dollar quoting")
255 : * The quoted string starts with $foo$ where "foo" is an optional string
256 : * in the form of an identifier, except that it may not contain "$",
257 : * and extends to the first occurrence of an identical string.
258 : * There is *no* processing of the quoted text.
259 : *
260 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
261 : * fails to match its trailing "$".
262 : */
263 : dolq_start [A-Za-z\200-\377_]
264 : dolq_cont [A-Za-z\200-\377_0-9]
265 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
266 : dolqfailed \${dolq_start}{dolq_cont}*
267 : dolqinside [^$]+
268 :
269 : /* Double quote
270 : * Allows embedded spaces and other special characters in identifiers.
271 : */
272 : dquote \"
273 : xdstart {dquote}
274 : xdstop {dquote}
275 : xddouble {dquote}{dquote}
276 : xdinside [^"]+
277 :
278 : /* Quoted identifier with Unicode escapes */
279 : xuistart [uU]&{dquote}
280 :
281 : /* Quoted string with Unicode escapes */
282 : xusstart [uU]&{quote}
283 :
284 : /* special stuff for C strings */
285 : xdcqq \\\\
286 : xdcqdq \\\"
287 : xdcother [^"]
288 : xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
289 :
290 :
291 : /* C-style comments
292 : *
293 : * The "extended comment" syntax closely resembles allowable operator syntax.
294 : * The tricky part here is to get lex to recognize a string starting with
295 : * slash-star as a comment, when interpreting it as an operator would produce
296 : * a longer match --- remember lex will prefer a longer match! Also, if we
297 : * have something like plus-slash-star, lex will think this is a 3-character
298 : * operator whereas we want to see it as a + operator and a comment start.
299 : * The solution is two-fold:
300 : * 1. append {op_chars}* to xcstart so that it matches as much text as
301 : * {operator} would. Then the tie-breaker (first matching rule of same
302 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
303 : * in case it contains a star-slash that should terminate the comment.
304 : * 2. In the operator rule, check for slash-star within the operator, and
305 : * if found throw it back with yyless(). This handles the plus-slash-star
306 : * problem.
307 : * Dash-dash comments have similar interactions with the operator rule.
308 : */
309 : xcstart \/\*{op_chars}*
310 : xcstop \*+\/
311 : xcinside [^*/]+
312 :
313 : ident_start [A-Za-z\200-\377_]
314 : ident_cont [A-Za-z\200-\377_0-9\$]
315 :
316 : identifier {ident_start}{ident_cont}*
317 :
318 : array ({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])*
319 :
320 : /* Assorted special-case operators and operator-like tokens */
321 : typecast "::"
322 : dot_dot \.\.
323 : colon_equals ":="
324 :
325 : /*
326 : * These operator-like tokens (unlike the above ones) also match the {operator}
327 : * rule, which means that they might be overridden by a longer match if they
328 : * are followed by a comment start or a + or - character. Accordingly, if you
329 : * add to this list, you must also add corresponding code to the {operator}
330 : * block to return the correct token in such cases. (This is not needed in
331 : * psqlscan.l since the token value is ignored there.)
332 : */
333 : equals_greater "=>"
334 : less_equals "<="
335 : greater_equals ">="
336 : less_greater "<>"
337 : not_equals "!="
338 :
339 : /*
340 : * "self" is the set of chars that should be returned as single-character
341 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
342 : * which can be one or more characters long (but if a single-char token
343 : * appears in the "self" set, it is not to be returned as an Op). Note
344 : * that the sets overlap, but each has some chars that are not in the other.
345 : *
346 : * If you change either set, adjust the character lists appearing in the
347 : * rule for "operator"!
348 : */
349 : self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
350 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
351 : operator {op_chars}+
352 :
353 : /*
354 : * Numbers
355 : *
356 : * Unary minus is not part of a number here. Instead we pass it separately to
357 : * the parser, and there it gets coerced via doNegate().
358 : *
359 : * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
360 : *
361 : * {realfail} is added to prevent the need for scanner
362 : * backup when the {real} rule fails to match completely.
363 : */
364 : decdigit [0-9]
365 : hexdigit [0-9A-Fa-f]
366 : octdigit [0-7]
367 : bindigit [0-1]
368 :
369 : decinteger {decdigit}(_?{decdigit})*
370 : hexinteger 0[xX](_?{hexdigit})+
371 : octinteger 0[oO](_?{octdigit})+
372 : bininteger 0[bB](_?{bindigit})+
373 :
374 : hexfail 0[xX]_?
375 : octfail 0[oO]_?
376 : binfail 0[bB]_?
377 :
378 : numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
379 : numericfail {decdigit}+\.\.
380 :
381 : real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
382 : realfail ({decinteger}|{numeric})[Ee][-+]
383 :
384 : decinteger_junk {decinteger}{ident_start}
385 : hexinteger_junk {hexinteger}{ident_start}
386 : octinteger_junk {octinteger}{ident_start}
387 : bininteger_junk {bininteger}{ident_start}
388 : numeric_junk {numeric}{ident_start}
389 : real_junk {real}{ident_start}
390 :
391 : param \${decinteger}
392 : param_junk \${decinteger}{ident_start}
393 :
394 : /* special characters for other dbms */
395 : /* we have to react differently in compat mode */
396 : informix_special [\$]
397 :
398 : other .
399 :
400 : /*
401 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
402 : * Other quoted strings must allow some special characters such as single-quote
403 : * and newline.
404 : * Embedded single-quotes are implemented both in the SQL standard
405 : * style of two adjacent single quotes "''" and in the Postgres/Java style
406 : * of escaped-quote "\'".
407 : * Other embedded escaped characters are matched explicitly and the leading
408 : * backslash is dropped from the string.
409 : * Note that xcstart must appear before operator, as explained above!
410 : * Also whitespace (comment) must appear before operator.
411 : */
412 :
413 : /* some stuff needed for ecpg */
414 : exec [eE][xX][eE][cC]
415 : sql [sS][qQ][lL]
416 : define [dD][eE][fF][iI][nN][eE]
417 : include [iI][nN][cC][lL][uU][dD][eE]
418 : include_next [iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
419 : import [iI][mM][pP][oO][rR][tT]
420 : undef [uU][nN][dD][eE][fF]
421 :
422 : ccomment "//".*\n
423 :
424 : if [iI][fF]
425 : ifdef [iI][fF][dD][eE][fF]
426 : ifndef [iI][fF][nN][dD][eE][fF]
427 : else [eE][lL][sS][eE]
428 : elif [eE][lL][iI][fF]
429 : endif [eE][nN][dD][iI][fF]
430 :
431 : struct [sS][tT][rR][uU][cC][tT]
432 :
433 : exec_sql {exec}{space}*{sql}{space}*
434 : ipdigit ({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
435 : ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
436 :
437 : /* we might want to parse all cpp include files */
438 : cppinclude {space}*#{include}{space}*
439 : cppinclude_next {space}*#{include_next}{space}*
440 :
441 : /* take care of cpp lines, they may also be continued */
442 : /* first a general alternative for every directive whose name does not start with "i" */
443 : /* then the directives starting with "i" (if, ifdef, ifndef, import); they are listed
444 : * one by one because a general alternative would make cppline match "include" too
445 : */
446 : cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline}
447 :
448 : %%
449 :
450 : %{
451 : /* code to execute during start of each call of yylex() */
452 : char *newdefsymbol = NULL;
453 :
454 : token_start = NULL;
455 : %}
456 :
457 : <SQL>{
458 : {whitespace} {
459 : /* ignore */
460 : }
461 : } /* <SQL> */
462 :
463 : <C,SQL>{
464 : {xcstart} { /* comment opener met in C or SQL text: switch to <xc> and copy the opener to yyout */
465 : token_start = yytext;
466 : state_before_str_start = YYSTATE; /* the <xc> rules test this to pick C or SQL behavior and the state to resume */
467 : xcdepth = 0; /* no nested comment has been opened yet */
468 : BEGIN(xc);
469 : /* Put back any characters past slash-star (xcstart also matches trailing op_chars); see above */
470 : yyless(2);
471 : fputs("/*", yyout);
472 : }
473 : } /* <C,SQL> */
474 :
475 : <xc>{
{xcstart}		{
					/*
					 * A comment opener met while already inside a comment.
					 * state_before_str_start tells us whether the enclosing
					 * comment was entered from SQL or from C text.
					 */
					if (state_before_str_start == SQL)
					{
						/*
						 * SQL comments nest: count the new level and write the
						 * opener in altered form (with an underscore) to yyout.
						 */
						xcdepth++;
						/* Put back any characters past slash-star; see above */
						yyless(2);
						fputs("/_*", yyout);
					}
					else if (state_before_str_start == C)
					{
						/*
						 * C comments do not nest, so this is plain comment
						 * text.  {xcstart} also matches any op_chars that
						 * follow slash-star, and those may contain the
						 * star-slash that ends the comment (for example
						 * slash-star-star-slash); even the star right after
						 * the slash can begin that terminator
						 * (slash-star-slash).  Echoing the whole match would
						 * swallow the terminator and keep us in <xc>, so echo
						 * only the slash and rescan everything after it.
						 */
						yyless(1);
						ECHO;
					}
				}
489 :
490 : {xcstop} { /* star-slash: closes one nesting level (SQL) or the whole comment (C) */
491 : if (state_before_str_start == SQL)
492 : {
493 : if (xcdepth <= 0)
494 : {
495 : ECHO; /* the outermost closer is copied through unchanged */
496 : BEGIN(SQL);
497 : token_start = NULL;
498 : }
499 : else
500 : {
501 : xcdepth--;
502 : fputs("*_/", yyout); /* a nested closer is written in altered form, like the nested opener */
503 : }
504 : }
505 : else if (state_before_str_start == C)
506 : {
507 : ECHO; /* xcdepth is not consulted here: the first closer ends a comment entered from C */
508 : BEGIN(C);
509 : token_start = NULL;
510 : }
511 : }
512 :
513 : {xcinside} {
514 : ECHO;
515 : }
516 :
517 : {op_chars} {
518 : ECHO;
519 : }
520 :
521 : \*+ {
522 : ECHO;
523 : }
524 :
525 : <<EOF>> {
526 : mmfatal(PARSE_ERROR, "unterminated /* comment");
527 : }
528 : } /* <xc> */
529 :
530 : <SQL>{
531 : {xbstart} {
532 : token_start = yytext;
533 : state_before_str_start = YYSTATE;
534 : BEGIN(xb);
535 : startlit();
536 : }
537 : } /* <SQL> */
538 :
539 : <xh>{xhinside} |
540 : <xb>{xbinside} {
541 : addlit(yytext, yyleng);
542 : }
543 : <xb><<EOF>> { mmfatal(PARSE_ERROR, "unterminated bit string literal"); }
544 :
545 : <SQL>{xhstart} {
546 : token_start = yytext;
547 : state_before_str_start = YYSTATE;
548 : BEGIN(xh);
549 : startlit();
550 : }
551 : <xh><<EOF>> { mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); }
552 :
553 : <C>{xqstart} {
554 : token_start = yytext;
555 : state_before_str_start = YYSTATE;
556 : BEGIN(xqc);
557 : startlit();
558 : }
559 :
560 : <SQL>{
561 : {xnstart} {
562 : /* National character.
563 : * Transfer it as-is to the backend.
564 : */
565 : token_start = yytext;
566 : state_before_str_start = YYSTATE;
567 : BEGIN(xn);
568 : startlit();
569 : }
570 :
571 : {xqstart} {
572 : token_start = yytext;
573 : state_before_str_start = YYSTATE;
574 : BEGIN(xq);
575 : startlit();
576 : }
577 : {xestart} {
578 : token_start = yytext;
579 : state_before_str_start = YYSTATE;
580 : BEGIN(xe);
581 : startlit();
582 : }
583 : {xusstart} {
584 : token_start = yytext;
585 : state_before_str_start = YYSTATE;
586 : BEGIN(xus);
587 : startlit();
588 : }
589 : } /* <SQL> */
590 :
591 : <xb,xh,xq,xqc,xe,xn,xus>{quote} {
592 : /*
593 : * When we are scanning a quoted string and see an end
594 : * quote, we must look ahead for a possible continuation.
595 : * If we don't see one, we know the end quote was in fact
596 : * the end of the string. To reduce the lexer table size,
597 : * we use a single "xqs" state to do the lookahead for all
598 : * types of strings.
599 : */
600 : state_before_str_stop = YYSTATE; /* the kind of literal being scanned; selects the token built once the lookahead is done */
601 : BEGIN(xqs);
602 : }
603 : <xqs>{quotecontinue} {
604 : /*
605 : * Found a quote continuation, so return to the in-quote
606 : * state and continue scanning the literal. Nothing is
607 : * added to the literal's contents.
608 : */
609 : BEGIN(state_before_str_stop);
610 : }
611 : <xqs>{quotecontinuefail} |
612 : <xqs>{other} |
613 : <xqs><<EOF>> {
614 : /*
615 : * Failed to see a quote continuation. Throw back
616 : * everything after the end quote, and handle the string
617 : * according to the state we were in previously.
618 : */
619 : yyless(0);
620 : BEGIN(state_before_str_start); /* resume the state that was active when the literal was opened */
621 :
622 : switch (state_before_str_stop) /* each case re-attaches the prefix and quotes, which are not stored in literalbuf */
623 : {
624 : case xb:
625 : if (literalbuf[strspn(literalbuf, "01")] != '\0') /* some character other than 0 or 1 is present */
626 : mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string literal");
627 : base_yylval.str = psprintf("b'%s'", literalbuf);
628 : return BCONST;
629 : case xh:
630 : if (literalbuf[strspn(literalbuf, "0123456789abcdefABCDEF")] != '\0') /* some non-hexadecimal character is present */
631 : mmerror(PARSE_ERROR, ET_ERROR, "invalid hexadecimal string literal");
632 : base_yylval.str = psprintf("x'%s'", literalbuf);
633 : return XCONST;
634 : case xq:
635 : /* fallthrough */
636 : case xqc: /* single-quoted text met in C code is returned the same way as a plain SQL string */
637 : base_yylval.str = psprintf("'%s'", literalbuf);
638 : return SCONST;
639 : case xe:
640 : base_yylval.str = psprintf("E'%s'", literalbuf);
641 : return SCONST;
642 : case xn:
643 : base_yylval.str = psprintf("N'%s'", literalbuf);
644 : return SCONST;
645 : case xus:
646 : base_yylval.str = psprintf("U&'%s'", literalbuf);
647 : return USCONST;
648 : default: /* state_before_str_stop was not one of the states listed on the quote rule */
649 : mmfatal(PARSE_ERROR, "unhandled previous state in xqs\n");
650 : }
651 : }
652 :
653 : <xq,xe,xn,xus>{xqdouble} { addlit(yytext, yyleng); }
654 : <xqc>{xqcquote} { addlit(yytext, yyleng); }
655 : <xq,xqc,xn,xus>{xqinside} { addlit(yytext, yyleng); }
656 : <xe>{xeinside} {
657 : addlit(yytext, yyleng);
658 : }
659 : <xe>{xeunicode} {
660 : addlit(yytext, yyleng);
661 : }
662 : <xe>{xeescape} {
663 : addlit(yytext, yyleng);
664 : }
665 : <xe>{xeoctesc} {
666 : addlit(yytext, yyleng);
667 : }
668 : <xe>{xehexesc} {
669 : addlit(yytext, yyleng);
670 : }
671 : <xe>. {
672 : /* This is only needed for \ just before EOF */
673 : addlitchar(yytext[0]);
674 : }
675 : <xq,xqc,xe,xn,xus><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted string"); }
676 :
677 : <SQL>{
678 : {dolqdelim} {
679 : token_start = yytext;
680 : if (dolqstart)
681 : free(dolqstart);
682 : dolqstart = mm_strdup(yytext);
683 : BEGIN(xdolq);
684 : startlit();
685 : addlit(yytext, yyleng);
686 : }
687 : {dolqfailed} {
688 : /* throw back all but the initial "$" */
689 : yyless(1);
690 : /* and treat it as {other} */
691 : return yytext[0];
692 : }
693 : } /* <SQL> */
694 :
695 : <xdolq>{dolqdelim} {
696 : if (strcmp(yytext, dolqstart) == 0)
697 : {
698 : addlit(yytext, yyleng);
699 : free(dolqstart);
700 : dolqstart = NULL;
701 : BEGIN(SQL);
702 : base_yylval.str = mm_strdup(literalbuf);
703 : return SCONST;
704 : }
705 : else
706 : {
707 : /*
708 : * When we fail to match $...$ to dolqstart, transfer
709 : * the $... part to the output, but put back the final
710 : * $ for rescanning. Consider $delim$...$junk$delim$
711 : */
712 : addlit(yytext, yyleng - 1);
713 : yyless(yyleng - 1);
714 : }
715 : }
716 : <xdolq>{dolqinside} {
717 : addlit(yytext, yyleng);
718 : }
719 : <xdolq>{dolqfailed} {
720 : addlit(yytext, yyleng);
721 : }
722 : <xdolq>. {
723 : /* single quote or dollar sign */
724 : addlitchar(yytext[0]);
725 : }
726 : <xdolq><<EOF>> { mmfatal(PARSE_ERROR, "unterminated dollar-quoted string"); }
727 :
728 : <SQL>{
729 : {xdstart} {
730 : state_before_str_start = YYSTATE;
731 : BEGIN(xd);
732 : startlit();
733 : }
734 : {xuistart} {
735 : state_before_str_start = YYSTATE;
736 : BEGIN(xui);
737 : startlit();
738 : }
739 : } /* <SQL> */
740 :
741 : <xd>{xdstop} {
742 : BEGIN(state_before_str_start);
743 : if (literallen == 0)
744 : mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
745 : /*
746 : * The server will truncate the identifier here. We do
747 : * not, as (1) it does not change the result; (2) we don't
748 : * know what NAMEDATALEN the server might use; (3) this
749 : * code path is also taken for literal query strings in
750 : * PREPARE and EXECUTE IMMEDIATE, which can certainly be
751 : * longer than NAMEDATALEN.
752 : */
753 : base_yylval.str = mm_strdup(literalbuf);
754 : return CSTRING;
755 : }
756 : <xdc>{xdstop} {
757 : BEGIN(state_before_str_start);
758 : base_yylval.str = mm_strdup(literalbuf);
759 : return CSTRING;
760 : }
761 : <xui>{dquote} {
762 : BEGIN(state_before_str_start);
763 : if (literallen == 0)
764 : mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
765 : /* The backend will truncate the identifier here. We do not as it does not change the result. */
766 : base_yylval.str = psprintf("U&\"%s\"", literalbuf);
767 : return UIDENT;
768 : }
769 : <xd,xui>{xddouble} {
770 : addlit(yytext, yyleng);
771 : }
772 : <xd,xui>{xdinside} {
773 : addlit(yytext, yyleng);
774 : }
775 : <xd,xui><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted identifier"); }
776 : <C>{xdstart} {
777 : state_before_str_start = YYSTATE;
778 : BEGIN(xdc);
779 : startlit();
780 : }
781 : <xdc>{xdcinside} {
782 : addlit(yytext, yyleng);
783 : }
784 : <xdc><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted string"); }
785 :
786 : <SQL>{
787 : {typecast} {
788 : return TYPECAST;
789 : }
790 :
791 : {dot_dot} {
792 : return DOT_DOT;
793 : }
794 :
795 : {colon_equals} {
796 : return COLON_EQUALS;
797 : }
798 :
799 : {equals_greater} {
800 : return EQUALS_GREATER;
801 : }
802 :
803 : {less_equals} {
804 : return LESS_EQUALS;
805 : }
806 :
807 : {greater_equals} {
808 : return GREATER_EQUALS;
809 : }
810 :
811 : {less_greater} {
812 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
813 : return NOT_EQUALS;
814 : }
815 :
816 : {not_equals} {
817 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
818 : return NOT_EQUALS;
819 : }
820 :
821 : {informix_special} {
822 : /* are we simulating Informix? If so, push a colon back in place of the consumed dollar sign and return no token, so scanning resumes at that colon */
823 : if (INFORMIX_MODE)
824 : {
825 : unput(':');
826 : }
827 : else
828 : return yytext[0]; /* otherwise the dollar sign is an ordinary single-character token */
829 : }
830 :
831 : {self} {
832 : /*
833 : * We may find a ';' inside a structure
834 : * definition in a TYPE or VAR statement.
835 : * This is not an EOL marker.
836 : */
837 : if (yytext[0] == ';' && struct_level == 0)
838 : BEGIN(C);
839 : return yytext[0];
840 : }
841 :
842 : {operator} {
843 : /*
844 : * Check for embedded slash-star or dash-dash; those
845 : * are comment starts, so operator must stop there.
846 : * Note that slash-star or dash-dash at the first
847 : * character will match a prior rule, not this one.
848 : */
849 : int nchars = yyleng;
850 : char *slashstar = strstr(yytext, "/*");
851 : char *dashdash = strstr(yytext, "--");
852 :
853 : if (slashstar && dashdash)
854 : {
855 : /* if both appear, take the first one */
856 : if (slashstar > dashdash)
857 : slashstar = dashdash;
858 : }
859 : else if (!slashstar)
860 : slashstar = dashdash;
861 : if (slashstar)
862 : nchars = slashstar - yytext;
863 :
864 : /*
865 : * For SQL compatibility, '+' and '-' cannot be the
866 : * last char of a multi-char operator unless the operator
867 : * contains chars that are not in SQL operators.
868 : * The idea is to lex '=-' as two operators, but not
869 : * to forbid operator names like '?-' that could not be
870 : * sequences of SQL operators.
871 : */
872 : if (nchars > 1 &&
873 : (yytext[nchars - 1] == '+' ||
874 : yytext[nchars - 1] == '-'))
875 : {
876 : int ic;
877 :
878 : for (ic = nchars - 2; ic >= 0; ic--)
879 : {
880 : char c = yytext[ic];
881 : if (c == '~' || c == '!' || c == '@' ||
882 : c == '#' || c == '^' || c == '&' ||
883 : c == '|' || c == '`' || c == '?' ||
884 : c == '%')
885 : break;
886 : }
887 : if (ic < 0)
888 : {
889 : /*
890 : * didn't find a qualifying character, so remove
891 : * all trailing [+-]
892 : */
893 : do {
894 : nchars--;
895 : } while (nchars > 1 &&
896 : (yytext[nchars - 1] == '+' ||
897 : yytext[nchars - 1] == '-'));
898 : }
899 : }
900 :
901 : if (nchars < yyleng)
902 : {
903 : /* Strip the unwanted chars from the token */
904 : yyless(nchars);
905 : /*
906 : * If what we have left is only one char, and it's
907 : * one of the characters matching "self", then
908 : * return it as a character token the same way
909 : * that the "self" rule would have.
910 : */
911 : if (nchars == 1 &&
912 : strchr(",()[].;:+-*/%^<>=", yytext[0]))
913 : return yytext[0];
914 : /*
915 : * Likewise, if what we have left is two chars, and
916 : * those match the tokens ">=", "<=", "=>", "<>" or
917 : * "!=", then we must return the appropriate token
918 : * rather than the generic Op.
919 : */
920 : if (nchars == 2)
921 : {
922 : if (yytext[0] == '=' && yytext[1] == '>')
923 : return EQUALS_GREATER;
924 : if (yytext[0] == '>' && yytext[1] == '=')
925 : return GREATER_EQUALS;
926 : if (yytext[0] == '<' && yytext[1] == '=')
927 : return LESS_EQUALS;
928 : if (yytext[0] == '<' && yytext[1] == '>')
929 : return NOT_EQUALS;
930 : if (yytext[0] == '!' && yytext[1] == '=')
931 : return NOT_EQUALS;
932 : }
933 : }
934 :
935 : base_yylval.str = mm_strdup(yytext);
936 : return Op;
937 : }
938 :
{param}			{
					/*
					 * Positional parameter such as $1: convert the digits
					 * after the dollar sign into the parameter number.
					 *
					 * {param} is built on {decinteger}, which admits "_"
					 * digit separators, so "$1_000" arrives here as a single
					 * token.  atol() would stop at the underscore and
					 * silently yield parameter 1, and it offers no way to
					 * notice a number that is out of range.  Parse with
					 * strtol() and check both conditions explicitly.
					 */
					char	   *endptr;
					long		paramno;

					errno = 0;
					paramno = strtol(yytext + 1, &endptr, 10);
					if (errno == ERANGE || paramno > INT_MAX)
						mmfatal(PARSE_ERROR, "parameter number too large");
					if (*endptr != '\0')
						mmfatal(PARSE_ERROR, "trailing junk after parameter");
					base_yylval.ival = (int) paramno;
					return PARAM;
				}
943 : {param_junk} {
944 : mmfatal(PARSE_ERROR, "trailing junk after parameter");
945 : }
946 :
947 : {ip} {
948 : base_yylval.str = mm_strdup(yytext);
949 : return IP;
950 : }
951 : } /* <SQL> */
952 :
953 : <C,SQL>{
954 : {decinteger} { /* decimal integers are recognized in both C and SQL text */
955 : return process_integer_literal(yytext, &base_yylval, 10);
956 : }
957 : {hexinteger} { /* hexadecimal integers too; the octal and binary forms are SQL-only rules */
958 : return process_integer_literal(yytext, &base_yylval, 16);
959 : }
960 : {numeric} { /* the literal text is passed on unchanged in an FCONST token */
961 : base_yylval.str = mm_strdup(yytext);
962 : return FCONST;
963 : }
964 : {numericfail} {
965 : /* throw back the .., and treat as integer */
966 : yyless(yyleng - 2);
967 : return process_integer_literal(yytext, &base_yylval, 10);
968 : }
969 : {real} { /* the literal text is passed on unchanged in an FCONST token */
970 : base_yylval.str = mm_strdup(yytext);
971 : return FCONST;
972 : }
973 : {realfail} {
974 : /*
975 : * throw back the [Ee][+-], and leave it to process_integer_literal()
976 : * to deal with what remains, which is a {decinteger} or a {numeric}.
977 : */
978 : yyless(yyleng - 2);
979 : return process_integer_literal(yytext, &base_yylval, 10);
980 : }
981 : } /* <C,SQL> */
982 :
983 : <SQL>{
984 : {octinteger} {
985 : return process_integer_literal(yytext, &base_yylval, 8);
986 : }
987 : {bininteger} {
988 : return process_integer_literal(yytext, &base_yylval, 2);
989 : }
990 :
991 : /*
992 : * Note that some trailing junk is valid in C (such as 100LL), so we
993 : * confine these trailing-junk checks to SQL mode.
994 : */
995 : {decinteger_junk} {
996 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
997 : }
998 : {hexinteger_junk} {
999 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1000 : }
1001 : {octinteger_junk} {
1002 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1003 : }
1004 : {bininteger_junk} {
1005 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1006 : }
1007 : {numeric_junk} {
1008 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1009 : }
1010 : {real_junk} {
1011 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1012 : }
1013 :
1014 : :{identifier}((("->"|\.){identifier})|(\[{array}\]))* { /* host variable reference: a colon, an identifier, then any run of member accesses and bracketed subscripts */
1015 : base_yylval.str = mm_strdup(yytext+1); /* skip the leading colon and keep the rest of the reference verbatim */
1016 : return CVARIABLE;
1017 : }
1018 :
1019 : {identifier} {
1020 : /* First check to see if it's a define symbol to expand */
1021 : if (!isdefine()) /* when isdefine() returns true this action returns no token and scanning just continues; presumably isdefine() has arranged for the replacement text to be scanned - TODO confirm in its definition */
1022 : {
1023 : int kwvalue;
1024 :
1025 : /*
1026 : * User-defined typedefs override SQL keywords, but
1027 : * not C keywords. Currently, a typedef name is just
1028 : * reported as IDENT, but someday we might need to
1029 : * return a distinct token type.
1030 : */
1031 : if (get_typedef(yytext, true) == NULL) /* the SQL/ECPG keyword lookup is attempted only when no typedef of this name is found */
1032 : {
1033 : /* Is it an SQL/ECPG keyword? */
1034 : kwvalue = ScanECPGKeywordLookup(yytext);
1035 : if (kwvalue >= 0)
1036 : return kwvalue;
1037 : }
1038 :
1039 : /* Is it a C keyword? This lookup runs for typedef names as well. */
1040 : kwvalue = ScanCKeywordLookup(yytext);
1041 : if (kwvalue >= 0)
1042 : return kwvalue;
1043 :
1044 : /*
1045 : * None of the above. Return it as an identifier.
1046 : *
1047 : * The backend will attempt to truncate and case-fold
1048 : * the identifier, but I see no good reason for ecpg
1049 : * to do so; that's just another way that ecpg could get
1050 : * out of step with the backend.
1051 : */
1052 : base_yylval.str = mm_strdup(yytext);
1053 : return IDENT;
1054 : }
1055 : }
1056 :
1057 : {other} {
1058 : return yytext[0];
1059 : }
1060 : } /* <SQL> */
1061 :
1062 : /*
1063 : * Begin ECPG-specific rules
1064 : */
1065 :
<C>{exec_sql}	{
		/* EXEC SQL seen in C text: scan what follows in SQL mode. */
		BEGIN(SQL);
		return SQL_START;
	}
<C>{informix_special}	{
		/* The Informix shorthand opens a statement only in Informix mode. */
		if (!INFORMIX_MODE)
			return S_ANYTHING;

		BEGIN(SQL);
		return SQL_START;
	}
<C>{ccomment}	{
		/* Pass the matched text straight through to the output. */
		ECHO;
	}
<C>{cppinclude}	{
		/*
		 * Without system_includes the directive is passed through as a
		 * CPP_LINE token; otherwise the <incl> rules pick up the file name.
		 */
		if (!system_includes)
		{
			base_yylval.str = mm_strdup(yytext);
			return CPP_LINE;
		}

		include_next = false;
		BEGIN(incl);
	}
<C>{cppinclude_next}	{
		/* Same as above, but tell parse_include() to skip the first hit. */
		if (!system_includes)
		{
			base_yylval.str = mm_strdup(yytext);
			return CPP_LINE;
		}

		include_next = true;
		BEGIN(incl);
	}
<C,SQL>{cppline}	{
		/* Any other preprocessor line is handed to the parser verbatim. */
		base_yylval.str = mm_strdup(yytext);
		return CPP_LINE;
	}
<C>{identifier}	{
		/*
		 * Function-name detection: while no brace or parenthesis is open,
		 * remember the most recent identifier.  The one still stored when
		 * the first '(' or '{' shows up is taken as the function name.
		 */
		if (braces_open == 0 && parenths_open == 0)
		{
			free(current_function);		/* free(NULL) is a no-op */
			current_function = mm_strdup(yytext);
		}

		/*
		 * Informix applies SQL defines only in SQL text, but a few built-in
		 * defines are honored in C text too for compatibility.  If either
		 * expansion kicks in, the scanner has been redirected and no token
		 * is returned from here.
		 */
		if ((!INFORMIX_MODE || !isinformixdefine()) && !isdefine())
		{
			int			token = ScanCKeywordLookup(yytext);

			if (token >= 0)
				return token;

			base_yylval.str = mm_strdup(yytext);
			return IDENT;
		}
	}
<C>{xcstop}	{
		/* {xcstop} outside of a comment is reported, but scanning goes on. */
		mmerror(PARSE_ERROR, ET_ERROR, "nested /* ... */ comments");
	}

	/* Single-character tokens are returned as their own character code. */
<C>":"		{ return ':'; }
<C>";"		{ return ';'; }
<C>","		{ return ','; }
<C>"*"		{ return '*'; }
<C>"%"		{ return '%'; }
<C>"/"		{ return '/'; }
<C>"+"		{ return '+'; }
<C>"-"		{ return '-'; }

	/* Parentheses also maintain the nesting depth used to spot function names. */
<C>"("		{ parenths_open++; return '('; }
<C>")"		{ parenths_open--; return ')'; }

	/* Whitespace is echoed, in C text as well as in skipped text. */
<C,xskip>{space}	{ ECHO; }

<C>\{		{ return '{'; }
<C>\}		{ return '}'; }
<C>\[		{ return '['; }
<C>\]		{ return ']'; }
<C>\=		{ return '='; }

	/* Multi-character C operators each have a dedicated token. */
<C>"->"		{ return S_MEMBER; }
<C>">>"		{ return S_RSHIFT; }
<C>"<<"		{ return S_LSHIFT; }
<C>"||"		{ return S_OR; }
<C>"&&"		{ return S_AND; }
<C>"++"		{ return S_INC; }
<C>"--"		{ return S_DEC; }
<C>"=="		{ return S_EQUAL; }
<C>"!="		{ return S_NEQUAL; }
<C>"+="		{ return S_ADD; }
<C>"-="		{ return S_SUB; }
<C>"*="		{ return S_MUL; }
<C>"/="		{ return S_DIV; }
<C>"%="		{ return S_MOD; }
<C>"->*"	{ return S_MEMPOINT; }
<C>".*"		{ return S_DOTPOINT; }

	/* Anything else in C text is opaque to the parser. */
<C>{other}	{ return S_ANYTHING; }
<C>{exec_sql}{define}{space}*	{
		/* The symbol name is collected by the <def_ident> rules. */
		BEGIN(def_ident);
	}
<C>{informix_special}{define}{space}*	{
		/*
		 * Outside Informix mode this is not a directive: give back all but
		 * the first character and report that one as ordinary C text.
		 */
		if (!INFORMIX_MODE)
		{
			yyless(1);
			return S_ANYTHING;
		}

		BEGIN(def_ident);
	}
<C>{exec_sql}{undef}{space}*	{
		/* The symbol name is handled by the <undef> rules. */
		BEGIN(undef);
	}
<C>{informix_special}{undef}{space}*	{
		/*
		 * Outside Informix mode this is not a directive: give back all but
		 * the first character and report that one as ordinary C text.
		 */
		if (!INFORMIX_MODE)
		{
			yyless(1);
			return S_ANYTHING;
		}

		BEGIN(undef);
	}
<undef>{identifier}{space}*";"	{
		struct _defines *cur;
		struct _defines *prev = NULL;
		int			last;

		/*
		 * Cut off the ";" and any whitespace in front of it.  The match
		 * holds at least one non-space character followed by the ";".
		 */
		last = strlen(yytext) - 2;
		while (last > 0 && ecpg_isspace(yytext[last]))
			last--;
		yytext[last + 1] = '\0';

		/* Look for the symbol; at most one entry is expected to match. */
		for (cur = defines; cur != NULL; prev = cur, cur = cur->next)
		{
			if (strcmp(yytext, cur->name) != 0)
				continue;

			/* Mark it undefined by dropping its value. */
			free(cur->value);
			cur->value = NULL;

			/* An entry that carries a cmdvalue must stay in the list. */
			if (cur->cmdvalue == NULL)
			{
				if (prev != NULL)
					prev->next = cur->next;
				else
					defines = cur->next;
				free(cur->name);
				free(cur);
			}
			break;
		}

		BEGIN(C);
	}
<undef>{other}|\n	{
		mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL UNDEF command");
		yyterminate();
	}
<C>{exec_sql}{include}{space}*	{
		/* The file name is picked up by the <incl> rules. */
		BEGIN(incl);
	}
<C>{informix_special}{include}{space}*	{
		/*
		 * Outside Informix mode this is not a directive: give back all but
		 * the first character and report that one as ordinary C text.
		 */
		if (!INFORMIX_MODE)
		{
			yyless(1);
			return S_ANYTHING;
		}

		BEGIN(incl);
	}
<C,xskip>{exec_sql}{ifdef}{space}*	{
		/* Open a new, initially inactive level on the condition stack. */
		if (preproc_tos >= MAX_NESTED_IF-1)
			mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");

		++preproc_tos;
		stacked_if_value[preproc_tos].else_branch = false;
		stacked_if_value[preproc_tos].saw_active = false;
		stacked_if_value[preproc_tos].active = false;

		/* <xcond> evaluates the symbol; true means "active if defined". */
		ifcond = true;
		BEGIN(xcond);
	}
<C,xskip>{informix_special}{ifdef}{space}*	{
		/*
		 * Outside Informix mode this is not a directive: give back all but
		 * the first character and report that one as ordinary C text.
		 */
		if (!INFORMIX_MODE)
		{
			yyless(1);
			return S_ANYTHING;
		}

		if (preproc_tos >= MAX_NESTED_IF-1)
			mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");

		++preproc_tos;
		stacked_if_value[preproc_tos].else_branch = false;
		stacked_if_value[preproc_tos].saw_active = false;
		stacked_if_value[preproc_tos].active = false;

		ifcond = true;
		BEGIN(xcond);
	}
<C,xskip>{exec_sql}{ifndef}{space}*	{
		/* Open a new, initially inactive level on the condition stack. */
		if (preproc_tos >= MAX_NESTED_IF-1)
			mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");

		++preproc_tos;
		stacked_if_value[preproc_tos].else_branch = false;
		stacked_if_value[preproc_tos].saw_active = false;
		stacked_if_value[preproc_tos].active = false;

		/* <xcond> evaluates the symbol; false means "active if undefined". */
		ifcond = false;
		BEGIN(xcond);
	}
<C,xskip>{informix_special}{ifndef}{space}*	{
		/*
		 * Outside Informix mode this is not a directive: give back all but
		 * the first character and report that one as ordinary C text.
		 */
		if (!INFORMIX_MODE)
		{
			yyless(1);
			return S_ANYTHING;
		}

		if (preproc_tos >= MAX_NESTED_IF-1)
			mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");

		++preproc_tos;
		stacked_if_value[preproc_tos].else_branch = false;
		stacked_if_value[preproc_tos].saw_active = false;
		stacked_if_value[preproc_tos].active = false;

		ifcond = false;
		BEGIN(xcond);
	}
<C,xskip>{exec_sql}{elif}{space}*	{
		/* ELIF needs an open condition that has not yet reached its ELSE. */
		if (preproc_tos == 0)
			mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
		if (stacked_if_value[preproc_tos].else_branch)
			mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");

		/* ELIF tests for "defined", like IFDEF; <xcond> does the rest. */
		ifcond = true;
		BEGIN(xcond);
	}
<C,xskip>{informix_special}{elif}{space}*	{
		/*
		 * Outside Informix mode this is not a directive: give back all but
		 * the first character and report that one as ordinary C text.
		 */
		if (!INFORMIX_MODE)
		{
			yyless(1);
			return S_ANYTHING;
		}

		if (preproc_tos == 0)
			mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
		if (stacked_if_value[preproc_tos].else_branch)
			mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");

		ifcond = true;
		BEGIN(xcond);
	}
1332 :
<C,xskip>{exec_sql}{else}{space}*";"	{
		/*
		 * Only ENDIF pops the condition stack, so a second ELSE at the same
		 * level has to be caught here.
		 */
		if (preproc_tos == 0)
			mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
		else if (stacked_if_value[preproc_tos].else_branch)
			mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
		else
		{
			/*
			 * The ELSE part is live only if the enclosing level is live and
			 * no earlier branch of this condition was.
			 */
			stacked_if_value[preproc_tos].else_branch = true;
			stacked_if_value[preproc_tos].active =
				(stacked_if_value[preproc_tos-1].active &&
				 !stacked_if_value[preproc_tos].saw_active);
			stacked_if_value[preproc_tos].saw_active = true;

			BEGIN(stacked_if_value[preproc_tos].active ? C : xskip);
		}
	}
<C,xskip>{informix_special}{else}{space}*";"	{
		/*
		 * Outside Informix mode this is not a directive: give back all but
		 * the first character and report that one as ordinary C text.
		 */
		if (!INFORMIX_MODE)
		{
			yyless(1);
			return S_ANYTHING;
		}

		if (preproc_tos == 0)
			mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
		else if (stacked_if_value[preproc_tos].else_branch)
			mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
		else
		{
			stacked_if_value[preproc_tos].else_branch = true;
			stacked_if_value[preproc_tos].active =
				(stacked_if_value[preproc_tos-1].active &&
				 !stacked_if_value[preproc_tos].saw_active);
			stacked_if_value[preproc_tos].saw_active = true;

			BEGIN(stacked_if_value[preproc_tos].active ? C : xskip);
		}
	}
<C,xskip>{exec_sql}{endif}{space}*";"	{
		/* Pop one level; ENDIF without an open condition is an error. */
		if (preproc_tos == 0)
			mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");
		else
			preproc_tos--;

		/* Resume in whatever mode the enclosing level dictates. */
		BEGIN(stacked_if_value[preproc_tos].active ? C : xskip);
	}
<C,xskip>{informix_special}{endif}{space}*";"	{
		/*
		 * Outside Informix mode this is not a directive: give back all but
		 * the first character and report that one as ordinary C text.
		 */
		if (!INFORMIX_MODE)
		{
			yyless(1);
			return S_ANYTHING;
		}

		if (preproc_tos == 0)
			mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");
		else
			preproc_tos--;

		BEGIN(stacked_if_value[preproc_tos].active ? C : xskip);
	}
1411 :
<xskip>{other}	{
		/* text inside an inactive conditional branch is dropped */
	}

<xcond>{identifier}{space}*";"	{
		struct _defines *def;
		unsigned int last;
		bool		is_defined = false;
		bool		this_active;

		/*
		 * Cut off the ";" and any whitespace in front of it.  The match
		 * holds at least one non-space character followed by the ";".
		 */
		last = strlen(yytext) - 2;
		while (last > 0 && ecpg_isspace(yytext[last]))
			last--;
		yytext[last + 1] = '\0';

		/* A symbol counts as defined only if its entry still has a value. */
		for (def = defines; def != NULL; def = def->next)
		{
			if (strcmp(yytext, def->name) == 0)
			{
				is_defined = (def->value != NULL);
				break;
			}
		}

		/* ifcond selects between the "if defined" and "if not defined" sense. */
		if (is_defined)
			this_active = ifcond;
		else
			this_active = !ifcond;

		/*
		 * This branch is live only if the enclosing level is live, no
		 * earlier branch of the same condition was taken, and the test
		 * itself succeeded.
		 */
		stacked_if_value[preproc_tos].active =
			(stacked_if_value[preproc_tos-1].active &&
			 !stacked_if_value[preproc_tos].saw_active &&
			 this_active);
		stacked_if_value[preproc_tos].saw_active |= this_active;

		BEGIN(stacked_if_value[preproc_tos].active ? C : xskip);
	}

<xcond>{other}|\n	{
		mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL IFDEF command");
		yyterminate();
	}
<def_ident>{identifier}	{
		/* Keep the symbol name and start collecting its value text. */
		newdefsymbol = mm_strdup(yytext);
		BEGIN(def);
		startlit();
	}
<def_ident>{other}|\n	{
		mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL DEFINE command");
		yyterminate();
	}
<def>{space}*";"	{
		struct _defines *entry;

		/* The ";" ends the value; see whether the symbol is already known. */
		for (entry = defines; entry != NULL; entry = entry->next)
		{
			if (strcmp(newdefsymbol, entry->name) == 0)
				break;
		}

		if (entry != NULL)
		{
			/* Redefinition: swap in the new value, the name copy is spare. */
			free(entry->value);
			entry->value = mm_strdup(literalbuf);
			free(newdefsymbol);
		}
		else
		{
			/* First definition: the new entry takes over newdefsymbol. */
			entry = (struct _defines *) mm_alloc(sizeof(struct _defines));

			entry->name = newdefsymbol;
			entry->value = mm_strdup(literalbuf);
			entry->cmdvalue = NULL;
			entry->used = NULL;
			entry->next = defines;
			defines = entry;
		}

		BEGIN(C);
	}
<def>[^;]	{
		/* Every character up to the ";" belongs to the value. */
		addlit(yytext, yyleng);
	}
<incl>\<[^\>]+\>{space}*";"?	|
<incl>{dquote}{xdinside}{dquote}{space}*";"?	|
<incl>[^;\<\>\"]+";"	{
		/*
		 * Angle-bracketed, double-quoted and bare file names all end up
		 * here; parse_include() works on yytext directly.
		 */
		parse_include();
	}
<incl>{other}|\n	{
		mmfatal(PARSE_ERROR, "syntax error in EXEC SQL INCLUDE command");
		yyterminate();
	}
1507 :
<<EOF>>	{
		if (yy_buffer == NULL)
		{
			/* Nothing is stacked, so this is the end of the whole input. */
			if (preproc_tos > 0)
			{
				preproc_tos = 0;
				mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
			}
			yyterminate();
		}
		else
		{
			/* Return to the input source saved on top of the stack. */
			struct _yy_buffer *saved = yy_buffer;
			struct _defines *def;
			bool		file_changed;

			/*
			 * If the buffer that just ended was a macro value, the symbol
			 * is no longer being expanded.  At most one entry can match.
			 */
			for (def = defines; def != NULL; def = def->next)
			{
				if (def->used == saved)
				{
					def->used = NULL;
					break;
				}
			}

			if (yyin != NULL)
				fclose(yyin);

			yy_delete_buffer(YY_CURRENT_BUFFER);
			yy_switch_to_buffer(saved->buffer);

			yylineno = saved->lineno;

			/* A line marker is emitted only if the file name changes. */
			file_changed = (strcmp(input_filename, saved->filename) != 0);

			free(input_filename);
			input_filename = saved->filename;

			yy_buffer = saved->next;
			free(saved);

			if (file_changed)
				output_line_number();
		}
	}
1558 :
<INITIAL>{other}|\n	{
		/* lex_init() starts the scanner in state C, so INITIAL is never expected. */
		mmfatal(PARSE_ERROR, "internal error: unreachable state; please report this to <%s>", PACKAGE_BUGREPORT);
	}
1560 :
1561 : %%
1562 :
1563 : /* LCOV_EXCL_STOP */
1564 :
1565 : void
1566 : lex_init(void)
1567 134 : {
1568 : braces_open = 0;
1569 134 : parenths_open = 0;
1570 134 : current_function = NULL;
1571 134 :
1572 : yylineno = 1;
1573 134 :
1574 : /* initialize state for if/else/endif */
1575 : preproc_tos = 0;
1576 134 : stacked_if_value[preproc_tos].active = true;
1577 134 : stacked_if_value[preproc_tos].saw_active = true;
1578 134 : stacked_if_value[preproc_tos].else_branch = false;
1579 134 :
1580 : /* initialize literal buffer to a reasonable but expansible size */
1581 : if (literalbuf == NULL)
1582 134 : {
1583 : literalalloc = 1024;
1584 132 : literalbuf = (char *) mm_alloc(literalalloc);
1585 132 : }
1586 : startlit();
1587 134 :
1588 : BEGIN(C);
1589 134 : }
1590 134 :
1591 : static void
1592 : addlit(char *ytext, int yleng)
1593 47902 : {
1594 : /* enlarge buffer if needed */
1595 : if ((literallen+yleng) >= literalalloc)
1596 47902 : {
1597 : do
1598 : literalalloc *= 2;
1599 0 : while ((literallen+yleng) >= literalalloc);
1600 0 : literalbuf = (char *) realloc(literalbuf, literalalloc);
1601 0 : }
1602 : /* append new data, add trailing null */
1603 : memcpy(literalbuf+literallen, ytext, yleng);
1604 47902 : literallen += yleng;
1605 47902 : literalbuf[literallen] = '\0';
1606 47902 : }
1607 47902 :
1608 : static void
1609 : addlitchar(unsigned char ychar)
1610 0 : {
1611 : /* enlarge buffer if needed */
1612 : if ((literallen+1) >= literalalloc)
1613 0 : {
1614 : literalalloc *= 2;
1615 0 : literalbuf = (char *) realloc(literalbuf, literalalloc);
1616 0 : }
1617 : /* append new data, add trailing null */
1618 : literalbuf[literallen] = ychar;
1619 0 : literallen += 1;
1620 0 : literalbuf[literallen] = '\0';
1621 0 : }
1622 0 :
1623 : /*
1624 : * Process {decinteger}, {hexinteger}, etc. Note this will also do the right
1625 : * thing with {numeric}, ie digits and a decimal point.
1626 : */
1627 : static int
1628 : process_integer_literal(const char *token, YYSTYPE *lval, int base)
1629 2386 : {
1630 : int val;
1631 : char *endptr;
1632 :
1633 : errno = 0;
1634 2386 : val = strtoint(base == 10 ? token : token + 2, &endptr, base);
1635 2386 : if (*endptr != '\0' || errno == ERANGE)
1636 2386 : {
1637 : /* integer too large (or contains decimal pt), treat it as a float */
1638 : lval->str = mm_strdup(token);
1639 12 : return FCONST;
1640 12 : }
1641 : lval->ival = val;
1642 2374 : return ICONST;
1643 2374 : }
1644 :
1645 : static void
1646 : parse_include(void)
1647 176 : {
1648 : /* got the include file name */
1649 : struct _yy_buffer *yb;
1650 : struct _include_path *ip;
1651 : char inc_file[MAXPGPATH];
1652 : unsigned int i;
1653 :
1654 : yb = mm_alloc(sizeof(struct _yy_buffer));
1655 176 :
1656 : yb->buffer = YY_CURRENT_BUFFER;
1657 176 : yb->lineno = yylineno;
1658 176 : yb->filename = input_filename;
1659 176 : yb->next = yy_buffer;
1660 176 :
1661 : yy_buffer = yb;
1662 176 :
1663 : /*
1664 : * skip the ";" if there is one and trailing whitespace. Note that
1665 : * yytext contains at least one non-space character plus the ";"
1666 : */
1667 : for (i = strlen(yytext)-2;
1668 178 : i > 0 && ecpg_isspace(yytext[i]);
1669 178 : i--)
1670 2 : ;
1671 :
1672 : if (yytext[i] == ';')
1673 176 : i--;
1674 0 :
1675 : yytext[i+1] = '\0';
1676 176 :
1677 : yyin = NULL;
1678 176 :
1679 : /* If file name is enclosed in '"' remove these and look only in '.' */
1680 : /* Informix does look into all include paths though, except filename starts with '/' */
1681 : if (yytext[0] == '"' && yytext[i] == '"' &&
1682 176 : ((compat != ECPG_COMPAT_INFORMIX && compat != ECPG_COMPAT_INFORMIX_SE) || yytext[1] == '/'))
1683 0 : {
1684 : yytext[i] = '\0';
1685 0 : memmove(yytext, yytext+1, strlen(yytext));
1686 0 :
1687 : strlcpy(inc_file, yytext, sizeof(inc_file));
1688 0 : yyin = fopen(inc_file, "r");
1689 0 : if (!yyin)
1690 0 : {
1691 : if (strlen(inc_file) <= 2 || strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
1692 0 : {
1693 : strcat(inc_file, ".h");
1694 0 : yyin = fopen(inc_file, "r");
1695 0 : }
1696 : }
1697 :
1698 : }
1699 : else
1700 : {
1701 : if ((yytext[0] == '"' && yytext[i] == '"') || (yytext[0] == '<' && yytext[i] == '>'))
1702 176 : {
1703 : yytext[i] = '\0';
1704 4 : memmove(yytext, yytext+1, strlen(yytext));
1705 4 : }
1706 :
1707 : for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next)
1708 478 : {
1709 : if (strlen(ip->path) + strlen(yytext) + 4 > MAXPGPATH)
1710 302 : {
1711 : fprintf(stderr, _("Error: include path \"%s/%s\" is too long on line %d, skipping\n"), ip->path, yytext, yylineno);
1712 0 : continue;
1713 0 : }
1714 : snprintf (inc_file, sizeof(inc_file), "%s/%s", ip->path, yytext);
1715 302 : yyin = fopen(inc_file, "r");
1716 302 : if (!yyin)
1717 302 : {
1718 : if (strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
1719 278 : {
1720 : strcat(inc_file, ".h");
1721 272 : yyin = fopen(inc_file, "r");
1722 272 : }
1723 : }
1724 : /* if the command was "include_next" we have to disregard the first hit */
1725 : if (yyin && include_next)
1726 302 : {
1727 : fclose (yyin);
1728 0 : yyin = NULL;
1729 0 : include_next = false;
1730 0 : }
1731 : }
1732 : }
1733 : if (!yyin)
1734 176 : mmfatal(NO_INCLUDE_FILE, "could not open include file \"%s\" on line %d", yytext, yylineno);
1735 0 :
1736 : input_filename = mm_strdup(inc_file);
1737 176 : yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
1738 176 : yylineno = 1;
1739 176 : output_line_number();
1740 176 :
1741 : BEGIN(C);
1742 176 : }
1743 176 :
/*
 * ecpg_isspace() --- return true if flex scanner considers char whitespace
 *
 * Exactly six characters qualify: space, tab, newline, carriage return,
 * form feed and vertical tab.
 */
static bool
ecpg_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\f':
		case '\v':
			return true;
		default:
			return false;
	}
}
1759 :
1760 : /*
1761 : * If yytext matches a define symbol, begin scanning the symbol's value
1762 : * and return true
1763 : */
1764 : static bool
1765 : isdefine(void)
1766 28118 : {
1767 : struct _defines *ptr;
1768 :
1769 : /* is it a define? */
1770 : for (ptr = defines; ptr; ptr = ptr->next)
1771 128258 : {
1772 : /* notice we do not match anything being actively expanded */
1773 : if (strcmp(yytext, ptr->name) == 0 &&
1774 100308 : ptr->value != NULL &&
1775 168 : ptr->used == NULL)
1776 168 : {
1777 : /* Save state associated with the current buffer */
1778 : struct _yy_buffer *yb;
1779 :
1780 : yb = mm_alloc(sizeof(struct _yy_buffer));
1781 168 :
1782 : yb->buffer = YY_CURRENT_BUFFER;
1783 168 : yb->lineno = yylineno;
1784 168 : yb->filename = mm_strdup(input_filename);
1785 168 : yb->next = yy_buffer;
1786 168 : yy_buffer = yb;
1787 168 :
1788 : /* Mark symbol as being actively expanded */
1789 : ptr->used = yb;
1790 168 :
1791 : /*
1792 : * We use yy_scan_string which will copy the value, so there's
1793 : * no need to worry about a possible undef happening while we
1794 : * are still scanning it.
1795 : */
1796 : yy_scan_string(ptr->value);
1797 168 : return true;
1798 168 : }
1799 : }
1800 :
1801 : return false;
1802 27950 : }
1803 :
1804 : /*
1805 : * Handle replacement of INFORMIX built-in defines. This works just
1806 : * like isdefine() except for the source of the string to scan.
1807 : */
1808 : static bool
1809 : isinformixdefine(void)
1810 3518 : {
1811 : const char *new = NULL;
1812 3518 :
1813 : if (strcmp(yytext, "dec_t") == 0)
1814 3518 : new = "decimal";
1815 2 : else if (strcmp(yytext, "intrvl_t") == 0)
1816 3516 : new = "interval";
1817 0 : else if (strcmp(yytext, "dtime_t") == 0)
1818 3516 : new = "timestamp";
1819 0 :
1820 : if (new)
1821 3518 : {
1822 : struct _yy_buffer *yb;
1823 :
1824 : yb = mm_alloc(sizeof(struct _yy_buffer));
1825 2 :
1826 : yb->buffer = YY_CURRENT_BUFFER;
1827 2 : yb->lineno = yylineno;
1828 2 : yb->filename = mm_strdup(input_filename);
1829 2 : yb->next = yy_buffer;
1830 2 : yy_buffer = yb;
1831 2 :
1832 : yy_scan_string(new);
1833 2 : return true;
1834 2 : }
1835 :
1836 : return false;
1837 3516 : }
|