Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * pgc.l
5 : * lexical scanner for ecpg
6 : *
7 : * This is a modified version of src/backend/parser/scan.l
8 : *
9 : * The ecpg scanner is not backup-free, so the fail rules are
10 : * only here to simplify syncing this file with scan.l.
11 : *
12 : *
13 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
14 : * Portions Copyright (c) 1994, Regents of the University of California
15 : *
16 : * IDENTIFICATION
17 : * src/interfaces/ecpg/preproc/pgc.l
18 : *
19 : *-------------------------------------------------------------------------
20 : */
21 : #include "postgres_fe.h"
22 :
23 : #include <ctype.h>
24 : #include <limits.h>
25 :
26 : #include "common/string.h"
27 :
28 : #include "preproc_extern.h"
29 : #include "preproc.h"
30 : }
31 :
32 : %{
33 :
34 : /* LCOV_EXCL_START */
35 :
36 : extern YYSTYPE base_yylval;
37 :
38 : static int xcdepth = 0; /* depth of nesting in slash-star comments */
39 : static char *dolqstart = NULL; /* current $foo$ quote start string */
40 :
41 : /*
42 : * literalbuf is used to accumulate literal values when multiple rules
43 : * are needed to parse a single literal. Call startlit to reset buffer
44 : * to empty, addlit to add text. Note that the buffer is permanently
45 : * malloc'd to the largest size needed so far in the current run.
46 : */
47 : static char *literalbuf = NULL; /* expandable buffer */
48 : static int literallen; /* actual current length */
49 : static int literalalloc; /* current allocated buffer size */
50 :
51 : /* Used for detecting global state together with braces_open */
52 : static int parenths_open;
53 :
54 : /* Used to tell parse_include() whether the command was #include or #include_next */
55 : static bool include_next;
56 :
57 : #define startlit() (literalbuf[0] = '\0', literallen = 0) /* empty the literal buffer; writes literalbuf[0], so the buffer must already be allocated */
58 : static void addlit(char *ytext, int yleng);
59 : static void addlitchar(unsigned char ychar);
60 : static int process_integer_literal(const char *token, YYSTYPE *lval, int base);
61 : static void parse_include(void);
62 : static bool ecpg_isspace(char ch);
63 : static bool isdefine(void);
64 : static bool isinformixdefine(void);
65 : /* Set to yytext by the rules that open a comment or string literal; NULL at the start of every yylex() call and once a comment closes */
66 : char *token_start;
67 :
68 : /* vars to keep track of start conditions when scanning literals */
69 : static int state_before_str_start;
70 : static int state_before_str_stop;
71 :
72 : /*
73 : * State for handling include files and macro expansion. We use a new
74 : * flex input buffer for each level of include or macro, and create a
75 : * struct _yy_buffer to remember the previous level. There is not a struct
76 : * for the currently active input source; that state is kept in the global
77 : * variables YY_CURRENT_BUFFER, yylineno, and input_filename.
78 : */
79 : static struct _yy_buffer /* one remembered outer input level (include file or macro) */
80 : {
81 : YY_BUFFER_STATE buffer; /* flex input buffer of the remembered level */
82 : long lineno; /* presumably the yylineno to restore for that level -- confirm at the push/pop sites */
83 : char *filename; /* presumably the input_filename to restore for that level -- confirm at the push/pop sites */
84 : struct _yy_buffer *next; /* link to another remembered level, or NULL */
85 : } *yy_buffer = NULL; /* list of remembered levels; starts out empty */
86 :
87 : /*
88 : * Vars for handling ifdef/elif/endif constructs. preproc_tos is the current
89 : * nesting depth of such constructs, and stacked_if_value[preproc_tos] is the
90 : * state for the innermost level. (For convenience, stacked_if_value[0] is
91 : * initialized as though we are in the active branch of some outermost IF.)
92 : * The active field is true if the current branch is active (being expanded).
93 : * The saw_active field is true if we have found any successful branch,
94 : * so that all subsequent branches of this level should be skipped.
95 : * The else_branch field is true if we've found an 'else' (so that another
96 : * 'else' or 'elif' at this level is an error.)
97 : * For IFs nested within an inactive branch, all branches always have active
98 : * set to false, but saw_active and else_branch are maintained normally.
99 : * ifcond is valid only while evaluating an if-condition; it's true if we
100 : * are doing ifdef, false if ifndef.
101 : */
102 : #define MAX_NESTED_IF 128
103 : static short preproc_tos;
104 : static bool ifcond;
105 : static struct _if_value
106 : {
107 : bool active;
108 : bool saw_active;
109 : bool else_branch;
110 : } stacked_if_value[MAX_NESTED_IF];
111 :
112 : %}
113 :
114 : %option 8bit
115 : %option never-interactive
116 : %option nodefault
117 : %option noinput
118 : %option noyywrap
119 : %option warn
120 : %option yylineno
121 : %option prefix="base_yy"
122 :
123 : /*
124 : * OK, here is a short description of lex/flex rules behavior.
125 : * The longest pattern which matches an input string is always chosen.
126 : * For equal-length patterns, the first occurring in the rules list is chosen.
127 : * INITIAL is the starting state, to which all non-conditional rules apply.
128 : * Exclusive states change parsing rules while the state is active. When in
129 : * an exclusive state, only those rules defined for that state apply.
130 : *
131 : * We use exclusive states for quoted strings, extended comments,
132 : * and to eliminate parsing troubles for numeric strings.
133 : * Exclusive states:
134 : * <xb> bit string literal
135 : * <xc> extended C-style comments
136 : * <xd> delimited identifiers (double-quoted identifiers)
137 : * <xdc> double-quoted strings in C
138 : * <xh> hexadecimal byte string
139 : * <xn> national character quoted strings
140 : * <xq> standard quoted strings
141 : * <xqs> quote stop (detect continued strings)
142 : * <xe> extended quoted strings (support backslash escape sequences)
143 : * <xqc> single-quoted strings in C
144 : * <xdolq> $foo$ quoted strings
145 : * <xui> quoted identifier with Unicode escapes
146 : * <xus> quoted string with Unicode escapes
147 : * <xcond> condition of an EXEC SQL IFDEF construct
148 : * <xskip> skipping the inactive part of an EXEC SQL IFDEF construct
149 : *
150 : * Note: we intentionally don't mimic the backend's <xeu> state; we have
151 : * no need to distinguish it from <xe> state.
152 : *
153 : * Remember to add an <<EOF>> case whenever you add a new exclusive state!
154 : * The default one is probably not the right thing.
155 : */
156 :
157 : %x xb
158 : %x xc
159 : %x xd
160 : %x xdc
161 : %x xh
162 : %x xn
163 : %x xq
164 : %x xqs
165 : %x xe
166 : %x xqc
167 : %x xdolq
168 : %x xui
169 : %x xus
170 : %x xcond
171 : %x xskip
172 :
173 : /* Additional exclusive states that are specific to ECPG */
174 : %x C SQL incl def def_ident undef
175 :
176 : /*
177 : * In order to make the world safe for Windows and Mac clients as well as
178 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
179 : * sequence will be seen as two successive newlines, but that doesn't cause
180 : * any problems. SQL-style comments, which start with -- and extend to the
181 : * next newline, are treated as equivalent to a single whitespace character.
182 : *
183 : * NOTE a fine point: if there is no newline following --, we will absorb
184 : * everything to the end of the input as a comment. This is correct. Older
185 : * versions of Postgres failed to recognize -- as a comment if the input
186 : * did not end with a newline.
187 : *
188 : * non_newline_space tracks all space characters except newlines.
189 : *
190 : * XXX if you change the set of whitespace characters, fix ecpg_isspace()
191 : * to agree.
192 : */
193 :
194 : space [ \t\n\r\f\v]
195 : non_newline_space [ \t\f\v]
196 : newline [\n\r]
197 : non_newline [^\n\r]
198 :
199 : comment ("--"{non_newline}*)
200 :
201 : whitespace ({space}+|{comment})
202 :
203 : /*
204 : * SQL requires at least one newline in the whitespace separating
205 : * string literals that are to be concatenated. Silly, but who are we
206 : * to argue? Note that {whitespace_with_newline} should not have * after
207 : * it, whereas {whitespace} should generally have a * after it...
208 : */
209 :
210 : non_newline_whitespace ({non_newline_space}|{comment})
211 : whitespace_with_newline ({non_newline_whitespace}*{newline}{whitespace}*)
212 :
213 : quote '
214 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
215 : quotecontinue {whitespace_with_newline}{quote}
216 :
217 : /*
218 : * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
219 : * {quotecontinue}. It might seem that this could just be {whitespace}*,
220 : * but if there's a dash after {whitespace_with_newline}, it must be consumed
221 : * to see if there's another dash --- which would start a {comment} and thus
222 : * allow continuation of the {quotecontinue} token.
223 : */
224 : quotecontinuefail {whitespace}*"-"?
225 :
226 : /* Bit string
227 : */
228 : xbstart [bB]{quote}
229 : xbinside [^']*
230 :
231 : /* Hexadecimal byte string */
232 : xhstart [xX]{quote}
233 : xhinside [^']*
234 :
235 : /* National character */
236 : xnstart [nN]{quote}
237 :
238 : /* Quoted string that allows backslash escapes */
239 : xestart [eE]{quote}
240 : xeinside [^\\']+
241 : xeescape [\\][^0-7]
242 : xeoctesc [\\][0-7]{1,3}
243 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
244 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
245 :
246 : /* Extended quote
247 : * xqdouble implements embedded quote, ''''
248 : */
249 : xqstart {quote}
250 : xqdouble {quote}{quote}
251 : xqcquote [\\]{quote}
252 : xqinside [^']+
253 :
254 : /* $foo$ style quotes ("dollar quoting")
255 : * The quoted string starts with $foo$ where "foo" is an optional string
256 : * in the form of an identifier, except that it may not contain "$",
257 : * and extends to the first occurrence of an identical string.
258 : * There is *no* processing of the quoted text.
259 : *
260 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
261 : * fails to match its trailing "$".
262 : */
263 : dolq_start [A-Za-z\200-\377_]
264 : dolq_cont [A-Za-z\200-\377_0-9]
265 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
266 : dolqfailed \${dolq_start}{dolq_cont}*
267 : dolqinside [^$]+
268 :
269 : /* Double quote
270 : * Allows embedded spaces and other special characters into identifiers.
271 : */
272 : dquote \"
273 : xdstart {dquote}
274 : xdstop {dquote}
275 : xddouble {dquote}{dquote}
276 : xdinside [^"]+
277 :
278 : /* Quoted identifier with Unicode escapes */
279 : xuistart [uU]&{dquote}
280 :
281 : /* Quoted string with Unicode escapes */
282 : xusstart [uU]&{quote}
283 :
284 : /* special stuff for C strings */
285 : xdcqq \\\\
286 : xdcqdq \\\"
287 : xdcother [^"]
288 : xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
289 :
290 :
291 : /* C-style comments
292 : *
293 : * The "extended comment" syntax closely resembles allowable operator syntax.
294 : * The tricky part here is to get lex to recognize a string starting with
295 : * slash-star as a comment, when interpreting it as an operator would produce
296 : * a longer match --- remember lex will prefer a longer match! Also, if we
297 : * have something like plus-slash-star, lex will think this is a 3-character
298 : * operator whereas we want to see it as a + operator and a comment start.
299 : * The solution is two-fold:
300 : * 1. append {op_chars}* to xcstart so that it matches as much text as
301 : * {operator} would. Then the tie-breaker (first matching rule of same
302 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
303 : * in case it contains a star-slash that should terminate the comment.
304 : * 2. In the operator rule, check for slash-star within the operator, and
305 : * if found throw it back with yyless(). This handles the plus-slash-star
306 : * problem.
307 : * Dash-dash comments have similar interactions with the operator rule.
308 : */
309 : xcstart \/\*{op_chars}*
310 : xcstop \*+\/
311 : xcinside [^*/]+
312 :
313 : ident_start [A-Za-z\200-\377_]
314 : ident_cont [A-Za-z\200-\377_0-9\$]
315 :
316 : identifier {ident_start}{ident_cont}*
317 :
318 : array ({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])*
319 :
320 : /* Assorted special-case operators and operator-like tokens */
321 : typecast "::"
322 : dot_dot \.\.
323 : colon_equals ":="
324 :
325 : /*
326 : * These operator-like tokens (unlike the above ones) also match the {operator}
327 : * rule, which means that they might be overridden by a longer match if they
328 : * are followed by a comment start or a + or - character. Accordingly, if you
329 : * add to this list, you must also add corresponding code to the {operator}
330 : * block to return the correct token in such cases. (This is not needed in
331 : * psqlscan.l since the token value is ignored there.)
332 : */
333 : equals_greater "=>"
334 : less_equals "<="
335 : greater_equals ">="
336 : less_greater "<>"
337 : not_equals "!="
338 :
339 : /*
340 : * "self" is the set of chars that should be returned as single-character
341 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
342 : * which can be one or more characters long (but if a single-char token
343 : * appears in the "self" set, it is not to be returned as an Op). Note
344 : * that the sets overlap, but each has some chars that are not in the other.
345 : *
346 : * If you change either set, adjust the character lists appearing in the
347 : * rule for "operator"!
348 : */
349 : self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
350 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
351 : operator {op_chars}+
352 :
353 : /*
354 : * Numbers
355 : *
356 : * Unary minus is not part of a number here. Instead we pass it separately to
357 : * the parser, and there it gets coerced via doNegate().
358 : *
359 : * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
360 : *
361 : * {realfail} is added to prevent the need for scanner
362 : * backup when the {real} rule fails to match completely.
363 : */
364 : decdigit [0-9]
365 : hexdigit [0-9A-Fa-f]
366 : octdigit [0-7]
367 : bindigit [0-1]
368 :
369 : decinteger {decdigit}(_?{decdigit})*
370 : hexinteger 0[xX](_?{hexdigit})+
371 : octinteger 0[oO](_?{octdigit})+
372 : bininteger 0[bB](_?{bindigit})+
373 :
374 : hexfail 0[xX]_?
375 : octfail 0[oO]_?
376 : binfail 0[bB]_?
377 :
378 : numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
379 : numericfail {decinteger}\.\.
380 :
381 : real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
382 : realfail ({decinteger}|{numeric})[Ee][-+]
383 :
384 : decinteger_junk {decinteger}{ident_start}
385 : hexinteger_junk {hexinteger}{ident_start}
386 : octinteger_junk {octinteger}{ident_start}
387 : bininteger_junk {bininteger}{ident_start}
388 : numeric_junk {numeric}{ident_start}
389 : real_junk {real}{ident_start}
390 :
391 : /* Positional parameters don't accept underscores. */
392 : param \${decdigit}+
393 : param_junk \${decdigit}+{ident_start}
394 :
395 : /* special characters for other dbms */
396 : /* we have to react differently in compat mode */
397 : informix_special [\$]
398 :
399 : other .
400 :
401 : /*
402 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
403 : * Other quoted strings must allow some special characters such as single-quote
404 : * and newline.
405 : * Embedded single-quotes are implemented both in the SQL standard
406 : * style of two adjacent single quotes "''" and in the Postgres/Java style
407 : * of escaped-quote "\'".
408 : * Other embedded escaped characters are matched explicitly and the leading
409 : * backslash is dropped from the string.
410 : * Note that xcstart must appear before operator, as explained above!
411 : * Also whitespace (comment) must appear before operator.
412 : */
413 :
414 : /* some stuff needed for ecpg */
415 : exec [eE][xX][eE][cC]
416 : sql [sS][qQ][lL]
417 : define [dD][eE][fF][iI][nN][eE]
418 : include [iI][nN][cC][lL][uU][dD][eE]
419 : include_next [iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
420 : import [iI][mM][pP][oO][rR][tT]
421 : undef [uU][nN][dD][eE][fF]
422 :
423 : ccomment "//".*\n
424 :
425 : if [iI][fF]
426 : ifdef [iI][fF][dD][eE][fF]
427 : ifndef [iI][fF][nN][dD][eE][fF]
428 : else [eE][lL][sS][eE]
429 : elif [eE][lL][iI][fF]
430 : endif [eE][nN][dD][iI][fF]
431 :
432 : struct [sS][tT][rR][uU][cC][tT]
433 :
434 : exec_sql {exec}{space}*{sql}{space}*
435 : ipdigit ({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
436 : ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
437 :
438 : /* we might want to parse all cpp include files */
439 : cppinclude {space}*#{include}{space}*
440 : cppinclude_next {space}*#{include_next}{space}*
441 :
442 : /* take care of cpp lines, they may also be continued */
443 : /* first a general line for all commands not starting with "i" */
444 : /* and then the other commands starting with "i", we have to add these
445 : * separately because the cppline production would match on "include" too
446 : */
447 : cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline}
448 :
449 : %%
450 :
451 : %{
452 : /* code to execute during start of each call of yylex() */
453 : char *newdefsymbol = NULL;
454 :
455 : token_start = NULL;
456 : %}
457 :
458 : <SQL>{
459 : {whitespace} {
460 : /* ignore */
461 : }
462 : } /* <SQL> */
463 :
464 : <C,SQL>{
465 : {xcstart} { /* opening slash-star of a comment, seen in either C or SQL text */
466 : token_start = yytext;
467 : state_before_str_start = YYSTATE; /* the <xc> rules use this to tell a C comment from an SQL comment */
468 : xcdepth = 0; /* no nested comment has been opened yet */
469 : BEGIN(xc);
470 : /* Put back any characters past slash-star; see above */
471 : yyless(2);
472 : fputs("/*", yyout); /* copy the comment opener through to yyout */
473 : }
474 : } /* <C,SQL> */
475 :
476 : <xc>{
477 : {xcstart} { /* slash-star seen while already inside a comment */
478 : if (state_before_str_start == SQL)
479 : {
480 : xcdepth++; /* SQL comments nest: one level deeper */
481 : /* Put back any characters past slash-star; see above */
482 : yyless(2);
483 : fputs("/_*", yyout); /* the inner opener is written out with an underscore inserted */
484 : }
485 : else if (state_before_str_start == C)
486 : {
487 : /* C comments do not nest: consume only the slash and rescan the rest, */
488 : /* so that a star-slash within the matched text still ends the comment. */
489 : yyless(1);
490 : ECHO;
491 : }
492 : }
493 : {xcstop} { /* one or more stars followed by a slash */
494 : if (state_before_str_start == SQL)
495 : {
496 : if (xcdepth <= 0)
497 : {
498 : ECHO; /* outermost comment closes: copy the terminator unchanged */
499 : BEGIN(SQL);
500 : token_start = NULL;
501 : }
502 : else
503 : {
504 : xcdepth--; /* a nested comment closes; still inside the outer one */
505 : fputs("*_/", yyout); /* written with an underscore inserted, like the nested opener */
506 : }
507 : }
508 : else if (state_before_str_start == C)
509 : {
510 : ECHO;
511 : BEGIN(C);
512 : token_start = NULL;
513 : }
514 : }
515 : {xcinside} { /* run of characters containing no star or slash: copy through */
516 : ECHO;
517 : }
518 : {op_chars} { /* a single operator character, e.g. a slash that opens nothing */
519 : ECHO;
520 : }
521 :
522 : \*+ { /* stars that are not followed by a slash */
523 : ECHO;
524 : }
525 :
526 : <<EOF>> {
527 : mmfatal(PARSE_ERROR, "unterminated /* comment");
528 : }
529 : } /* <xc> */
530 :
531 : <SQL>{
532 : {xbstart} {
533 : token_start = yytext;
534 : state_before_str_start = YYSTATE;
535 : BEGIN(xb);
536 : startlit();
537 : }
538 : } /* <SQL> */
539 :
540 : <xh>{xhinside} |
541 : <xb>{xbinside} {
542 : addlit(yytext, yyleng);
543 : }
544 : <xb><<EOF>> { mmfatal(PARSE_ERROR, "unterminated bit string literal"); }
545 :
546 : <SQL>{xhstart} {
547 : token_start = yytext;
548 : state_before_str_start = YYSTATE;
549 : BEGIN(xh);
550 : startlit();
551 : }
552 : <xh><<EOF>> { mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); }
553 :
554 : <C>{xqstart} {
555 : token_start = yytext;
556 : state_before_str_start = YYSTATE;
557 : BEGIN(xqc);
558 : startlit();
559 : }
560 :
561 : <SQL>{
562 : {xnstart} {
563 : /* National character.
564 : * Transfer it as-is to the backend.
565 : */
566 : token_start = yytext;
567 : state_before_str_start = YYSTATE;
568 : BEGIN(xn);
569 : startlit();
570 : }
571 :
572 : {xqstart} {
573 : token_start = yytext;
574 : state_before_str_start = YYSTATE;
575 : BEGIN(xq);
576 : startlit();
577 : }
578 : {xestart} {
579 : token_start = yytext;
580 : state_before_str_start = YYSTATE;
581 : BEGIN(xe);
582 : startlit();
583 : }
584 : {xusstart} {
585 : token_start = yytext;
586 : state_before_str_start = YYSTATE;
587 : BEGIN(xus);
588 : startlit();
589 : }
590 : } /* <SQL> */
591 :
592 : <xb,xh,xq,xqc,xe,xn,xus>{quote} {
593 : /*
594 : * When we are scanning a quoted string and see an end
595 : * quote, we must look ahead for a possible continuation.
596 : * If we don't see one, we know the end quote was in fact
597 : * the end of the string. To reduce the lexer table size,
598 : * we use a single "xqs" state to do the lookahead for all
599 : * types of strings.
600 : */
601 : state_before_str_stop = YYSTATE;
602 : BEGIN(xqs);
603 : }
604 : <xqs>{quotecontinue} {
605 : /*
606 : * Found a quote continuation, so return to the in-quote
607 : * state and continue scanning the literal. Nothing is
608 : * added to the literal's contents.
609 : */
610 : BEGIN(state_before_str_stop);
611 : }
612 : <xqs>{quotecontinuefail} |
613 : <xqs>{other} |
614 : <xqs><<EOF>> {
615 : /*
616 : * Failed to see a quote continuation, so the end quote really
617 : * ended the literal. Throw back everything after the end quote,
618 : * and finish the literal according to the state it was scanned in.
619 : */
620 : yyless(0); /* consume nothing; all of the lookahead is rescanned */
621 : BEGIN(state_before_str_start); /* back to the state the literal started from */
622 : /* state_before_str_stop is the in-quote state that saw the end quote */
623 : switch (state_before_str_stop)
624 : {
625 : case xb:
626 : if (literalbuf[strspn(literalbuf, "01")] != '\0') /* some character is not 0 or 1 */
627 : mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string literal");
628 : base_yylval.str = psprintf("b'%s'", literalbuf);
629 : return BCONST;
630 : case xh:
631 : if (literalbuf[strspn(literalbuf, "0123456789abcdefABCDEF")] != '\0') /* some character is not a hex digit */
632 : mmerror(PARSE_ERROR, ET_ERROR, "invalid hexadecimal string literal");
633 : base_yylval.str = psprintf("x'%s'", literalbuf);
634 : return XCONST;
635 : case xq:
636 : /* fallthrough */
637 : case xqc:
638 : base_yylval.str = psprintf("'%s'", literalbuf);
639 : return SCONST;
640 : case xe:
641 : base_yylval.str = psprintf("E'%s'", literalbuf);
642 : return SCONST;
643 : case xn:
644 : base_yylval.str = psprintf("N'%s'", literalbuf);
645 : return SCONST;
646 : case xus:
647 : base_yylval.str = psprintf("U&'%s'", literalbuf);
648 : return USCONST;
649 : default:
650 : mmfatal(PARSE_ERROR, "unhandled previous state in xqs\n"); /* NOTE(review): unlike the other mmfatal() messages in these rules, this one ends in a newline -- confirm that is intended */
651 : }
652 : }
653 :
654 : <xq,xe,xn,xus>{xqdouble} { addlit(yytext, yyleng); }
655 : <xqc>{xqcquote} { addlit(yytext, yyleng); }
656 : <xq,xqc,xn,xus>{xqinside} { addlit(yytext, yyleng); }
657 : <xe>{xeinside} {
658 : addlit(yytext, yyleng);
659 : }
660 : <xe>{xeunicode} {
661 : addlit(yytext, yyleng);
662 : }
663 : <xe>{xeescape} {
664 : addlit(yytext, yyleng);
665 : }
666 : <xe>{xeoctesc} {
667 : addlit(yytext, yyleng);
668 : }
669 : <xe>{xehexesc} {
670 : addlit(yytext, yyleng);
671 : }
672 : <xe>. {
673 : /* This is only needed for \ just before EOF */
674 : addlitchar(yytext[0]);
675 : }
676 : <xq,xqc,xe,xn,xus><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted string"); }
677 :
678 : <SQL>{
679 : {dolqdelim} {
680 : token_start = yytext;
681 : if (dolqstart)
682 : free(dolqstart);
683 : dolqstart = mm_strdup(yytext);
684 : BEGIN(xdolq);
685 : startlit();
686 : addlit(yytext, yyleng);
687 : }
688 : {dolqfailed} {
689 : /* throw back all but the initial "$" */
690 : yyless(1);
691 : /* and treat it as {other} */
692 : return yytext[0];
693 : }
694 : } /* <SQL> */
695 :
696 : <xdolq>{dolqdelim} { /* a $...$ sequence inside a dollar-quoted string */
697 : if (strcmp(yytext, dolqstart) == 0) /* same text as the opening delimiter: the string ends here */
698 : {
699 : addlit(yytext, yyleng); /* the closing delimiter is kept as part of the literal */
700 : free(dolqstart);
701 : dolqstart = NULL;
702 : BEGIN(SQL);
703 : base_yylval.str = mm_strdup(literalbuf);
704 : return SCONST;
705 : }
706 : else
707 : {
708 : /*
709 : * When we fail to match $...$ to dolqstart, append
710 : * the $... part to the literal, but put back the final
711 : * $ for rescanning. Consider $delim$...$junk$delim$
712 : */
713 : addlit(yytext, yyleng - 1);
714 : yyless(yyleng - 1);
715 : }
716 : }
717 : <xdolq>{dolqinside} {
718 : addlit(yytext, yyleng);
719 : }
720 : <xdolq>{dolqfailed} {
721 : addlit(yytext, yyleng);
722 : }
723 : <xdolq>. {
724 : /* single quote or dollar sign */
725 : addlitchar(yytext[0]);
726 : }
727 : <xdolq><<EOF>> { mmfatal(PARSE_ERROR, "unterminated dollar-quoted string"); }
728 :
729 : <SQL>{
730 : {xdstart} {
731 : state_before_str_start = YYSTATE;
732 : BEGIN(xd);
733 : startlit();
734 : }
735 : {xuistart} {
736 : state_before_str_start = YYSTATE;
737 : BEGIN(xui);
738 : startlit();
739 : }
740 : } /* <SQL> */
741 :
742 : <xd>{xdstop} {
743 : BEGIN(state_before_str_start);
744 : if (literallen == 0)
745 : mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
746 : /*
747 : * The server will truncate the identifier here. We do
748 : * not, as (1) it does not change the result; (2) we don't
749 : * know what NAMEDATALEN the server might use; (3) this
750 : * code path is also taken for literal query strings in
751 : * PREPARE and EXECUTE IMMEDIATE, which can certainly be
752 : * longer than NAMEDATALEN.
753 : */
754 : base_yylval.str = mm_strdup(literalbuf);
755 : return CSTRING;
756 : }
757 : <xdc>{xdstop} {
758 : BEGIN(state_before_str_start);
759 : base_yylval.str = mm_strdup(literalbuf);
760 : return CSTRING;
761 : }
762 : <xui>{dquote} {
763 : BEGIN(state_before_str_start);
764 : if (literallen == 0)
765 : mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
766 : /* The backend will truncate the identifier here. We do not as it does not change the result. */
767 : base_yylval.str = psprintf("U&\"%s\"", literalbuf);
768 : return UIDENT;
769 : }
770 : <xd,xui>{xddouble} {
771 : addlit(yytext, yyleng);
772 : }
773 : <xd,xui>{xdinside} {
774 : addlit(yytext, yyleng);
775 : }
776 : <xd,xui><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted identifier"); }
777 : <C>{xdstart} {
778 : state_before_str_start = YYSTATE;
779 : BEGIN(xdc);
780 : startlit();
781 : }
782 : <xdc>{xdcinside} {
783 : addlit(yytext, yyleng);
784 : }
785 : <xdc><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted string"); }
786 :
787 : <SQL>{
788 : {typecast} {
789 : return TYPECAST;
790 : }
791 :
792 : {dot_dot} {
793 : return DOT_DOT;
794 : }
795 :
796 : {colon_equals} {
797 : return COLON_EQUALS;
798 : }
799 :
800 : {equals_greater} {
801 : return EQUALS_GREATER;
802 : }
803 :
804 : {less_equals} {
805 : return LESS_EQUALS;
806 : }
807 :
808 : {greater_equals} {
809 : return GREATER_EQUALS;
810 : }
811 :
812 : {less_greater} {
813 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
814 : return NOT_EQUALS;
815 : }
816 :
817 : {not_equals} {
818 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
819 : return NOT_EQUALS;
820 : }
821 :
821 : {informix_special} { /* a lone dollar sign in SQL text */
822 : /* are we simulating Informix? */
823 : if (INFORMIX_MODE)
824 : {
825 : unput(':'); /* push a colon back onto the input; no token is returned, so scanning resumes at that colon */
826 : }
827 : else
828 : return yytext[0]; /* otherwise the dollar sign itself is the token */
829 : }
831 :
832 : {self} {
833 : /*
834 : * We may find a ';' inside a structure
835 : * definition in a TYPE or VAR statement.
836 : * This is not an EOL marker.
837 : */
838 : if (yytext[0] == ';' && struct_level == 0)
839 : BEGIN(C);
840 : return yytext[0];
841 : }
842 :
843 : {operator} {
844 : /*
845 : * Check for embedded slash-star or dash-dash; those
846 : * are comment starts, so operator must stop there.
847 : * Note that slash-star or dash-dash at the first
848 : * character will match a prior rule, not this one.
849 : */
850 : int nchars = yyleng;
851 : char *slashstar = strstr(yytext, "/*");
852 : char *dashdash = strstr(yytext, "--");
853 :
854 : if (slashstar && dashdash)
855 : {
856 : /* if both appear, take the first one */
857 : if (slashstar > dashdash)
858 : slashstar = dashdash;
859 : }
860 : else if (!slashstar)
861 : slashstar = dashdash;
862 : if (slashstar)
863 : nchars = slashstar - yytext;
864 :
865 : /*
866 : * For SQL compatibility, '+' and '-' cannot be the
867 : * last char of a multi-char operator unless the operator
868 : * contains chars that are not in SQL operators.
869 : * The idea is to lex '=-' as two operators, but not
870 : * to forbid operator names like '?-' that could not be
871 : * sequences of SQL operators.
872 : */
873 : if (nchars > 1 &&
874 : (yytext[nchars - 1] == '+' ||
875 : yytext[nchars - 1] == '-'))
876 : {
877 : int ic;
878 :
879 : for (ic = nchars - 2; ic >= 0; ic--)
880 : {
881 : char c = yytext[ic];
882 : if (c == '~' || c == '!' || c == '@' ||
883 : c == '#' || c == '^' || c == '&' ||
884 : c == '|' || c == '`' || c == '?' ||
885 : c == '%')
886 : break;
887 : }
888 : if (ic < 0)
889 : {
890 : /*
891 : * didn't find a qualifying character, so remove
892 : * all trailing [+-]
893 : */
894 : do {
895 : nchars--;
896 : } while (nchars > 1 &&
897 : (yytext[nchars - 1] == '+' ||
898 : yytext[nchars - 1] == '-'));
899 : }
900 : }
901 :
902 : if (nchars < yyleng)
903 : {
904 : /* Strip the unwanted chars from the token */
905 : yyless(nchars);
906 : /*
907 : * If what we have left is only one char, and it's
908 : * one of the characters matching "self", then
909 : * return it as a character token the same way
910 : * that the "self" rule would have.
911 : */
912 : if (nchars == 1 &&
913 : strchr(",()[].;:+-*/%^<>=", yytext[0]))
914 : return yytext[0];
915 : /*
916 : * Likewise, if what we have left is two chars, and
917 : * those match the tokens ">=", "<=", "=>", "<>" or
918 : * "!=", then we must return the appropriate token
919 : * rather than the generic Op.
920 : */
921 : if (nchars == 2)
922 : {
923 : if (yytext[0] == '=' && yytext[1] == '>')
924 : return EQUALS_GREATER;
925 : if (yytext[0] == '>' && yytext[1] == '=')
926 : return GREATER_EQUALS;
927 : if (yytext[0] == '<' && yytext[1] == '=')
928 : return LESS_EQUALS;
929 : if (yytext[0] == '<' && yytext[1] == '>')
930 : return NOT_EQUALS;
931 : if (yytext[0] == '!' && yytext[1] == '=')
932 : return NOT_EQUALS;
933 : }
934 : }
935 :
936 : base_yylval.str = mm_strdup(yytext);
937 : return Op;
938 : }
939 :
940 : {param} { /* positional parameter: a dollar sign followed by decimal digits */
941 : int val;
942 :
943 : errno = 0; /* cleared so that ERANGE below can only come from this conversion */
944 : val = strtoint(yytext + 1, NULL, 10); /* + 1 skips the leading dollar sign */
945 : if (errno == ERANGE)
946 : mmfatal(PARSE_ERROR, "parameter number too large");
947 : base_yylval.ival = val;
948 : return PARAM;
949 : }
950 : {param_junk} {
951 : mmfatal(PARSE_ERROR, "trailing junk after parameter");
952 : }
953 :
954 : {ip} {
955 : base_yylval.str = mm_strdup(yytext);
956 : return IP;
957 : }
958 : } /* <SQL> */
959 :
960 : <C,SQL>{
961 : {decinteger} { /* decimal integer; the token type is decided by process_integer_literal() */
962 : return process_integer_literal(yytext, &base_yylval, 10);
963 : }
964 : {hexinteger} { /* 0x... integer, accepted in both C and SQL text */
965 : return process_integer_literal(yytext, &base_yylval, 16);
966 : }
967 : {numeric} { /* number with a decimal point: passed on as text */
968 : base_yylval.str = mm_strdup(yytext);
969 : return FCONST;
970 : }
971 : {numericfail} { /* integer directly followed by two dots, as in 1..10 */
972 : /* throw back the .., and treat as integer */
973 : yyless(yyleng - 2);
974 : return process_integer_literal(yytext, &base_yylval, 10);
975 : }
976 : {real} { /* number with an exponent: passed on as text */
977 : base_yylval.str = mm_strdup(yytext);
978 : return FCONST;
979 : }
980 : {realfail} { /* exponent marker and sign with no digits after them */
981 : /*
982 : * Throw back the [Ee][+-]. What remains is a {decinteger} or a {numeric};
983 : * NOTE(review): telling them apart is left to process_integer_literal() -- confirm it copes with a decimal point.
984 : */
985 : yyless(yyleng - 2);
986 : return process_integer_literal(yytext, &base_yylval, 10);
987 : }
988 : } /* <C,SQL> */
989 :
990 : <SQL>{
991 : {octinteger} {
992 : return process_integer_literal(yytext, &base_yylval, 8);
993 : }
994 : {bininteger} {
995 : return process_integer_literal(yytext, &base_yylval, 2);
996 : }
997 :
998 : /*
999 : * Note that some trailing junk is valid in C (such as 100LL), so we
1000 : * confine these junk rules to SQL mode.
1001 : */
1002 : {decinteger_junk} {
1003 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1004 : }
1005 : {hexinteger_junk} {
1006 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1007 : }
1008 : {octinteger_junk} {
1009 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1010 : }
1011 : {bininteger_junk} {
1012 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1013 : }
1014 : {numeric_junk} {
1015 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1016 : }
1017 : {real_junk} {
1018 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1019 : }
1020 :
1021 : :{identifier}((("->"|\.){identifier})|(\[{array}\]))* {
1022 : base_yylval.str = mm_strdup(yytext+1);
1023 : return CVARIABLE;
1024 : }
1025 :
1026 : {identifier} {
1027 : /* First check to see if it's a define symbol to expand */
1028 : if (!isdefine())
1029 : {
1030 : int kwvalue;
1031 :
1032 : /*
1033 : * User-defined typedefs override SQL keywords, but
1034 : * not C keywords. Currently, a typedef name is just
1035 : * reported as IDENT, but someday we might need to
1036 : * return a distinct token type.
1037 : */
1038 : if (get_typedef(yytext, true) == NULL)
1039 : {
1040 : /* Is it an SQL/ECPG keyword? */
1041 : kwvalue = ScanECPGKeywordLookup(yytext);
1042 : if (kwvalue >= 0)
1043 : return kwvalue;
1044 : }
1045 :
1046 : /* Is it a C keyword? */
1047 : kwvalue = ScanCKeywordLookup(yytext);
1048 : if (kwvalue >= 0)
1049 : return kwvalue;
1050 :
1051 : /*
1052 : * None of the above. Return it as an identifier.
1053 : *
1054 : * The backend will attempt to truncate and case-fold
1055 : * the identifier, but I see no good reason for ecpg
1056 : * to do so; that's just another way that ecpg could get
1057 : * out of step with the backend.
1058 : */
1059 : base_yylval.str = mm_strdup(yytext);
1060 : return IDENT;
1061 : }
1062 : }
1063 :
1064 : {other} {
1065 : return yytext[0];
1066 : }
1067 : } /* <SQL> */
1068 :
1069 : /*
1070 : * Begin ECPG-specific rules
1071 : */
1072 :
<C>{exec_sql}		{ BEGIN(SQL); return SQL_START; }
<C>{informix_special}	{
					/* only an SQL introducer when simulating Informix */
					if (!INFORMIX_MODE)
						return S_ANYTHING;

					BEGIN(SQL);
					return SQL_START;
				}
<C>{ccomment}		{ ECHO; }
<C>{cppinclude}		{
					/* without system-include processing, pass the line through */
					if (!system_includes)
					{
						base_yylval.str = mm_strdup(yytext);
						return CPP_LINE;
					}

					/* otherwise let the <incl> rules read the file name */
					include_next = false;
					BEGIN(incl);
				}
<C>{cppinclude_next}	{
					/* same as above, but parse_include() must skip the first hit */
					if (!system_includes)
					{
						base_yylval.str = mm_strdup(yytext);
						return CPP_LINE;
					}

					include_next = true;
					BEGIN(incl);
				}
<C,SQL>{cppline}	{
					base_yylval.str = mm_strdup(yytext);
					return CPP_LINE;
				}
<C>{identifier}	{
					/*
					 * Function-name detection: while no brace and no
					 * parenthesis is open, remember the most recent
					 * identifier, i.e. the last one before the first '('
					 * and '{'.
					 */
					if (braces_open == 0 && parenths_open == 0)
					{
						free(current_function);		/* free(NULL) is a no-op */
						current_function = mm_strdup(yytext);
					}

					/*
					 * Informix uses SQL defines only in SQL space, but a few
					 * built-in ones have to be honored here for compatibility.
					 * If either expansion kicks in, the scanner input has been
					 * switched and no token is returned from this rule.
					 */
					if (!(INFORMIX_MODE && isinformixdefine()) && !isdefine())
					{
						int			kw = ScanCKeywordLookup(yytext);

						if (kw >= 0)
							return kw;

						base_yylval.str = mm_strdup(yytext);
						return IDENT;
					}
				}
	/* A match of {xcstop} in C code is reported as an error (non-fatal: mmerror). */
1141 : <C>{xcstop} { mmerror(PARSE_ERROR, ET_ERROR, "nested /* ... */ comments"); }
	/* Single-character C tokens are returned as their own character code. */
1142 : <C>":" { return ':'; }
1143 : <C>";" { return ';'; }
1144 : <C>"," { return ','; }
1145 : <C>"*" { return '*'; }
1146 : <C>"%" { return '%'; }
1147 : <C>"/" { return '/'; }
1148 : <C>"+" { return '+'; }
1149 : <C>"-" { return '-'; }
	/*
	 * Parentheses additionally maintain parenths_open, which the
	 * <C>{identifier} rule consults to decide whether it is at global scope.
	 */
1150 : <C>"(" { parenths_open++; return '('; }
1151 : <C>")" { parenths_open--; return ')'; }
	/* Whitespace is copied to the output, both in C code and in skipped text. */
1152 : <C,xskip>{space} { ECHO; }
1153 : <C>\{ { return '{'; }
1154 : <C>\} { return '}'; }
1155 : <C>\[ { return '['; }
1156 : <C>\] { return ']'; }
1157 : <C>\= { return '='; }
	/*
	 * Multi-character C operators.  flex prefers the longest match, so
	 * "->*" is recognized ahead of "->" regardless of rule order.
	 */
1158 : <C>"->" { return S_MEMBER; }
1159 : <C>">>" { return S_RSHIFT; }
1160 : <C>"<<" { return S_LSHIFT; }
1161 : <C>"||" { return S_OR; }
1162 : <C>"&&" { return S_AND; }
1163 : <C>"++" { return S_INC; }
1164 : <C>"--" { return S_DEC; }
1165 : <C>"==" { return S_EQUAL; }
1166 : <C>"!=" { return S_NEQUAL; }
1167 : <C>"+=" { return S_ADD; }
1168 : <C>"-=" { return S_SUB; }
1169 : <C>"*=" { return S_MUL; }
1170 : <C>"/=" { return S_DIV; }
1171 : <C>"%=" { return S_MOD; }
1172 : <C>"->*" { return S_MEMPOINT; }
1173 : <C>".*" { return S_DOTPOINT; }
	/* Anything in C code not matched by another <C> rule. */
1174 : <C>{other} { return S_ANYTHING; }
<C>{exec_sql}{define}{space}*	{ BEGIN(def_ident); }
<C>{informix_special}{define}{space}*	{
					/* not simulating Informix: keep one character, rescan the rest */
					if (!INFORMIX_MODE)
					{
						yyless(1);
						return S_ANYTHING;
					}

					BEGIN(def_ident);
				}
<C>{exec_sql}{undef}{space}*	{ BEGIN(undef); }
<C>{informix_special}{undef}{space}*	{
					/* not simulating Informix: keep one character, rescan the rest */
					if (!INFORMIX_MODE)
					{
						yyless(1);
						return S_ANYTHING;
					}

					BEGIN(undef);
				}
<undef>{identifier}{space}*";"	{
					struct _defines *cur;
					struct _defines *prev = NULL;
					int			last;

					/*
					 * Cut off the ";" and any whitespace before it.  yytext
					 * holds at least one non-space character plus the ";",
					 * so the scan can start just left of the semicolon.
					 */
					last = strlen(yytext) - 2;
					while (last > 0 && ecpg_isspace(yytext[last]))
						last--;
					yytext[last + 1] = '\0';

					/* Unset the matching define; there should be only one */
					for (cur = defines; cur != NULL; prev = cur, cur = cur->next)
					{
						if (strcmp(yytext, cur->name) != 0)
							continue;

						free(cur->value);
						cur->value = NULL;

						/* An entry with a cmdvalue must stay in the list */
						if (cur->cmdvalue == NULL)
						{
							if (prev != NULL)
								prev->next = cur->next;
							else
								defines = cur->next;
							free(cur->name);
							free(cur);
						}
						break;
					}

					BEGIN(C);
				}
<undef>{other}|\n	{
					mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL UNDEF command");
					yyterminate();
				}
<C>{exec_sql}{include}{space}*	{ BEGIN(incl); }
<C>{informix_special}{include}{space}*	{
					/* not simulating Informix: keep one character, rescan the rest */
					if (!INFORMIX_MODE)
					{
						yyless(1);
						return S_ANYTHING;
					}

					BEGIN(incl);
				}
<C,xskip>{exec_sql}{ifdef}{space}*	{
					if (preproc_tos >= MAX_NESTED_IF-1)
						mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");

					/* push a new, not yet evaluated level; <xcond> decides .active */
					preproc_tos++;
					stacked_if_value[preproc_tos].else_branch = false;
					stacked_if_value[preproc_tos].saw_active = false;
					stacked_if_value[preproc_tos].active = false;

					/* IFDEF: the branch is taken when the symbol is defined */
					ifcond = true;
					BEGIN(xcond);
				}
<C,xskip>{informix_special}{ifdef}{space}*	{
					/* not simulating Informix: keep one character, rescan the rest */
					if (!INFORMIX_MODE)
					{
						yyless(1);
						return S_ANYTHING;
					}

					if (preproc_tos >= MAX_NESTED_IF-1)
						mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");

					preproc_tos++;
					stacked_if_value[preproc_tos].else_branch = false;
					stacked_if_value[preproc_tos].saw_active = false;
					stacked_if_value[preproc_tos].active = false;
					ifcond = true;
					BEGIN(xcond);
				}
<C,xskip>{exec_sql}{ifndef}{space}*	{
					if (preproc_tos >= MAX_NESTED_IF-1)
						mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");

					preproc_tos++;
					stacked_if_value[preproc_tos].else_branch = false;
					stacked_if_value[preproc_tos].saw_active = false;
					stacked_if_value[preproc_tos].active = false;

					/* IFNDEF: the branch is taken when the symbol is NOT defined */
					ifcond = false;
					BEGIN(xcond);
				}
<C,xskip>{informix_special}{ifndef}{space}*	{
					/* not simulating Informix: keep one character, rescan the rest */
					if (!INFORMIX_MODE)
					{
						yyless(1);
						return S_ANYTHING;
					}

					if (preproc_tos >= MAX_NESTED_IF-1)
						mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");

					preproc_tos++;
					stacked_if_value[preproc_tos].else_branch = false;
					stacked_if_value[preproc_tos].saw_active = false;
					stacked_if_value[preproc_tos].active = false;
					ifcond = false;
					BEGIN(xcond);
				}
<C,xskip>{exec_sql}{elif}{space}*	{
					/* ELIF needs an open IFDEF/IFNDEF ... */
					if (preproc_tos == 0)
						mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
					/* ... whose ELSE has not been seen yet */
					if (stacked_if_value[preproc_tos].else_branch)
						mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");

					/* evaluated like IFDEF, on the current stack level */
					BEGIN(xcond);
					ifcond = true;
				}
<C,xskip>{informix_special}{elif}{space}*	{
					/* not simulating Informix: keep one character, rescan the rest */
					if (!INFORMIX_MODE)
					{
						yyless(1);
						return S_ANYTHING;
					}

					if (preproc_tos == 0)
						mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
					if (stacked_if_value[preproc_tos].else_branch)
						mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");

					BEGIN(xcond);
					ifcond = true;
				}
1339 :
<C,xskip>{exec_sql}{else}{space}*";"	{
					/*
					 * Only EXEC SQL ENDIF pops the stack, so a second ELSE on
					 * the same level has to be caught here.
					 */
					if (preproc_tos != 0 && !stacked_if_value[preproc_tos].else_branch)
					{
						stacked_if_value[preproc_tos].else_branch = true;

						/*
						 * The ELSE part is live only if the enclosing level is
						 * live and no earlier branch of this level was taken.
						 */
						stacked_if_value[preproc_tos].active =
							(stacked_if_value[preproc_tos-1].active &&
							 !stacked_if_value[preproc_tos].saw_active);
						stacked_if_value[preproc_tos].saw_active = true;

						BEGIN(stacked_if_value[preproc_tos].active ? C : xskip);
					}
					else if (preproc_tos == 0)
						mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
					else
						mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
				}
<C,xskip>{informix_special}{else}{space}*";"	{
					/* not simulating Informix: keep one character, rescan the rest */
					if (!INFORMIX_MODE)
					{
						yyless(1);
						return S_ANYTHING;
					}

					if (preproc_tos != 0 && !stacked_if_value[preproc_tos].else_branch)
					{
						stacked_if_value[preproc_tos].else_branch = true;
						stacked_if_value[preproc_tos].active =
							(stacked_if_value[preproc_tos-1].active &&
							 !stacked_if_value[preproc_tos].saw_active);
						stacked_if_value[preproc_tos].saw_active = true;

						BEGIN(stacked_if_value[preproc_tos].active ? C : xskip);
					}
					else if (preproc_tos == 0)
						mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
					else
						mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
				}
<C,xskip>{exec_sql}{endif}{space}*";"	{
					/* pop one conditional level, if there is one */
					if (preproc_tos != 0)
						preproc_tos--;
					else
						mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");

					/* resume whatever the now-current level says */
					BEGIN(stacked_if_value[preproc_tos].active ? C : xskip);
				}
<C,xskip>{informix_special}{endif}{space}*";"	{
					/* not simulating Informix: keep one character, rescan the rest */
					if (!INFORMIX_MODE)
					{
						yyless(1);
						return S_ANYTHING;
					}

					if (preproc_tos != 0)
						preproc_tos--;
					else
						mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");

					BEGIN(stacked_if_value[preproc_tos].active ? C : xskip);
				}

<xskip>{other}		{ /* text inside an inactive conditional branch is dropped */ }
1420 :
<xcond>{identifier}{space}*";"	{
					struct _defines *def;
					unsigned int last;
					bool		branch_taken;

					/*
					 * Cut off the ";" and any whitespace before it.  yytext
					 * holds at least one non-space character plus the ";".
					 */
					last = strlen(yytext) - 2;
					while (last > 0 && ecpg_isspace(yytext[last]))
						last--;
					yytext[last + 1] = '\0';

					/* Look the symbol up among the known defines */
					def = defines;
					while (def != NULL && strcmp(yytext, def->name) != 0)
						def = def->next;

					/* An entry without a value is currently undefined */
					if (def != NULL && def->value == NULL)
						def = NULL;

					/* ifcond says whether "defined" or "undefined" selects the branch */
					branch_taken = (def ? ifcond : !ifcond);

					/*
					 * The branch is live only if the enclosing level is live,
					 * no earlier branch of this level was taken, and the
					 * condition itself holds.
					 */
					stacked_if_value[preproc_tos].active =
						(stacked_if_value[preproc_tos-1].active &&
						 !stacked_if_value[preproc_tos].saw_active &&
						 branch_taken);
					stacked_if_value[preproc_tos].saw_active |= branch_taken;

					BEGIN(stacked_if_value[preproc_tos].active ? C : xskip);
				}

<xcond>{other}|\n	{
					mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL IFDEF command");
					yyterminate();
				}
<def_ident>{identifier}	{
					/* remember the symbol name, then collect its value in <def> */
					newdefsymbol = mm_strdup(yytext);
					startlit();
					BEGIN(def);
				}
<def_ident>{other}|\n	{
					mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL DEFINE command");
					yyterminate();
				}
<def>{space}*";"	{
					struct _defines *entry = defines;

					/* Is the symbol already known? */
					while (entry != NULL && strcmp(newdefsymbol, entry->name) != 0)
						entry = entry->next;

					if (entry != NULL)
					{
						/* Yes: just replace its value */
						free(entry->value);
						entry->value = mm_strdup(literalbuf);
						/* the existing entry keeps its own name string */
						free(newdefsymbol);
					}
					else
					{
						/* No: push a new entry that takes over newdefsymbol */
						entry = (struct _defines *) mm_alloc(sizeof(struct _defines));

						entry->name = newdefsymbol;
						entry->value = mm_strdup(literalbuf);
						entry->cmdvalue = NULL;
						entry->used = NULL;
						entry->next = defines;
						defines = entry;
					}

					BEGIN(C);
				}
<def>[^;]			{ addlit(yytext, yyleng); }
	/*
	 * File name forms accepted after an include introducer; each one hands
	 * the matched text (still in yytext) to parse_include():
	 *   <name>   with optional trailing whitespace and optional ";"
	 *   "name"   with optional trailing whitespace and optional ";"
	 *   a bare name, which must be terminated by ";"
	 */
1507 : <incl>\<[^\>]+\>{space}*";"? { parse_include(); }
1508 : <incl>{dquote}{xdinside}{dquote}{space}*";"? { parse_include(); }
1509 : <incl>[^;\<\>\"]+";" { parse_include(); }
	/* Anything else in this state is a malformed include command. */
1510 : <incl>{other}|\n {
1511 : mmfatal(PARSE_ERROR, "syntax error in EXEC SQL INCLUDE command");
1512 : yyterminate();
1513 : }
1514 :
<<EOF>>				{
					struct _yy_buffer *done = yy_buffer;
					struct _defines *def;
					int			name_cmp;

					if (done == NULL)
					{
						/* No more input: nothing is stacked below this buffer */
						if (preproc_tos > 0)
						{
							preproc_tos = 0;
							mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
						}
						yyterminate();
					}

					/*
					 * Otherwise revert to the previous input source.
					 *
					 * If the buffer that just ended was a macro value, the
					 * symbol is no longer being expanded.  At most one define
					 * can refer to this stack entry.
					 */
					for (def = defines; def != NULL; def = def->next)
					{
						if (def->used == done)
						{
							def->used = NULL;
							break;
						}
					}

					if (yyin != NULL)
						fclose(yyin);

					yy_delete_buffer(YY_CURRENT_BUFFER);
					yy_switch_to_buffer(done->buffer);

					yylineno = done->lineno;

					/* A line marker is needed only if the file name changes */
					name_cmp = strcmp(input_filename, done->filename);

					free(input_filename);
					input_filename = done->filename;

					/* pop the stack entry */
					yy_buffer = done->next;
					free(done);

					if (name_cmp != 0)
						output_line_number();
				}
1565 :
	/*
	 * lex_init() switches to start condition C before scanning, so input
	 * arriving in INITIAL is reported as an internal error.
	 */
1566 : <INITIAL>{other}|\n { mmfatal(PARSE_ERROR, "internal error: unreachable state; please report this to <%s>", PACKAGE_BUGREPORT); }
1567 :
1568 : %%
1569 :
1570 : /* LCOV_EXCL_STOP */
1571 :
1572 : void
1573 : lex_init(void)
1574 134 : {
1575 : braces_open = 0;
1576 134 : parenths_open = 0;
1577 134 : current_function = NULL;
1578 134 :
1579 : yylineno = 1;
1580 134 :
1581 : /* initialize state for if/else/endif */
1582 : preproc_tos = 0;
1583 134 : stacked_if_value[preproc_tos].active = true;
1584 134 : stacked_if_value[preproc_tos].saw_active = true;
1585 134 : stacked_if_value[preproc_tos].else_branch = false;
1586 134 :
1587 : /* initialize literal buffer to a reasonable but expansible size */
1588 : if (literalbuf == NULL)
1589 134 : {
1590 : literalalloc = 1024;
1591 132 : literalbuf = (char *) mm_alloc(literalalloc);
1592 132 : }
1593 : startlit();
1594 134 :
1595 : BEGIN(C);
1596 134 : }
1597 134 :
1598 : static void
1599 : addlit(char *ytext, int yleng)
1600 47902 : {
1601 : /* enlarge buffer if needed */
1602 : if ((literallen+yleng) >= literalalloc)
1603 47902 : {
1604 : do
1605 : literalalloc *= 2;
1606 0 : while ((literallen+yleng) >= literalalloc);
1607 0 : literalbuf = (char *) realloc(literalbuf, literalalloc);
1608 0 : }
1609 : /* append new data, add trailing null */
1610 : memcpy(literalbuf+literallen, ytext, yleng);
1611 47902 : literallen += yleng;
1612 47902 : literalbuf[literallen] = '\0';
1613 47902 : }
1614 47902 :
1615 : static void
1616 : addlitchar(unsigned char ychar)
1617 0 : {
1618 : /* enlarge buffer if needed */
1619 : if ((literallen+1) >= literalalloc)
1620 0 : {
1621 : literalalloc *= 2;
1622 0 : literalbuf = (char *) realloc(literalbuf, literalalloc);
1623 0 : }
1624 : /* append new data, add trailing null */
1625 : literalbuf[literallen] = ychar;
1626 0 : literallen += 1;
1627 0 : literalbuf[literallen] = '\0';
1628 0 : }
1629 0 :
1630 : /*
1631 : * Process {decinteger}, {hexinteger}, etc. Note this will also do the right
1632 : * thing with {numeric}, ie digits and a decimal point.
1633 : */
1634 : static int
1635 : process_integer_literal(const char *token, YYSTYPE *lval, int base)
1636 2386 : {
1637 : int val;
1638 : char *endptr;
1639 :
1640 : errno = 0;
1641 2386 : val = strtoint(base == 10 ? token : token + 2, &endptr, base);
1642 2386 : if (*endptr != '\0' || errno == ERANGE)
1643 2386 : {
1644 : /* integer too large (or contains decimal pt), treat it as a float */
1645 : lval->str = mm_strdup(token);
1646 12 : return FCONST;
1647 12 : }
1648 : lval->ival = val;
1649 2374 : return ICONST;
1650 2374 : }
1651 :
1652 : static void
1653 : parse_include(void)
1654 176 : {
1655 : /* got the include file name */
1656 : struct _yy_buffer *yb;
1657 : struct _include_path *ip;
1658 : char inc_file[MAXPGPATH];
1659 : unsigned int i;
1660 :
1661 : yb = mm_alloc(sizeof(struct _yy_buffer));
1662 176 :
1663 : yb->buffer = YY_CURRENT_BUFFER;
1664 176 : yb->lineno = yylineno;
1665 176 : yb->filename = input_filename;
1666 176 : yb->next = yy_buffer;
1667 176 :
1668 : yy_buffer = yb;
1669 176 :
1670 : /*
1671 : * skip the ";" if there is one and trailing whitespace. Note that
1672 : * yytext contains at least one non-space character plus the ";"
1673 : */
1674 : for (i = strlen(yytext)-2;
1675 178 : i > 0 && ecpg_isspace(yytext[i]);
1676 178 : i--)
1677 2 : ;
1678 :
1679 : if (yytext[i] == ';')
1680 176 : i--;
1681 0 :
1682 : yytext[i+1] = '\0';
1683 176 :
1684 : yyin = NULL;
1685 176 :
1686 : /* If file name is enclosed in '"' remove these and look only in '.' */
1687 : /* Informix does look into all include paths though, except filename starts with '/' */
1688 : if (yytext[0] == '"' && yytext[i] == '"' &&
1689 176 : ((compat != ECPG_COMPAT_INFORMIX && compat != ECPG_COMPAT_INFORMIX_SE) || yytext[1] == '/'))
1690 0 : {
1691 : yytext[i] = '\0';
1692 0 : memmove(yytext, yytext+1, strlen(yytext));
1693 0 :
1694 : strlcpy(inc_file, yytext, sizeof(inc_file));
1695 0 : yyin = fopen(inc_file, "r");
1696 0 : if (!yyin)
1697 0 : {
1698 : if (strlen(inc_file) <= 2 || strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
1699 0 : {
1700 : strcat(inc_file, ".h");
1701 0 : yyin = fopen(inc_file, "r");
1702 0 : }
1703 : }
1704 :
1705 : }
1706 : else
1707 : {
1708 : if ((yytext[0] == '"' && yytext[i] == '"') || (yytext[0] == '<' && yytext[i] == '>'))
1709 176 : {
1710 : yytext[i] = '\0';
1711 4 : memmove(yytext, yytext+1, strlen(yytext));
1712 4 : }
1713 :
1714 : for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next)
1715 478 : {
1716 : if (strlen(ip->path) + strlen(yytext) + 4 > MAXPGPATH)
1717 302 : {
1718 : fprintf(stderr, _("Error: include path \"%s/%s\" is too long on line %d, skipping\n"), ip->path, yytext, yylineno);
1719 0 : continue;
1720 0 : }
1721 : snprintf (inc_file, sizeof(inc_file), "%s/%s", ip->path, yytext);
1722 302 : yyin = fopen(inc_file, "r");
1723 302 : if (!yyin)
1724 302 : {
1725 : if (strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
1726 278 : {
1727 : strcat(inc_file, ".h");
1728 272 : yyin = fopen(inc_file, "r");
1729 272 : }
1730 : }
1731 : /* if the command was "include_next" we have to disregard the first hit */
1732 : if (yyin && include_next)
1733 302 : {
1734 : fclose (yyin);
1735 0 : yyin = NULL;
1736 0 : include_next = false;
1737 0 : }
1738 : }
1739 : }
1740 : if (!yyin)
1741 176 : mmfatal(NO_INCLUDE_FILE, "could not open include file \"%s\" on line %d", yytext, yylineno);
1742 0 :
1743 : input_filename = mm_strdup(inc_file);
1744 176 : yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
1745 176 : yylineno = 1;
1746 176 : output_line_number();
1747 176 :
1748 : BEGIN(C);
1749 176 : }
1750 176 :
/*
 * ecpg_isspace() --- does the flex scanner treat ch as whitespace?
 *
 * True for blank, tab, newline, carriage return, form feed and vertical
 * tab; false for every other character.
 */
static bool
ecpg_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\f':
		case '\v':
			return true;
		default:
			return false;
	}
}
1766 :
1767 : /*
1768 : * If yytext matches a define symbol, begin scanning the symbol's value
1769 : * and return true
1770 : */
1771 : static bool
1772 : isdefine(void)
1773 28118 : {
1774 : struct _defines *ptr;
1775 :
1776 : /* is it a define? */
1777 : for (ptr = defines; ptr; ptr = ptr->next)
1778 128258 : {
1779 : /* notice we do not match anything being actively expanded */
1780 : if (strcmp(yytext, ptr->name) == 0 &&
1781 100308 : ptr->value != NULL &&
1782 168 : ptr->used == NULL)
1783 168 : {
1784 : /* Save state associated with the current buffer */
1785 : struct _yy_buffer *yb;
1786 :
1787 : yb = mm_alloc(sizeof(struct _yy_buffer));
1788 168 :
1789 : yb->buffer = YY_CURRENT_BUFFER;
1790 168 : yb->lineno = yylineno;
1791 168 : yb->filename = mm_strdup(input_filename);
1792 168 : yb->next = yy_buffer;
1793 168 : yy_buffer = yb;
1794 168 :
1795 : /* Mark symbol as being actively expanded */
1796 : ptr->used = yb;
1797 168 :
1798 : /*
1799 : * We use yy_scan_string which will copy the value, so there's
1800 : * no need to worry about a possible undef happening while we
1801 : * are still scanning it.
1802 : */
1803 : yy_scan_string(ptr->value);
1804 168 : return true;
1805 168 : }
1806 : }
1807 :
1808 : return false;
1809 27950 : }
1810 :
1811 : /*
1812 : * Handle replacement of INFORMIX built-in defines. This works just
1813 : * like isdefine() except for the source of the string to scan.
1814 : */
1815 : static bool
1816 : isinformixdefine(void)
1817 3518 : {
1818 : const char *new = NULL;
1819 3518 :
1820 : if (strcmp(yytext, "dec_t") == 0)
1821 3518 : new = "decimal";
1822 2 : else if (strcmp(yytext, "intrvl_t") == 0)
1823 3516 : new = "interval";
1824 0 : else if (strcmp(yytext, "dtime_t") == 0)
1825 3516 : new = "timestamp";
1826 0 :
1827 : if (new)
1828 3518 : {
1829 : struct _yy_buffer *yb;
1830 :
1831 : yb = mm_alloc(sizeof(struct _yy_buffer));
1832 2 :
1833 : yb->buffer = YY_CURRENT_BUFFER;
1834 2 : yb->lineno = yylineno;
1835 2 : yb->filename = mm_strdup(input_filename);
1836 2 : yb->next = yy_buffer;
1837 2 : yy_buffer = yb;
1838 2 :
1839 : yy_scan_string(new);
1840 2 : return true;
1841 2 : }
1842 :
1843 : return false;
1844 3516 : }
|