Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * pgc.l
5 : * lexical scanner for ecpg
6 : *
7 : * This is a modified version of src/backend/parser/scan.l
8 : *
9 : * The ecpg scanner is not backup-free, so the fail rules are
10 : * only here to simplify syncing this file with scan.l.
11 : *
12 : *
13 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
14 : * Portions Copyright (c) 1994, Regents of the University of California
15 : *
16 : * IDENTIFICATION
17 : * src/interfaces/ecpg/preproc/pgc.l
18 : *
19 : *-------------------------------------------------------------------------
20 : */
21 : #include "postgres_fe.h"
22 :
23 : #include <ctype.h>
24 : #include <limits.h>
25 :
26 : #include "common/string.h"
27 :
28 : #include "preproc_extern.h"
29 : #include "preproc.h"
30 : }
31 :
32 : %{
33 :
34 : /* LCOV_EXCL_START */
35 :
36 : extern YYSTYPE base_yylval;
37 :
38 : static int xcdepth = 0; /* depth of nesting in slash-star comments */
39 : static char *dolqstart = NULL; /* current $foo$ quote start string */
40 :
41 : /*
42 : * literalbuf is used to accumulate literal values when multiple rules
43 : * are needed to parse a single literal. Call startlit to reset buffer
44 : * to empty, addlit to add text. Note that the buffer is permanently
45 : * malloc'd to the largest size needed so far in the current run.
46 : */
47 : static char *literalbuf = NULL; /* expandable buffer */
48 : static int literallen; /* actual current length */
49 : static int literalalloc; /* current allocated buffer size */
50 :
51 : /* Used for detecting global state together with braces_open */
52 : static int parenths_open;
53 :
54 : /* Used to tell parse_include() whether the command was #include or #include_next */
55 : static bool include_next;
56 :
57 : #define startlit() (literalbuf[0] = '\0', literallen = 0)
58 : static void addlit(char *ytext, int yleng);
59 : static void addlitchar(unsigned char ychar);
60 : static int process_integer_literal(const char *token, YYSTYPE *lval, int base);
61 : static void parse_include(void);
62 : static bool ecpg_isspace(char ch);
63 : static bool isdefine(void);
64 : static bool isinformixdefine(void);
65 :
66 : char *token_start;
67 :
68 : /* vars to keep track of start conditions when scanning literals */
69 : static int state_before_str_start;
70 : static int state_before_str_stop;
71 :
72 : /*
73 : * State for handling include files and macro expansion. We use a new
74 : * flex input buffer for each level of include or macro, and create a
75 : * struct _yy_buffer to remember the previous level. There is not a struct
76 : * for the currently active input source; that state is kept in the global
77 : * variables YY_CURRENT_BUFFER, yylineno, and input_filename.
78 : */
79 : static struct _yy_buffer /* one entry per remembered (outer) include/macro level */
80 : {
81 : YY_BUFFER_STATE buffer; /* flex input buffer belonging to that level */
82 : long lineno; /* presumably the yylineno saved for that level -- confirm at the push/pop sites */
83 : char *filename; /* presumably the input_filename saved for that level -- confirm at the push/pop sites */
84 : struct _yy_buffer *next; /* link to the next remembered level */
85 : } *yy_buffer = NULL; /* starts out NULL: no previous level is remembered yet */
86 :
87 : /*
88 : * Vars for handling ifdef/elif/endif constructs. preproc_tos is the current
89 : * nesting depth of such constructs, and stacked_if_value[preproc_tos] is the
90 : * state for the innermost level. (For convenience, stacked_if_value[0] is
91 : * initialized as though we are in the active branch of some outermost IF.)
92 : * The active field is true if the current branch is active (being expanded).
93 : * The saw_active field is true if we have found any successful branch,
94 : * so that all subsequent branches of this level should be skipped.
95 : * The else_branch field is true if we've found an 'else' (so that another
96 : * 'else' or 'elif' at this level is an error.)
97 : * For IFs nested within an inactive branch, all branches always have active
98 : * set to false, but saw_active and else_branch are maintained normally.
99 : * ifcond is valid only while evaluating an if-condition; it's true if we
100 : * are doing ifdef, false if ifndef.
101 : */
102 : #define MAX_NESTED_IF 128
103 : static short preproc_tos;
104 : static bool ifcond;
105 : static struct _if_value
106 : {
107 : bool active; /* true if the current branch is active (being expanded) */
108 : bool saw_active; /* true once any branch at this level has succeeded; later branches are skipped */
109 : bool else_branch; /* true once an 'else' was seen; a further 'else' or 'elif' is an error */
110 : } stacked_if_value[MAX_NESTED_IF]; /* indexed by preproc_tos; entry 0 stands for an always-active outermost IF */
111 :
112 : %}
113 :
114 : %option 8bit
115 : %option never-interactive
116 : %option nodefault
117 : %option noinput
118 : %option noyywrap
119 : %option warn
120 : %option yylineno
121 : %option prefix="base_yy"
122 :
123 : /*
124 : * OK, here is a short description of lex/flex rules behavior.
125 : * The longest pattern which matches an input string is always chosen.
126 : * For equal-length patterns, the first occurring in the rules list is chosen.
127 : * INITIAL is the starting state, to which all non-conditional rules apply.
128 : * Exclusive states change parsing rules while the state is active. When in
129 : * an exclusive state, only those rules defined for that state apply.
130 : *
131 : * We use exclusive states for quoted strings, extended comments,
132 : * and to eliminate parsing troubles for numeric strings.
133 : * Exclusive states:
134 : * <xb> bit string literal
135 : * <xc> extended C-style comments
136 : * <xd> delimited identifiers (double-quoted identifiers)
137 : * <xdc> double-quoted strings in C
138 : * <xh> hexadecimal byte string
139 : * <xn> national character quoted strings
140 : * <xq> standard quoted strings
141 : * <xqs> quote stop (detect continued strings)
142 : * <xe> extended quoted strings (support backslash escape sequences)
143 : * <xqc> single-quoted strings in C
144 : * <xdolq> $foo$ quoted strings
145 : * <xui> quoted identifier with Unicode escapes
146 : * <xus> quoted string with Unicode escapes
147 : * <xcond> condition of an EXEC SQL IFDEF construct
148 : * <xskip> skipping the inactive part of an EXEC SQL IFDEF construct
149 : *
150 : * Note: we intentionally don't mimic the backend's <xeu> state; we have
151 : * no need to distinguish it from <xe> state.
152 : *
153 : * Remember to add an <<EOF>> case whenever you add a new exclusive state!
154 : * The default one is probably not the right thing.
155 : */
156 :
157 : %x xb
158 : %x xc
159 : %x xd
160 : %x xdc
161 : %x xh
162 : %x xn
163 : %x xq
164 : %x xqs
165 : %x xe
166 : %x xqc
167 : %x xdolq
168 : %x xui
169 : %x xus
170 : %x xcond
171 : %x xskip
172 :
173 : /* Additional exclusive states that are specific to ECPG */
174 : %x C SQL incl def def_ident undef
175 :
176 : /*
177 : * In order to make the world safe for Windows and Mac clients as well as
178 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
179 : * sequence will be seen as two successive newlines, but that doesn't cause
180 : * any problems. SQL-style comments, which start with -- and extend to the
181 : * next newline, are treated as equivalent to a single whitespace character.
182 : *
183 : * NOTE a fine point: if there is no newline following --, we will absorb
184 : * everything to the end of the input as a comment. This is correct. Older
185 : * versions of Postgres failed to recognize -- as a comment if the input
186 : * did not end with a newline.
187 : *
188 : * non_newline_space tracks all space characters except newlines.
189 : *
190 : * XXX if you change the set of whitespace characters, fix ecpg_isspace()
191 : * to agree.
192 : */
193 :
194 : space [ \t\n\r\f\v]
195 : non_newline_space [ \t\f\v]
196 : newline [\n\r]
197 : non_newline [^\n\r]
198 :
199 : comment ("--"{non_newline}*)
200 :
201 : whitespace ({space}+|{comment})
202 :
203 : /*
204 : * SQL requires at least one newline in the whitespace separating
205 : * string literals that are to be concatenated. Silly, but who are we
206 : * to argue? Note that {whitespace_with_newline} should not have * after
207 : * it, whereas {whitespace} should generally have a * after it...
208 : */
209 :
210 : non_newline_whitespace ({non_newline_space}|{comment})
211 : whitespace_with_newline ({non_newline_whitespace}*{newline}{whitespace}*)
212 :
213 : quote '
214 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
215 : quotecontinue {whitespace_with_newline}{quote}
216 :
217 : /*
218 : * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
219 : * {quotecontinue}. It might seem that this could just be {whitespace}*,
220 : * but if there's a dash after {whitespace_with_newline}, it must be consumed
221 : * to see if there's another dash --- which would start a {comment} and thus
222 : * allow continuation of the {quotecontinue} token.
223 : */
224 : quotecontinuefail {whitespace}*"-"?
225 :
226 : /* Bit string
227 : */
228 : xbstart [bB]{quote}
229 : xbinside [^']*
230 :
231 : /* Hexadecimal byte string */
232 : xhstart [xX]{quote}
233 : xhinside [^']*
234 :
235 : /* National character */
236 : xnstart [nN]{quote}
237 :
238 : /* Quoted string that allows backslash escapes */
239 : xestart [eE]{quote}
240 : xeinside [^\\']+
241 : xeescape [\\][^0-7]
242 : xeoctesc [\\][0-7]{1,3}
243 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
244 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
245 :
246 : /* Extended quote
247 : * xqdouble implements embedded quote, ''''
248 : */
249 : xqstart {quote}
250 : xqdouble {quote}{quote}
251 : xqcquote [\\]{quote}
252 : xqinside [^']+
253 :
254 : /* $foo$ style quotes ("dollar quoting")
255 : * The quoted string starts with $foo$ where "foo" is an optional string
256 : * in the form of an identifier, except that it may not contain "$",
257 : * and extends to the first occurrence of an identical string.
258 : * There is *no* processing of the quoted text.
259 : *
260 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
261 : * fails to match its trailing "$".
262 : */
263 : dolq_start [A-Za-z\200-\377_]
264 : dolq_cont [A-Za-z\200-\377_0-9]
265 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
266 : dolqfailed \${dolq_start}{dolq_cont}*
267 : dolqinside [^$]+
268 :
269 : /* Double quote
270 : * Allows embedded spaces and other special characters into identifiers.
271 : */
272 : dquote \"
273 : xdstart {dquote}
274 : xdstop {dquote}
275 : xddouble {dquote}{dquote}
276 : xdinside [^"]+
277 :
278 : /* Quoted identifier with Unicode escapes */
279 : xuistart [uU]&{dquote}
280 :
281 : /* Quoted string with Unicode escapes */
282 : xusstart [uU]&{quote}
283 :
284 : /* special stuff for C strings */
285 : xdcqq \\\\
286 : xdcqdq \\\"
287 : xdcother [^"]
288 : xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
289 :
290 :
291 : /* C-style comments
292 : *
293 : * The "extended comment" syntax closely resembles allowable operator syntax.
294 : * The tricky part here is to get lex to recognize a string starting with
295 : * slash-star as a comment, when interpreting it as an operator would produce
296 : * a longer match --- remember lex will prefer a longer match! Also, if we
297 : * have something like plus-slash-star, lex will think this is a 3-character
298 : * operator whereas we want to see it as a + operator and a comment start.
299 : * The solution is two-fold:
300 : * 1. append {op_chars}* to xcstart so that it matches as much text as
301 : * {operator} would. Then the tie-breaker (first matching rule of same
302 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
303 : * in case it contains a star-slash that should terminate the comment.
304 : * 2. In the operator rule, check for slash-star within the operator, and
305 : * if found throw it back with yyless(). This handles the plus-slash-star
306 : * problem.
307 : * Dash-dash comments have similar interactions with the operator rule.
308 : */
309 : xcstart \/\*{op_chars}*
310 : xcstop \*+\/
311 : xcinside [^*/]+
312 :
313 : ident_start [A-Za-z\200-\377_]
314 : ident_cont [A-Za-z\200-\377_0-9\$]
315 :
316 : identifier {ident_start}{ident_cont}*
317 :
318 : array ({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])*
319 :
320 : /* Assorted special-case operators and operator-like tokens */
321 : typecast "::"
322 : dot_dot \.\.
323 : colon_equals ":="
324 :
325 : /*
326 : * These operator-like tokens (unlike the above ones) also match the {operator}
327 : * rule, which means that they might be overridden by a longer match if they
328 : * are followed by a comment start or a + or - character. Accordingly, if you
329 : * add to this list, you must also add corresponding code to the {operator}
330 : * block to return the correct token in such cases. (This is not needed in
331 : * psqlscan.l since the token value is ignored there.)
332 : */
333 : equals_greater "=>"
334 : less_equals "<="
335 : greater_equals ">="
336 : less_greater "<>"
337 : not_equals "!="
338 :
339 : /*
340 : * "self" is the set of chars that should be returned as single-character
341 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
342 : * which can be one or more characters long (but if a single-char token
343 : * appears in the "self" set, it is not to be returned as an Op). Note
344 : * that the sets overlap, but each has some chars that are not in the other.
345 : *
346 : * If you change either set, adjust the character lists appearing in the
347 : * rule for "operator"!
348 : */
349 : self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
350 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
351 : operator {op_chars}+
352 :
353 : /*
354 : * Numbers
355 : *
356 : * Unary minus is not part of a number here. Instead we pass it separately to
357 : * the parser, and there it gets coerced via doNegate().
358 : *
359 : * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
360 : *
361 : * {realfail} is added to prevent the need for scanner
362 : * backup when the {real} rule fails to match completely.
363 : */
364 : decdigit [0-9]
365 : hexdigit [0-9A-Fa-f]
366 : octdigit [0-7]
367 : bindigit [0-1]
368 :
369 : decinteger {decdigit}(_?{decdigit})*
370 : hexinteger 0[xX](_?{hexdigit})+
371 : octinteger 0[oO](_?{octdigit})+
372 : bininteger 0[bB](_?{bindigit})+
373 :
374 : hexfail 0[xX]_?
375 : octfail 0[oO]_?
376 : binfail 0[bB]_?
377 :
378 : numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
379 : numericfail {decinteger}\.\.
380 :
381 : real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
382 : realfail ({decinteger}|{numeric})[Ee][-+]
383 :
384 : /* Positional parameters don't accept underscores. */
385 : param \${decdigit}+
386 :
387 : /*
388 : * An identifier immediately following an integer literal is disallowed because
389 : * in some cases it's ambiguous what is meant: for example, 0x1234 could be
390 : * either a hexinteger or a decinteger "0" and an identifier "x1234". We can
391 : * detect such problems by seeing if integer_junk matches a longer substring
392 : * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
393 : * bininteger). One "junk" pattern is sufficient because
394 : * {decinteger}{identifier} will match all the same strings we'd match with
395 : * {hexinteger}{identifier} etc.
396 : *
397 : * Note that the rule for integer_junk must appear after the ones for
398 : * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
399 : * and integer_junk, and we need hexinteger to be chosen in that case.
400 : *
401 : * Also disallow strings matched by numeric_junk, real_junk and param_junk
402 : * for consistency.
403 : */
404 : integer_junk {decinteger}{identifier}
405 : numeric_junk {numeric}{identifier}
406 : real_junk {real}{identifier}
407 : param_junk \${decdigit}+{identifier}
408 :
409 : /* special characters for other dbms */
410 : /* we have to react differently in compat mode */
411 : informix_special [\$]
412 :
413 : other .
414 :
415 : /*
416 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
417 : * Other quoted strings must allow some special characters such as single-quote
418 : * and newline.
419 : * Embedded single-quotes are implemented both in the SQL standard
420 : * style of two adjacent single quotes "''" and in the Postgres/Java style
421 : * of escaped-quote "\'".
422 : * Other embedded escaped characters are matched explicitly and the leading
423 : * backslash is dropped from the string.
424 : * Note that xcstart must appear before operator, as explained above!
425 : * Also whitespace (comment) must appear before operator.
426 : */
427 :
428 : /* some stuff needed for ecpg */
429 : exec [eE][xX][eE][cC]
430 : sql [sS][qQ][lL]
431 : define [dD][eE][fF][iI][nN][eE]
432 : include [iI][nN][cC][lL][uU][dD][eE]
433 : include_next [iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
434 : import [iI][mM][pP][oO][rR][tT]
435 : undef [uU][nN][dD][eE][fF]
436 :
437 : ccomment "//".*\n
438 :
439 : if [iI][fF]
440 : ifdef [iI][fF][dD][eE][fF]
441 : ifndef [iI][fF][nN][dD][eE][fF]
442 : else [eE][lL][sS][eE]
443 : elif [eE][lL][iI][fF]
444 : endif [eE][nN][dD][iI][fF]
445 :
446 : struct [sS][tT][rR][uU][cC][tT]
447 :
448 : exec_sql {exec}{space}*{sql}{space}*
449 : ipdigit ({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
450 : ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
451 :
452 : /* we might want to parse all cpp include files */
453 : cppinclude {space}*#{include}{space}*
454 : cppinclude_next {space}*#{include_next}{space}*
455 :
456 : /* take care of cpp lines, they may also be continued */
457 : /* first a general line for all commands not starting with "i" */
458 : /* and then the other commands starting with "i", we have to add these
459 : * separately because the cppline production would match on "include" too
460 : */
461 : cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline}
462 :
463 : %%
464 :
465 : %{
466 : /* code to execute during start of each call of yylex() */
467 : char *newdefsymbol = NULL;
468 :
469 : token_start = NULL;
470 : %}
471 :
472 : <SQL>{
473 : {whitespace} {
474 : /* ignore */
475 : }
476 : } /* <SQL> */
477 :
478 : <C,SQL>{
479 : {xcstart} { /* start of a slash-star comment in either C or SQL text */
480 : token_start = yytext;
481 : state_before_str_start = YYSTATE; /* C or SQL; the <xc> rules branch on this value */
482 : xcdepth = 0; /* no nested comment levels open yet */
483 : BEGIN(xc);
484 : /* Put back any characters past slash-star; see above */
485 : yyless(2);
486 : fputs("/*", yyout); /* the comment opener itself is copied to the output */
487 : }
488 : } /* <C,SQL> */
489 :
490 : <xc>{
491 : {xcstart} { /* a slash-star seen while already inside a comment */
492 : if (state_before_str_start == SQL)
493 : {
494 : xcdepth++; /* comments that began in SQL text nest */
495 : /* Put back any characters past slash-star; see above */
496 : yyless(2);
497 : fputs("/_*", yyout); /* note the underscore: the inner opener is not copied verbatim */
498 : }
499 : else if (state_before_str_start == C)
500 : {
501 : yyless(1); /* C comments do not nest: consume only the slash, so that a star-slash inside the matched text still reaches the xcstop rule and closes the comment */
502 : ECHO;
503 : }
504 : }
505 : {xcstop} { /* a star-slash: closes one comment level */
506 : if (state_before_str_start == SQL)
507 : {
508 : if (xcdepth <= 0)
509 : { /* outermost level: copy the closer and go back to SQL */
510 : ECHO;
511 : BEGIN(SQL);
512 : token_start = NULL;
513 : }
514 : else
515 : { /* nested level: just pop it */
516 : xcdepth--;
517 : fputs("*_/", yyout); /* underscore form, mirroring the nested opener above */
518 : }
519 : }
520 : else if (state_before_str_start == C)
521 : { /* comment began in C text, so the first closer ends it */
522 : ECHO;
523 : BEGIN(C);
524 : token_start = NULL;
525 : }
526 : }
527 :
528 : {xcinside} { /* run of characters that are neither star nor slash */
529 : ECHO;
530 : }
531 :
532 : {op_chars} { /* single operator character that did not start or end a comment */
533 : ECHO;
534 : }
535 :
536 : \*+ { /* stars not followed by a slash */
537 : ECHO;
538 : }
539 :
540 : <<EOF>> { /* input ended while still inside the comment */
541 : mmfatal(PARSE_ERROR, "unterminated /* comment");
542 : }
543 : } /* <xc> */
544 :
545 : <SQL>{
546 : {xbstart} {
547 : token_start = yytext;
548 : state_before_str_start = YYSTATE;
549 : BEGIN(xb);
550 : startlit();
551 : }
552 : } /* <SQL> */
553 :
554 : <xh>{xhinside} |
555 : <xb>{xbinside} {
556 : addlit(yytext, yyleng);
557 : }
558 : <xb><<EOF>> {
559 : mmfatal(PARSE_ERROR, "unterminated bit string literal");
560 : }
561 :
562 : <SQL>{xhstart} {
563 : token_start = yytext;
564 : state_before_str_start = YYSTATE;
565 : BEGIN(xh);
566 : startlit();
567 : }
568 : <xh><<EOF>> {
569 : mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal");
570 : }
571 :
572 : <C>{xqstart} {
573 : token_start = yytext;
574 : state_before_str_start = YYSTATE;
575 : BEGIN(xqc);
576 : startlit();
577 : }
578 :
579 : <SQL>{
580 : {xnstart} {
581 : /* National character. Transfer it as-is to the backend. */
582 : token_start = yytext;
583 : state_before_str_start = YYSTATE;
584 : BEGIN(xn);
585 : startlit();
586 : }
587 :
588 : {xqstart} {
589 : token_start = yytext;
590 : state_before_str_start = YYSTATE;
591 : BEGIN(xq);
592 : startlit();
593 : }
594 : {xestart} {
595 : token_start = yytext;
596 : state_before_str_start = YYSTATE;
597 : BEGIN(xe);
598 : startlit();
599 : }
600 : {xusstart} {
601 : token_start = yytext;
602 : state_before_str_start = YYSTATE;
603 : BEGIN(xus);
604 : startlit();
605 : }
606 : } /* <SQL> */
607 :
608 : <xb,xh,xq,xqc,xe,xn,xus>{quote} {
609 : /*
610 : * When we are scanning a quoted string and see an end
611 : * quote, we must look ahead for a possible continuation.
612 : * If we don't see one, we know the end quote was in fact
613 : * the end of the string. To reduce the lexer table size,
614 : * we use a single "xqs" state to do the lookahead for all
615 : * types of strings.
616 : */
617 : state_before_str_stop = YYSTATE;
618 : BEGIN(xqs);
619 : }
620 : <xqs>{quotecontinue} {
621 : /*
622 : * Found a quote continuation, so return to the in-quote
623 : * state and continue scanning the literal. Nothing is
624 : * added to the literal's contents.
625 : */
626 : BEGIN(state_before_str_stop);
627 : }
628 : <xqs>{quotecontinuefail} |
629 : <xqs>{other} |
630 : <xqs><<EOF>> {
631 : /*
632 : * Failed to see a quote continuation. Throw back
633 : * everything after the end quote, and handle the string
634 : * according to the state we were in previously. Each case
635 : * below rebuilds the literal from literalbuf with its prefix and quotes.
636 : */
637 : yyless(0);
638 : BEGIN(state_before_str_start); /* back to the state in effect before the literal started */
639 :
640 : switch (state_before_str_stop) /* the in-quote state saved when the end quote was seen */
641 : {
642 : case xb:
643 : if (literalbuf[strspn(literalbuf, "01")] != '\0') /* some character is not a binary digit */
644 : mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string literal");
645 : base_yylval.str = psprintf("b'%s'", literalbuf);
646 : return BCONST;
647 : case xh:
648 : if (literalbuf[strspn(literalbuf, "0123456789abcdefABCDEF")] != '\0') /* some character is not a hex digit */
649 : mmerror(PARSE_ERROR, ET_ERROR, "invalid hexadecimal string literal");
650 : base_yylval.str = psprintf("x'%s'", literalbuf);
651 : return XCONST;
652 : case xq:
653 : /* fallthrough */
654 : case xqc: /* plain SQL and C single-quoted strings are emitted the same way */
655 : base_yylval.str = psprintf("'%s'", literalbuf);
656 : return SCONST;
657 : case xe:
658 : base_yylval.str = psprintf("E'%s'", literalbuf);
659 : return SCONST;
660 : case xn:
661 : base_yylval.str = psprintf("N'%s'", literalbuf);
662 : return SCONST;
663 : case xus:
664 : base_yylval.str = psprintf("U&'%s'", literalbuf);
665 : return USCONST;
666 : default: /* a state that never enters xqs; indicates a scanner bug */
667 : mmfatal(PARSE_ERROR, "unhandled previous state in xqs\n");
668 : }
669 : }
669 :
670 : <xq,xe,xn,xus>{xqdouble} {
671 : addlit(yytext, yyleng);
672 : }
673 : <xqc>{xqcquote} {
674 : addlit(yytext, yyleng);
675 : }
676 : <xq,xqc,xn,xus>{xqinside} {
677 : addlit(yytext, yyleng);
678 : }
679 : <xe>{xeinside} {
680 : addlit(yytext, yyleng);
681 : }
682 : <xe>{xeunicode} {
683 : addlit(yytext, yyleng);
684 : }
685 : <xe>{xeescape} {
686 : addlit(yytext, yyleng);
687 : }
688 : <xe>{xeoctesc} {
689 : addlit(yytext, yyleng);
690 : }
691 : <xe>{xehexesc} {
692 : addlit(yytext, yyleng);
693 : }
694 : <xe>. {
695 : /* This is only needed for \ just before EOF */
696 : addlitchar(yytext[0]);
697 : }
698 : <xq,xqc,xe,xn,xus><<EOF>> {
699 : mmfatal(PARSE_ERROR, "unterminated quoted string");
700 : }
701 :
702 : <SQL>{
703 : {dolqdelim} { /* opening $foo$ (or $$) of a dollar-quoted string */
704 : token_start = yytext;
705 : if (dolqstart)
706 : free(dolqstart); /* discard a delimiter left over from an earlier literal */
707 : dolqstart = mm_strdup(yytext); /* remembered so the <xdolq> rule can recognize the matching closer */
708 : BEGIN(xdolq);
709 : startlit();
710 : addlit(yytext, yyleng); /* the opening delimiter is kept as part of the literal text */
711 : }
712 : {dolqfailed} { /* "$ident" with no closing "$": not a dollar-quote start */
713 : /* throw back all but the initial "$" */
714 : yyless(1);
715 : /* and treat it as {other} */
716 : return yytext[0];
717 : }
718 : } /* <SQL> */
719 :
720 : <xdolq>{dolqdelim} { /* a $...$ sequence inside a dollar-quoted string */
721 : if (strcmp(yytext, dolqstart) == 0)
722 : { /* identical to the opening delimiter: the literal is complete */
723 : addlit(yytext, yyleng);
724 : free(dolqstart);
725 : dolqstart = NULL;
726 : BEGIN(SQL);
727 : base_yylval.str = mm_strdup(literalbuf); /* literalbuf holds opener, body and closer */
728 : return SCONST;
729 : }
730 : else
731 : {
732 : /*
733 : * When we fail to match $...$ to dolqstart, transfer
734 : * the $... part to the output, but put back the final
735 : * $ for rescanning. Consider $delim$...$junk$delim$
736 : */
737 : addlit(yytext, yyleng - 1); /* everything except the trailing '$' */
738 : yyless(yyleng - 1); /* that '$' may begin the real closing delimiter */
739 : }
740 : }
741 : <xdolq>{dolqinside} {
742 : addlit(yytext, yyleng);
743 : }
744 : <xdolq>{dolqfailed} {
745 : addlit(yytext, yyleng);
746 : }
747 : <xdolq>. {
748 : /* single quote or dollar sign */
749 : addlitchar(yytext[0]);
750 : }
751 : <xdolq><<EOF>> {
752 : mmfatal(PARSE_ERROR, "unterminated dollar-quoted string");
753 : }
754 :
755 : <SQL>{
756 : {xdstart} {
757 : state_before_str_start = YYSTATE;
758 : BEGIN(xd);
759 : startlit();
760 : }
761 : {xuistart} {
762 : state_before_str_start = YYSTATE;
763 : BEGIN(xui);
764 : startlit();
765 : }
766 : } /* <SQL> */
767 :
768 : <xd>{xdstop} {
769 : BEGIN(state_before_str_start);
770 : if (literallen == 0)
771 : mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
772 :
773 : /*
774 : * The server will truncate the identifier here. We do
775 : * not, as (1) it does not change the result; (2) we don't
776 : * know what NAMEDATALEN the server might use; (3) this
777 : * code path is also taken for literal query strings in
778 : * PREPARE and EXECUTE IMMEDIATE, which can certainly be
779 : * longer than NAMEDATALEN.
780 : */
781 : base_yylval.str = mm_strdup(literalbuf);
782 : return CSTRING;
783 : }
784 : <xdc>{xdstop} {
785 : BEGIN(state_before_str_start);
786 : base_yylval.str = mm_strdup(literalbuf);
787 : return CSTRING;
788 : }
789 : <xui>{dquote} {
790 : BEGIN(state_before_str_start);
791 : if (literallen == 0)
792 : mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
793 :
794 : /*
795 : * The backend will truncate the identifier here. We do
796 : * not as it does not change the result.
797 : */
798 : base_yylval.str = psprintf("U&\"%s\"", literalbuf);
799 : return UIDENT;
800 : }
801 : <xd,xui>{xddouble} {
802 : addlit(yytext, yyleng);
803 : }
804 : <xd,xui>{xdinside} {
805 : addlit(yytext, yyleng);
806 : }
807 : <xd,xui><<EOF>> {
808 : mmfatal(PARSE_ERROR, "unterminated quoted identifier");
809 : }
810 : <C>{xdstart} {
811 : state_before_str_start = YYSTATE;
812 : BEGIN(xdc);
813 : startlit();
814 : }
815 : <xdc>{xdcinside} {
816 : addlit(yytext, yyleng);
817 : }
818 : <xdc><<EOF>> {
819 : mmfatal(PARSE_ERROR, "unterminated quoted string");
820 : }
821 :
822 : <SQL>{
823 : {typecast} {
824 : return TYPECAST;
825 : }
826 :
827 : {dot_dot} {
828 : return DOT_DOT;
829 : }
830 :
831 : {colon_equals} {
832 : return COLON_EQUALS;
833 : }
834 :
835 : {equals_greater} {
836 : return EQUALS_GREATER;
837 : }
838 :
839 : {less_equals} {
840 : return LESS_EQUALS;
841 : }
842 :
843 : {greater_equals} {
844 : return GREATER_EQUALS;
845 : }
846 :
847 : {less_greater} {
848 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
849 : return NOT_EQUALS;
850 : }
851 :
852 : {not_equals} {
853 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
854 : return NOT_EQUALS;
855 : }
856 :
857 : {informix_special} { /* a '$' in SQL text */
858 : /* are we simulating Informix? */
859 : if (INFORMIX_MODE)
860 : {
861 : unput(':'); /* push back a ':' in place of the '$'; no token is returned from this action */
862 : }
863 : else
864 : return yytext[0]; /* otherwise the '$' is returned as a single-character token */
865 : }
866 :
867 : {self} {
868 : /*
869 : * We may find a ';' inside a structure definition in a
870 : * TYPE or VAR statement. This is not an EOL marker.
871 : */
872 : if (yytext[0] == ';' && struct_level == 0)
873 : BEGIN(C);
874 : return yytext[0];
875 : }
876 :
877 : {operator} {
878 : /*
879 : * Check for embedded slash-star or dash-dash; those
880 : * are comment starts, so operator must stop there.
881 : * Note that slash-star or dash-dash at the first
882 : * character will match a prior rule, not this one.
883 : */
884 : int nchars = yyleng; /* number of leading characters to keep as the operator */
885 : char *slashstar = strstr(yytext, "/*");
886 : char *dashdash = strstr(yytext, "--");
887 :
888 : if (slashstar && dashdash)
889 : {
890 : /* if both appear, take the first one */
891 : if (slashstar > dashdash)
892 : slashstar = dashdash;
893 : }
894 : else if (!slashstar)
895 : slashstar = dashdash; /* from here on, slashstar is the earliest comment start of either kind, or NULL */
896 : if (slashstar)
897 : nchars = slashstar - yytext; /* keep only the text before the comment start */
898 :
899 : /*
900 : * For SQL compatibility, '+' and '-' cannot be the
901 : * last char of a multi-char operator unless the operator
902 : * contains chars that are not in SQL operators.
903 : * The idea is to lex '=-' as two operators, but not
904 : * to forbid operator names like '?-' that could not be
905 : * sequences of SQL operators.
906 : */
907 : if (nchars > 1 &&
908 : (yytext[nchars - 1] == '+' ||
909 : yytext[nchars - 1] == '-'))
910 : {
911 : int ic;
912 :
913 : for (ic = nchars - 2; ic >= 0; ic--) /* scan the chars before the trailing sign, right to left */
914 : {
915 : char c = yytext[ic];
916 :
917 : if (c == '~' || c == '!' || c == '@' ||
918 : c == '#' || c == '^' || c == '&' ||
919 : c == '|' || c == '`' || c == '?' ||
920 : c == '%')
921 : break; /* found a char that lets the trailing sign stay */
922 : }
923 : if (ic < 0) /* loop ran to completion without a break */
924 : {
925 : /*
926 : * didn't find a qualifying character, so remove
927 : * all trailing [+-]
928 : */
929 : do
930 : {
931 : nchars--;
932 : } while (nchars > 1 &&
933 : (yytext[nchars - 1] == '+' ||
934 : yytext[nchars - 1] == '-')); /* always leaves at least one character */
935 : }
936 : }
937 :
938 : if (nchars < yyleng) /* something was trimmed above */
939 : {
940 : /* Strip the unwanted chars from the token */
941 : yyless(nchars);
942 :
943 : /*
944 : * If what we have left is only one char, and it's
945 : * one of the characters matching "self", then
946 : * return it as a character token the same way
947 : * that the "self" rule would have.
948 : */
949 : if (nchars == 1 &&
950 : strchr(",()[].;:+-*/%^<>=", yytext[0]))
951 : return yytext[0];
952 :
953 : /*
954 : * Likewise, if what we have left is two chars, and
955 : * those match the tokens ">=", "<=", "=>", "<>" or
956 : * "!=", then we must return the appropriate token
957 : * rather than the generic Op.
958 : */
959 : if (nchars == 2)
960 : {
961 : if (yytext[0] == '=' && yytext[1] == '>')
962 : return EQUALS_GREATER;
963 : if (yytext[0] == '>' && yytext[1] == '=')
964 : return GREATER_EQUALS;
965 : if (yytext[0] == '<' && yytext[1] == '=')
966 : return LESS_EQUALS;
967 : if (yytext[0] == '<' && yytext[1] == '>')
968 : return NOT_EQUALS;
969 : if (yytext[0] == '!' && yytext[1] == '=')
970 : return NOT_EQUALS;
971 : }
972 : }
973 :
974 : base_yylval.str = mm_strdup(yytext); /* anything else is a generic operator; its text is the token value */
975 : return Op;
976 : }
977 :
978 : {param} { /* positional parameter: '$' followed by decimal digits */
979 : int val;
980 :
981 : errno = 0; /* cleared so the ERANGE test below reflects this conversion only */
982 : val = strtoint(yytext + 1, NULL, 10); /* yytext + 1 skips the leading '$' */
983 : if (errno == ERANGE)
984 : mmfatal(PARSE_ERROR, "parameter number too large");
985 : base_yylval.ival = val;
986 : return PARAM;
987 : }
988 : {param_junk} {
989 : mmfatal(PARSE_ERROR, "trailing junk after parameter");
990 : }
991 :
992 : {ip} {
993 : base_yylval.str = mm_strdup(yytext);
994 : return IP;
995 : }
996 : } /* <SQL> */
997 :
998 : <C,SQL>{
999 : {decinteger} {
1000 : return process_integer_literal(yytext, &base_yylval, 10); /* last argument is the numeric base */
1001 : }
1002 : {hexinteger} {
1003 : return process_integer_literal(yytext, &base_yylval, 16);
1004 : }
1005 : {numeric} {
1006 : base_yylval.str = mm_strdup(yytext); /* passed on as text, not converted here */
1007 : return FCONST;
1008 : }
1009 : {numericfail} {
1010 : /* throw back the .., and treat as integer */
1011 : yyless(yyleng - 2);
1012 : return process_integer_literal(yytext, &base_yylval, 10);
1013 : }
1014 : {real} {
1015 : base_yylval.str = mm_strdup(yytext); /* passed on as text, not converted here */
1016 : return FCONST;
1017 : }
1018 : {realfail} {
1019 : /*
1020 : * throw back the [Ee][+-], and figure out whether what
1021 : * remains is a {decinteger} or {numeric}.
1022 : */
1023 : yyless(yyleng - 2);
1024 : return process_integer_literal(yytext, &base_yylval, 10); /* NOTE(review): the remaining text may contain a decimal point; presumably process_integer_literal() handles that -- confirm against its definition */
1025 : }
1026 : } /* <C,SQL> */
1027 :
1028 : <SQL>{
1029 : {octinteger} {
1030 : return process_integer_literal(yytext, &base_yylval, 8);
1031 : }
1032 : {bininteger} {
1033 : return process_integer_literal(yytext, &base_yylval, 2);
1034 : }
1035 :
1036 : /*
1037 : * Note that some trailing junk is valid in C (such as 100LL), so we
1038 : * confine the trailing-junk checks below to SQL mode.
1039 : */
1040 : {integer_junk} {
1041 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1042 : }
1043 : {numeric_junk} {
1044 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1045 : }
1046 : {real_junk} {
1047 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1048 : }
1049 :
1050 : :{identifier}((("->"|\.){identifier})|(\[{array}\]))* {
1051 : base_yylval.str = mm_strdup(yytext + 1);
1052 : return CVARIABLE;
1053 : }
1054 :
1055 : {identifier} {
1056 : /* First check to see if it's a define symbol to expand */
1057 : if (!isdefine())
1058 : {
1059 : int kwvalue;
1060 :
1061 : /*
1062 : * User-defined typedefs override SQL keywords, but
1063 : * not C keywords. Currently, a typedef name is just
1064 : * reported as IDENT, but someday we might need to
1065 : * return a distinct token type.
1066 : */
1067 : if (get_typedef(yytext, true) == NULL)
1068 : {
1069 : /* Is it an SQL/ECPG keyword? */
1070 : kwvalue = ScanECPGKeywordLookup(yytext);
1071 : if (kwvalue >= 0)
1072 : return kwvalue;
1073 : }
1074 :
1075 : /* Is it a C keyword? */
1076 : kwvalue = ScanCKeywordLookup(yytext);
1077 : if (kwvalue >= 0)
1078 : return kwvalue;
1079 :
1080 : /*
1081 : * None of the above. Return it as an identifier.
1082 : *
1083 : * The backend will attempt to truncate and case-fold
1084 : * the identifier, but I see no good reason for ecpg
1085 : * to do so; that's just another way that ecpg could
1086 : * get out of step with the backend.
1087 : */
1088 : base_yylval.str = mm_strdup(yytext);
1089 : return IDENT;
1090 : }
1091 : }
1092 :
1093 : {other} {
1094 : return yytext[0];
1095 : }
1096 : } /* <SQL> */
1097 :
1098 : /*
1099 : * Begin ECPG-specific rules
1100 : */
1101 :
1102 : <C>{exec_sql} {
1103 : BEGIN(SQL);
1104 : return SQL_START;
1105 : }
1106 : <C>{informix_special} {
1107 : /* are we simulating Informix? */
1108 : if (INFORMIX_MODE)
1109 : {
1110 : BEGIN(SQL);
1111 : return SQL_START;
1112 : }
1113 : else
1114 : return S_ANYTHING;
1115 : }
1116 : <C>{ccomment} {
1117 : ECHO;
1118 : }
1119 : <C>{cppinclude} {
1120 : if (system_includes)
1121 : {
1122 : include_next = false;
1123 : BEGIN(incl);
1124 : }
1125 : else
1126 : {
1127 : base_yylval.str = mm_strdup(yytext);
1128 : return CPP_LINE;
1129 : }
1130 : }
1131 : <C>{cppinclude_next} {
1132 : if (system_includes)
1133 : {
1134 : include_next = true;
1135 : BEGIN(incl);
1136 : }
1137 : else
1138 : {
1139 : base_yylval.str = mm_strdup(yytext);
1140 : return CPP_LINE;
1141 : }
1142 : }
1143 : <C,SQL>{cppline} {
1144 : base_yylval.str = mm_strdup(yytext);
1145 : return CPP_LINE;
1146 : }
1147 : <C>{identifier} {
1148 : /*
1149 : * Try to detect a function name:
1150 : * look for identifiers at the global scope
1151 : * keep the last identifier before the first '(' and '{'
1152 : */
1153 : if (braces_open == 0 && parenths_open == 0)
1154 : {
1155 : if (current_function)
1156 : free(current_function);
1157 : current_function = mm_strdup(yytext);
1158 : }
1159 : /* Informix uses SQL defines only in SQL space */
1160 : /* however, some defines have to be taken care of for compatibility */
1161 : if ((!INFORMIX_MODE || !isinformixdefine()) && !isdefine())
1162 : {
1163 : int kwvalue;
1164 :
1165 : kwvalue = ScanCKeywordLookup(yytext);
1166 : if (kwvalue >= 0)
1167 : return kwvalue;
1168 : else
1169 : {
1170 : base_yylval.str = mm_strdup(yytext);
1171 : return IDENT;
1172 : }
1173 : }
1174 : }
1175 : <C>{xcstop} {
1176 : mmerror(PARSE_ERROR, ET_ERROR, "nested /* ... */ comments");
1177 : }
1178 : <C>":" { return ':'; }
1179 : <C>";" { return ';'; }
1180 : <C>"," { return ','; }
1181 : <C>"*" { return '*'; }
1182 : <C>"%" { return '%'; }
1183 : <C>"/" { return '/'; }
1184 : <C>"+" { return '+'; }
1185 : <C>"-" { return '-'; }
1186 : <C>"(" { parenths_open++; return '('; }
1187 : <C>")" { parenths_open--; return ')'; }
1188 : <C,xskip>{space} { ECHO; }
1189 : <C>\{ { return '{'; }
1190 : <C>\} { return '}'; }
1191 : <C>\[ { return '['; }
1192 : <C>\] { return ']'; }
1193 : <C>\= { return '='; }
1194 : <C>"->" { return S_MEMBER; }
1195 : <C>">>" { return S_RSHIFT; }
1196 : <C>"<<" { return S_LSHIFT; }
1197 : <C>"||" { return S_OR; }
1198 : <C>"&&" { return S_AND; }
1199 : <C>"++" { return S_INC; }
1200 : <C>"--" { return S_DEC; }
1201 : <C>"==" { return S_EQUAL; }
1202 : <C>"!=" { return S_NEQUAL; }
1203 : <C>"+=" { return S_ADD; }
1204 : <C>"-=" { return S_SUB; }
1205 : <C>"*=" { return S_MUL; }
1206 : <C>"/=" { return S_DIV; }
1207 : <C>"%=" { return S_MOD; }
1208 : <C>"->*" { return S_MEMPOINT; }
1209 : <C>".*" { return S_DOTPOINT; }
1210 : <C>{other} { return S_ANYTHING; }
1211 : <C>{exec_sql}{define}{space}* { BEGIN(def_ident); }
1212 : <C>{informix_special}{define}{space}* {
1213 : /* are we simulating Informix? */
1214 : if (INFORMIX_MODE)
1215 : {
1216 : BEGIN(def_ident);
1217 : }
1218 : else
1219 : {
1220 : yyless(1);
1221 : return S_ANYTHING;
1222 : }
1223 : }
1224 : <C>{exec_sql}{undef}{space}* {
1225 : BEGIN(undef);
1226 : }
1227 : <C>{informix_special}{undef}{space}* {
1228 : /* are we simulating Informix? */
1229 : if (INFORMIX_MODE)
1230 : {
1231 : BEGIN(undef);
1232 : }
1233 : else
1234 : {
1235 : yyless(1);
1236 : return S_ANYTHING;
1237 : }
1238 : }
1239 : <undef>{identifier}{space}*";" {
1240 : struct _defines *ptr,
1241 : *ptr2 = NULL;
1242 : int i;
1243 :
1244 : /*
1245 : * Skip the ";" and trailing whitespace. Note that yytext
1246 : * contains at least one non-space character plus the ";"
1247 : */
1248 : for (i = strlen(yytext) - 2;
1249 : i > 0 && ecpg_isspace(yytext[i]);
1250 : i--)
1251 : ;
1252 : yytext[i + 1] = '\0';
1253 :
1254 : /* Find and unset any matching define; should be only 1 */
1255 : for (ptr = defines; ptr; ptr2 = ptr, ptr = ptr->next)
1256 : {
1257 : if (strcmp(yytext, ptr->name) == 0)
1258 : {
1259 : free(ptr->value);
1260 : ptr->value = NULL;
1261 : /* We cannot forget it if there's a cmdvalue */
1262 : if (ptr->cmdvalue == NULL)
1263 : {
1264 : if (ptr2 == NULL)
1265 : defines = ptr->next;
1266 : else
1267 : ptr2->next = ptr->next;
1268 : free(ptr->name);
1269 : free(ptr);
1270 : }
1271 : break;
1272 : }
1273 : }
1274 :
1275 : BEGIN(C);
1276 : }
1277 : <undef>{other}|\n {
1278 : mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL UNDEF command");
1279 : yyterminate();
1280 : }
1281 : <C>{exec_sql}{include}{space}* {
1282 : BEGIN(incl);
1283 : }
1284 : <C>{informix_special}{include}{space}* {
1285 : /* are we simulating Informix? */
1286 : if (INFORMIX_MODE)
1287 : {
1288 : BEGIN(incl);
1289 : }
1290 : else
1291 : {
1292 : yyless(1);
1293 : return S_ANYTHING;
1294 : }
1295 : }
1296 : <C,xskip>{exec_sql}{ifdef}{space}* {
1297 : if (preproc_tos >= MAX_NESTED_IF - 1)
1298 : mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
1299 : preproc_tos++;
1300 : stacked_if_value[preproc_tos].active = false;
1301 : stacked_if_value[preproc_tos].saw_active = false;
1302 : stacked_if_value[preproc_tos].else_branch = false;
1303 : ifcond = true;
1304 : BEGIN(xcond);
1305 : }
1306 : <C,xskip>{informix_special}{ifdef}{space}* {
1307 : /* are we simulating Informix? */
1308 : if (INFORMIX_MODE)
1309 : {
1310 : if (preproc_tos >= MAX_NESTED_IF - 1)
1311 : mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
1312 : preproc_tos++;
1313 : stacked_if_value[preproc_tos].active = false;
1314 : stacked_if_value[preproc_tos].saw_active = false;
1315 : stacked_if_value[preproc_tos].else_branch = false;
1316 : ifcond = true;
1317 : BEGIN(xcond);
1318 : }
1319 : else
1320 : {
1321 : yyless(1);
1322 : return S_ANYTHING;
1323 : }
1324 : }
1325 : <C,xskip>{exec_sql}{ifndef}{space}* {
1326 : if (preproc_tos >= MAX_NESTED_IF - 1)
1327 : mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
1328 : preproc_tos++;
1329 : stacked_if_value[preproc_tos].active = false;
1330 : stacked_if_value[preproc_tos].saw_active = false;
1331 : stacked_if_value[preproc_tos].else_branch = false;
1332 : ifcond = false;
1333 : BEGIN(xcond);
1334 : }
1335 : <C,xskip>{informix_special}{ifndef}{space}* {
1336 : /* are we simulating Informix? */
1337 : if (INFORMIX_MODE)
1338 : {
1339 : if (preproc_tos >= MAX_NESTED_IF - 1)
1340 : mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
1341 : preproc_tos++;
1342 : stacked_if_value[preproc_tos].active = false;
1343 : stacked_if_value[preproc_tos].saw_active = false;
1344 : stacked_if_value[preproc_tos].else_branch = false;
1345 : ifcond = false;
1346 : BEGIN(xcond);
1347 : }
1348 : else
1349 : {
1350 : yyless(1);
1351 : return S_ANYTHING;
1352 : }
1353 : }
1354 : <C,xskip>{exec_sql}{elif}{space}* {
1355 : if (preproc_tos == 0)
1356 : mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
1357 : if (stacked_if_value[preproc_tos].else_branch)
1358 : mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
1359 : ifcond = true;
1360 : BEGIN(xcond);
1361 : }
1362 : <C,xskip>{informix_special}{elif}{space}* {
1363 : /* are we simulating Informix? */
1364 : if (INFORMIX_MODE)
1365 : {
1366 : if (preproc_tos == 0)
1367 : mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
1368 : if (stacked_if_value[preproc_tos].else_branch)
1369 : mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
1370 : ifcond = true;
1371 : BEGIN(xcond);
1372 : }
1373 : else
1374 : {
1375 : yyless(1);
1376 : return S_ANYTHING;
1377 : }
1378 : }
1379 :
1380 : <C,xskip>{exec_sql}{else}{space}*";" {
1381 : /* only exec sql endif pops the stack, so take care of duplicated 'else' */
1382 : if (preproc_tos == 0)
1383 : mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
1384 : else if (stacked_if_value[preproc_tos].else_branch)
1385 : mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
1386 : else
1387 : {
1388 : stacked_if_value[preproc_tos].else_branch = true;
1389 : stacked_if_value[preproc_tos].active =
1390 : (stacked_if_value[preproc_tos - 1].active &&
1391 : !stacked_if_value[preproc_tos].saw_active);
1392 : stacked_if_value[preproc_tos].saw_active = true;
1393 :
1394 : if (stacked_if_value[preproc_tos].active)
1395 : BEGIN(C);
1396 : else
1397 : BEGIN(xskip);
1398 : }
1399 : }
1400 : <C,xskip>{informix_special}{else}{space}*";" {
1401 : /* are we simulating Informix? */
1402 : if (INFORMIX_MODE)
1403 : {
1404 : if (preproc_tos == 0)
1405 : mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
1406 : else if (stacked_if_value[preproc_tos].else_branch)
1407 : mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
1408 : else
1409 : {
1410 : stacked_if_value[preproc_tos].else_branch = true;
1411 : stacked_if_value[preproc_tos].active =
1412 : (stacked_if_value[preproc_tos - 1].active &&
1413 : !stacked_if_value[preproc_tos].saw_active);
1414 : stacked_if_value[preproc_tos].saw_active = true;
1415 :
1416 : if (stacked_if_value[preproc_tos].active)
1417 : BEGIN(C);
1418 : else
1419 : BEGIN(xskip);
1420 : }
1421 : }
1422 : else
1423 : {
1424 : yyless(1);
1425 : return S_ANYTHING;
1426 : }
1427 : }
1428 : <C,xskip>{exec_sql}{endif}{space}*";" {
1429 : if (preproc_tos == 0)
1430 : mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");
1431 : else
1432 : preproc_tos--;
1433 :
1434 : if (stacked_if_value[preproc_tos].active)
1435 : BEGIN(C);
1436 : else
1437 : BEGIN(xskip);
1438 : }
1439 : <C,xskip>{informix_special}{endif}{space}*";" {
1440 : /* are we simulating Informix? */
1441 : if (INFORMIX_MODE)
1442 : {
1443 : if (preproc_tos == 0)
1444 : mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");
1445 : else
1446 : preproc_tos--;
1447 :
1448 : if (stacked_if_value[preproc_tos].active)
1449 : BEGIN(C);
1450 : else
1451 : BEGIN(xskip);
1452 : }
1453 : else
1454 : {
1455 : yyless(1);
1456 : return S_ANYTHING;
1457 : }
1458 : }
1459 :
1460 : <xskip>{other} { /* ignore */ }
1461 :
1462 : <xcond>{identifier}{space}*";" {
1463 : {
1464 : struct _defines *defptr;
1465 : unsigned int i;
1466 : bool this_active;
1467 :
1468 : /*
1469 : * Skip the ";" and trailing whitespace. Note that
1470 : * yytext contains at least one non-space character
1471 : * plus the ";"
1472 : */
1473 : for (i = strlen(yytext) - 2;
1474 : i > 0 && ecpg_isspace(yytext[i]);
1475 : i--)
1476 : /* skip */ ;
1477 : yytext[i + 1] = '\0';
1478 :
1479 : /* Does a definition exist? */
1480 : for (defptr = defines; defptr; defptr = defptr->next)
1481 : {
1482 : if (strcmp(yytext, defptr->name) == 0)
1483 : {
1484 : /* Found it, but is it currently undefined? */
1485 : if (defptr->value == NULL)
1486 : defptr = NULL; /* pretend it's not found */
1487 : break;
1488 : }
1489 : }
1490 :
1491 : this_active = (defptr ? ifcond : !ifcond);
1492 : stacked_if_value[preproc_tos].active =
1493 : (stacked_if_value[preproc_tos - 1].active &&
1494 : !stacked_if_value[preproc_tos].saw_active &&
1495 : this_active);
1496 : stacked_if_value[preproc_tos].saw_active |= this_active;
1497 : }
1498 :
1499 : if (stacked_if_value[preproc_tos].active)
1500 : BEGIN(C);
1501 : else
1502 : BEGIN(xskip);
1503 : }
1504 :
1505 : <xcond>{other}|\n {
1506 : mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL IFDEF command");
1507 : yyterminate();
1508 : }
1509 : <def_ident>{identifier} {
1510 : newdefsymbol = mm_strdup(yytext);
1511 : BEGIN(def);
1512 : startlit();
1513 : }
1514 : <def_ident>{other}|\n {
1515 : mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL DEFINE command");
1516 : yyterminate();
1517 : }
1518 : <def>{space}*";" {
1519 : struct _defines *ptr;
1520 :
1521 : /* Does it already exist? */
1522 : for (ptr = defines; ptr != NULL; ptr = ptr->next)
1523 : {
1524 : if (strcmp(newdefsymbol, ptr->name) == 0)
1525 : {
1526 : free(ptr->value);
1527 : ptr->value = mm_strdup(literalbuf);
1528 : /* Don't leak newdefsymbol */
1529 : free(newdefsymbol);
1530 : break;
1531 : }
1532 : }
1533 : if (ptr == NULL)
1534 : {
1535 : /* Not present, make a new entry */
1536 : ptr = (struct _defines *) mm_alloc(sizeof(struct _defines));
1537 :
1538 : ptr->name = newdefsymbol;
1539 : ptr->value = mm_strdup(literalbuf);
1540 : ptr->cmdvalue = NULL;
1541 : ptr->used = NULL;
1542 : ptr->next = defines;
1543 : defines = ptr;
1544 : }
1545 :
1546 : BEGIN(C);
1547 : }
1548 : <def>[^;] { addlit(yytext, yyleng); }
1549 : <incl>\<[^\>]+\>{space}*";"? { parse_include(); }
1550 : <incl>{dquote}{xdinside}{dquote}{space}*";"? { parse_include(); }
1551 : <incl>[^;\<\>\"]+";" { parse_include(); }
1552 : <incl>{other}|\n {
1553 : mmfatal(PARSE_ERROR, "syntax error in EXEC SQL INCLUDE command");
1554 : yyterminate();
1555 : }
1556 :
1557 : <<EOF>> {
1558 : if (yy_buffer == NULL)
1559 : {
1560 : /* No more input */
1561 : if (preproc_tos > 0)
1562 : {
1563 : preproc_tos = 0;
1564 : mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
1565 : }
1566 : yyterminate();
1567 : }
1568 : else
1569 : {
1570 : /* Revert to previous input source */
1571 : struct _yy_buffer *yb = yy_buffer;
1572 : int i;
1573 : struct _defines *ptr;
1574 :
1575 : /* Check to see if we are exiting a macro value */
1576 : for (ptr = defines; ptr; ptr = ptr->next)
1577 : {
1578 : if (ptr->used == yy_buffer)
1579 : {
1580 : ptr->used = NULL;
1581 : break; /* there can't be multiple matches */
1582 : }
1583 : }
1584 :
1585 : if (yyin != NULL)
1586 : fclose(yyin);
1587 :
1588 : yy_delete_buffer(YY_CURRENT_BUFFER);
1589 : yy_switch_to_buffer(yy_buffer->buffer);
1590 :
1591 : yylineno = yy_buffer->lineno;
1592 :
1593 : /* We have to output the filename only if we change files here */
1594 : i = strcmp(input_filename, yy_buffer->filename);
1595 :
1596 : free(input_filename);
1597 : input_filename = yy_buffer->filename;
1598 :
1599 : yy_buffer = yy_buffer->next;
1600 : free(yb);
1601 :
1602 : if (i != 0)
1603 : output_line_number();
1604 : }
1605 : }
1606 :
1607 : <INITIAL>{other}|\n {
1608 : mmfatal(PARSE_ERROR, "internal error: unreachable state; please report this to <%s>", PACKAGE_BUGREPORT);
1609 : }
1610 :
1611 : %%
1612 :
1613 : /* LCOV_EXCL_STOP */
1614 :
1615 : void
1616 : lex_init(void)
1617 134 : {
1618 : braces_open = 0;
1619 134 : parenths_open = 0;
1620 134 : current_function = NULL;
1621 134 :
1622 : yylineno = 1;
1623 134 :
1624 : /* initialize state for if/else/endif */
1625 : preproc_tos = 0;
1626 134 : stacked_if_value[preproc_tos].active = true;
1627 134 : stacked_if_value[preproc_tos].saw_active = true;
1628 134 : stacked_if_value[preproc_tos].else_branch = false;
1629 134 :
1630 : /* initialize literal buffer to a reasonable but expansible size */
1631 : if (literalbuf == NULL)
1632 134 : {
1633 : literalalloc = 1024;
1634 132 : literalbuf = (char *) mm_alloc(literalalloc);
1635 132 : }
1636 : startlit();
1637 134 :
1638 : BEGIN(C);
1639 134 : }
1640 134 :
1641 : static void
1642 : addlit(char *ytext, int yleng)
1643 48148 : {
1644 : /* enlarge buffer if needed */
1645 : if ((literallen + yleng) >= literalalloc)
1646 48148 : {
1647 : do
1648 : literalalloc *= 2;
1649 0 : while ((literallen + yleng) >= literalalloc);
1650 0 : literalbuf = (char *) realloc(literalbuf, literalalloc);
1651 0 : }
1652 : /* append new data, add trailing null */
1653 : memcpy(literalbuf + literallen, ytext, yleng);
1654 48148 : literallen += yleng;
1655 48148 : literalbuf[literallen] = '\0';
1656 48148 : }
1657 48148 :
1658 : static void
1659 : addlitchar(unsigned char ychar)
1660 0 : {
1661 : /* enlarge buffer if needed */
1662 : if ((literallen + 1) >= literalalloc)
1663 0 : {
1664 : literalalloc *= 2;
1665 0 : literalbuf = (char *) realloc(literalbuf, literalalloc);
1666 0 : }
1667 : /* append new data, add trailing null */
1668 : literalbuf[literallen] = ychar;
1669 0 : literallen += 1;
1670 0 : literalbuf[literallen] = '\0';
1671 0 : }
1672 0 :
1673 : /*
1674 : * Process {decinteger}, {hexinteger}, etc. Note this will also do the right
1675 : * thing with {numeric}, ie digits and a decimal point.
1676 : */
1677 : static int
1678 : process_integer_literal(const char *token, YYSTYPE *lval, int base)
1679 2388 : {
1680 : int val;
1681 : char *endptr;
1682 :
1683 : errno = 0;
1684 2388 : val = strtoint(base == 10 ? token : token + 2, &endptr, base);
1685 2388 : if (*endptr != '\0' || errno == ERANGE)
1686 2388 : {
1687 : /* integer too large (or contains decimal pt), treat it as a float */
1688 : lval->str = mm_strdup(token);
1689 12 : return FCONST;
1690 12 : }
1691 : lval->ival = val;
1692 2376 : return ICONST;
1693 2376 : }
1694 :
1695 : static void
1696 : parse_include(void)
1697 176 : {
1698 : /* got the include file name */
1699 : struct _yy_buffer *yb;
1700 : struct _include_path *ip;
1701 : char inc_file[MAXPGPATH];
1702 : unsigned int i;
1703 :
1704 : yb = mm_alloc(sizeof(struct _yy_buffer));
1705 176 :
1706 : yb->buffer = YY_CURRENT_BUFFER;
1707 176 : yb->lineno = yylineno;
1708 176 : yb->filename = input_filename;
1709 176 : yb->next = yy_buffer;
1710 176 :
1711 : yy_buffer = yb;
1712 176 :
1713 : /*
1714 : * skip the ";" if there is one and trailing whitespace. Note that yytext
1715 : * contains at least one non-space character plus the ";"
1716 : */
1717 : for (i = strlen(yytext) - 2;
1718 178 : i > 0 && ecpg_isspace(yytext[i]);
1719 178 : i--)
1720 2 : ;
1721 :
1722 : if (yytext[i] == ';')
1723 176 : i--;
1724 0 :
1725 : yytext[i + 1] = '\0';
1726 176 :
1727 : yyin = NULL;
1728 176 :
1729 : /* If file name is enclosed in '"' remove these and look only in '.' */
1730 :
1731 : /*
1732 : * Informix does look into all include paths though, except filename
1733 : * starts with '/'
1734 : */
1735 : if (yytext[0] == '"' && yytext[i] == '"' &&
1736 176 : ((compat != ECPG_COMPAT_INFORMIX && compat != ECPG_COMPAT_INFORMIX_SE) || yytext[1] == '/'))
1737 0 : {
1738 : yytext[i] = '\0';
1739 0 : memmove(yytext, yytext + 1, strlen(yytext));
1740 0 :
1741 : strlcpy(inc_file, yytext, sizeof(inc_file));
1742 0 : yyin = fopen(inc_file, "r");
1743 0 : if (!yyin)
1744 0 : {
1745 : if (strlen(inc_file) <= 2 || strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
1746 0 : {
1747 : strcat(inc_file, ".h");
1748 0 : yyin = fopen(inc_file, "r");
1749 0 : }
1750 : }
1751 :
1752 : }
1753 : else
1754 : {
1755 : if ((yytext[0] == '"' && yytext[i] == '"') || (yytext[0] == '<' && yytext[i] == '>'))
1756 176 : {
1757 : yytext[i] = '\0';
1758 4 : memmove(yytext, yytext + 1, strlen(yytext));
1759 4 : }
1760 :
1761 : for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next)
1762 478 : {
1763 : if (strlen(ip->path) + strlen(yytext) + 4 > MAXPGPATH)
1764 302 : {
1765 : fprintf(stderr, _("Error: include path \"%s/%s\" is too long on line %d, skipping\n"), ip->path, yytext, yylineno);
1766 0 : continue;
1767 0 : }
1768 : snprintf(inc_file, sizeof(inc_file), "%s/%s", ip->path, yytext);
1769 302 : yyin = fopen(inc_file, "r");
1770 302 : if (!yyin)
1771 302 : {
1772 : if (strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
1773 278 : {
1774 : strcat(inc_file, ".h");
1775 272 : yyin = fopen(inc_file, "r");
1776 272 : }
1777 : }
1778 :
1779 : /*
1780 : * if the command was "include_next" we have to disregard the
1781 : * first hit
1782 : */
1783 : if (yyin && include_next)
1784 302 : {
1785 : fclose(yyin);
1786 0 : yyin = NULL;
1787 0 : include_next = false;
1788 0 : }
1789 : }
1790 : }
1791 : if (!yyin)
1792 176 : mmfatal(NO_INCLUDE_FILE, "could not open include file \"%s\" on line %d", yytext, yylineno);
1793 0 :
1794 : input_filename = mm_strdup(inc_file);
1795 176 : yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1796 176 : yylineno = 1;
1797 176 : output_line_number();
1798 176 :
1799 : BEGIN(C);
1800 176 : }
1801 176 :
/*
 * ecpg_isspace() --- report whether ch is one of the whitespace characters
 * the flex scanner recognizes.
 *
 * Returns true for space, tab, newline, carriage return, form feed and
 * vertical tab; false for every other character.
 */
static bool
ecpg_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\f':
		case '\v':
			return true;
		default:
			return false;
	}
}
1817 :
1818 : /*
1819 : * If yytext matches a define symbol, begin scanning the symbol's value
1820 : * and return true
1821 : */
1822 : static bool
1823 : isdefine(void)
1824 28194 : {
1825 : struct _defines *ptr;
1826 :
1827 : /* is it a define? */
1828 : for (ptr = defines; ptr; ptr = ptr->next)
1829 128622 : {
1830 : /* notice we do not match anything being actively expanded */
1831 : if (strcmp(yytext, ptr->name) == 0 &&
1832 100596 : ptr->value != NULL &&
1833 168 : ptr->used == NULL)
1834 168 : {
1835 : /* Save state associated with the current buffer */
1836 : struct _yy_buffer *yb;
1837 :
1838 : yb = mm_alloc(sizeof(struct _yy_buffer));
1839 168 :
1840 : yb->buffer = YY_CURRENT_BUFFER;
1841 168 : yb->lineno = yylineno;
1842 168 : yb->filename = mm_strdup(input_filename);
1843 168 : yb->next = yy_buffer;
1844 168 : yy_buffer = yb;
1845 168 :
1846 : /* Mark symbol as being actively expanded */
1847 : ptr->used = yb;
1848 168 :
1849 : /*
1850 : * We use yy_scan_string which will copy the value, so there's
1851 : * no need to worry about a possible undef happening while we
1852 : * are still scanning it.
1853 : */
1854 : yy_scan_string(ptr->value);
1855 168 : return true;
1856 168 : }
1857 : }
1858 :
1859 : return false;
1860 28026 : }
1861 :
1862 : /*
1863 : * Handle replacement of INFORMIX built-in defines. This works just
1864 : * like isdefine() except for the source of the string to scan.
1865 : */
1866 : static bool
1867 : isinformixdefine(void)
1868 3518 : {
1869 : const char *new = NULL;
1870 3518 :
1871 : if (strcmp(yytext, "dec_t") == 0)
1872 3518 : new = "decimal";
1873 2 : else if (strcmp(yytext, "intrvl_t") == 0)
1874 3516 : new = "interval";
1875 0 : else if (strcmp(yytext, "dtime_t") == 0)
1876 3516 : new = "timestamp";
1877 0 :
1878 : if (new)
1879 3518 : {
1880 : struct _yy_buffer *yb;
1881 :
1882 : yb = mm_alloc(sizeof(struct _yy_buffer));
1883 2 :
1884 : yb->buffer = YY_CURRENT_BUFFER;
1885 2 : yb->lineno = yylineno;
1886 2 : yb->filename = mm_strdup(input_filename);
1887 2 : yb->next = yy_buffer;
1888 2 : yy_buffer = yb;
1889 2 :
1890 : yy_scan_string(new);
1891 2 : return true;
1892 2 : }
1893 :
1894 : return false;
1895 3516 : }
|