LCOV - code coverage report
Current view: top level - src/test/modules/test_regex - test_regex.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 86.3 % 307 265
Test Date: 2026-03-03 14:15:12 Functions: 100.0 % 9 9
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*--------------------------------------------------------------------------
       2              :  *
       3              :  * test_regex.c
       4              :  *      Test harness for the regular expression package.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *      src/test/modules/test_regex/test_regex.c
      11              :  *
      12              :  * -------------------------------------------------------------------------
      13              :  */
      14              : 
      15              : #include "postgres.h"
      16              : 
      17              : #include "funcapi.h"
      18              : #include "regex/regex.h"
      19              : #include "utils/array.h"
      20              : #include "utils/builtins.h"
      21              : 
      22            2 : PG_MODULE_MAGIC;
      23              : 
      24              : 
      25              : /* all the options of interest for regex functions */
      26              : typedef struct test_re_flags
      27              : {
      28              :     int         cflags;         /* compile flags for Spencer's regex code */
      29              :     int         eflags;         /* execute flags for Spencer's regex code */
      30              :     long        info;           /* expected re_info bits */
      31              :     bool        glob;           /* do it globally (for each occurrence) */
      32              :     bool        indices;        /* report indices not actual strings */
      33              :     bool        partial;        /* expect partial match */
      34              : } test_re_flags;
      35              : 
      36              : /* cross-call state for test_regex() */
      37              : typedef struct test_regex_ctx
      38              : {
      39              :     test_re_flags re_flags;     /* flags */
      40              :     rm_detail_t details;        /* "details" from execution */
      41              :     text       *orig_str;       /* data string in original TEXT form */
      42              :     int         nmatches;       /* number of places where pattern matched */
      43              :     int         npatterns;      /* number of capturing subpatterns */
      44              :     /* We store start char index and end+1 char index for each match */
      45              :     /* so the number of entries in match_locs is nmatches * npatterns * 2 */
      46              :     int        *match_locs;     /* 0-based character indexes */
      47              :     int         next_match;     /* 0-based index of next match to process */
      48              :     /* workspace for build_test_match_result() */
      49              :     Datum      *elems;          /* has npatterns+1 elements */
      50              :     bool       *nulls;          /* has npatterns+1 elements */
      51              :     pg_wchar   *wide_str;       /* wide-char version of original string */
      52              :     char       *conv_buf;       /* conversion buffer, if needed */
      53              :     int         conv_bufsiz;    /* size thereof */
      54              : } test_regex_ctx;
      55              : 
      56              : /* Local functions */
      57              : static void test_re_compile(text *text_re, int cflags, Oid collation,
      58              :                             regex_t *result_re);
      59              : static void parse_test_flags(test_re_flags *flags, text *opts);
      60              : static test_regex_ctx *setup_test_matches(text *orig_str,
      61              :                                           regex_t *cpattern,
      62              :                                           test_re_flags *re_flags,
      63              :                                           Oid collation,
      64              :                                           bool use_subpatterns);
      65              : static ArrayType *build_test_info_result(regex_t *cpattern,
      66              :                                          test_re_flags *flags);
      67              : static ArrayType *build_test_match_result(test_regex_ctx *matchctx);
      68              : 
      69              : 
      70              : /*
      71              :  * test_regex(pattern text, string text, flags text) returns setof text[]
      72              :  *
      73              :  * This is largely based on regexp.c's regexp_matches, with additions
      74              :  * for debugging purposes.
      75              :  */
      76            3 : PG_FUNCTION_INFO_V1(test_regex);
      77              : 
      78              : Datum
      79         1767 : test_regex(PG_FUNCTION_ARGS)
      80              : {
      81              :     FuncCallContext *funcctx;
      82              :     test_regex_ctx *matchctx;
      83              :     ArrayType  *result_ary;
      84              : 
      85         1767 :     if (SRF_IS_FIRSTCALL())
      86              :     {
      87          696 :         text       *pattern = PG_GETARG_TEXT_PP(0);
      88          696 :         text       *flags = PG_GETARG_TEXT_PP(2);
      89          696 :         Oid         collation = PG_GET_COLLATION();
      90              :         test_re_flags re_flags;
      91              :         regex_t     cpattern;
      92              :         MemoryContext oldcontext;
      93              : 
      94          696 :         funcctx = SRF_FIRSTCALL_INIT();
      95          696 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
      96              : 
      97              :         /* Determine options */
      98          696 :         parse_test_flags(&re_flags, flags);
      99              : 
     100              :         /* set up the compiled pattern */
     101          696 :         test_re_compile(pattern, re_flags.cflags, collation, &cpattern);
     102              : 
     103              :         /* be sure to copy the input string into the multi-call ctx */
     104          590 :         matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern,
     105              :                                       &re_flags,
     106              :                                       collation,
     107              :                                       true);
     108              : 
     109              :         /* Pre-create workspace that build_test_match_result needs */
     110          590 :         matchctx->elems = palloc_array(Datum, matchctx->npatterns + 1);
     111          590 :         matchctx->nulls = palloc_array(bool, matchctx->npatterns + 1);
     112              : 
     113          590 :         MemoryContextSwitchTo(oldcontext);
     114          590 :         funcctx->user_fctx = matchctx;
     115              : 
     116              :         /*
     117              :          * Return the first result row, which is info equivalent to Tcl's
     118              :          * "regexp -about" output
     119              :          */
     120          590 :         result_ary = build_test_info_result(&cpattern, &re_flags);
     121              : 
     122          590 :         pg_regfree(&cpattern);
     123              : 
     124          590 :         SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
     125              :     }
     126              :     else
     127              :     {
     128              :         /* Each subsequent row describes one match */
     129         1071 :         funcctx = SRF_PERCALL_SETUP();
     130         1071 :         matchctx = (test_regex_ctx *) funcctx->user_fctx;
     131              : 
     132         1071 :         if (matchctx->next_match < matchctx->nmatches)
     133              :         {
     134          481 :             result_ary = build_test_match_result(matchctx);
     135          481 :             matchctx->next_match++;
     136          481 :             SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
     137              :         }
     138              :     }
     139              : 
     140          590 :     SRF_RETURN_DONE(funcctx);
     141              : }
     142              : 
     143              : 
     144              : /*
     145              :  * test_re_compile - compile a RE
     146              :  *
     147              :  *  text_re --- the pattern, expressed as a TEXT object
     148              :  *  cflags --- compile options for the pattern
     149              :  *  collation --- collation to use for LC_CTYPE-dependent behavior
     150              :  *  result_re --- output, compiled RE is stored here
     151              :  *
     152              :  * Pattern is given in the database encoding.  We internally convert to
     153              :  * an array of pg_wchar, which is what Spencer's regex package wants.
     154              :  *
     155              :  * Caller must eventually pg_regfree the resulting RE to avoid memory leaks.
     156              :  */
     157              : static void
     158          696 : test_re_compile(text *text_re, int cflags, Oid collation,
     159              :                 regex_t *result_re)
     160              : {
     161          696 :     int         text_re_len = VARSIZE_ANY_EXHDR(text_re);
     162          696 :     char       *text_re_val = VARDATA_ANY(text_re);
     163              :     pg_wchar   *pattern;
     164              :     int         pattern_len;
     165              :     int         regcomp_result;
     166              :     char        errMsg[100];
     167              : 
     168              :     /* Convert pattern string to wide characters */
     169          696 :     pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
     170          696 :     pattern_len = pg_mb2wchar_with_len(text_re_val,
     171              :                                        pattern,
     172              :                                        text_re_len);
     173              : 
     174          696 :     regcomp_result = pg_regcomp(result_re,
     175              :                                 pattern,
     176              :                                 pattern_len,
     177              :                                 cflags,
     178              :                                 collation);
     179              : 
     180          696 :     pfree(pattern);
     181              : 
     182          696 :     if (regcomp_result != REG_OKAY)
     183              :     {
     184              :         /* re didn't compile (no need for pg_regfree, if so) */
     185          106 :         pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
     186          106 :         ereport(ERROR,
     187              :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     188              :                  errmsg("invalid regular expression: %s", errMsg)));
     189              :     }
     190          590 : }
     191              : 
     192              : /*
     193              :  * test_re_execute - execute a RE on pg_wchar data
     194              :  *
     195              :  * Returns true on match, false on no match
     196              :  * Arguments are as for pg_regexec
     197              :  */
     198              : static bool
     199          590 : test_re_execute(regex_t *re, pg_wchar *data, int data_len,
     200              :                 int start_search,
     201              :                 rm_detail_t *details,
     202              :                 int nmatch, regmatch_t *pmatch,
     203              :                 int eflags)
     204              : {
     205              :     int         regexec_result;
     206              :     char        errMsg[100];
     207              : 
     208              :     /* Initialize match locations in case engine doesn't */
     209          590 :     details->rm_extend.rm_so = -1;
     210          590 :     details->rm_extend.rm_eo = -1;
     211         1466 :     for (int i = 0; i < nmatch; i++)
     212              :     {
     213          876 :         pmatch[i].rm_so = -1;
     214          876 :         pmatch[i].rm_eo = -1;
     215              :     }
     216              : 
     217              :     /* Perform RE match and return result */
     218          590 :     regexec_result = pg_regexec(re,
     219              :                                 data,
     220              :                                 data_len,
     221              :                                 start_search,
     222              :                                 details,
     223              :                                 nmatch,
     224              :                                 pmatch,
     225              :                                 eflags);
     226              : 
     227          590 :     if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
     228              :     {
     229              :         /* re failed??? */
     230            0 :         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
     231            0 :         ereport(ERROR,
     232              :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     233              :                  errmsg("regular expression failed: %s", errMsg)));
     234              :     }
     235              : 
     236          590 :     return (regexec_result == REG_OKAY);
     237              : }
     238              : 
     239              : 
     240              : /*
     241              :  * parse_test_flags - parse the flags argument
     242              :  *
     243              :  *  flags --- output argument, filled with desired options
     244              :  *  opts --- TEXT object, or NULL for defaults
     245              :  */
     246              : static void
     247          696 : parse_test_flags(test_re_flags *flags, text *opts)
     248              : {
     249              :     /* these defaults must match Tcl's */
     250          696 :     int         cflags = REG_ADVANCED;
     251          696 :     int         eflags = 0;
     252          696 :     long        info = 0;
     253              : 
     254          696 :     flags->glob = false;
     255          696 :     flags->indices = false;
     256          696 :     flags->partial = false;
     257              : 
     258          696 :     if (opts)
     259              :     {
     260          696 :         char       *opt_p = VARDATA_ANY(opts);
     261          696 :         int         opt_len = VARSIZE_ANY_EXHDR(opts);
     262              :         int         i;
     263              : 
     264         1891 :         for (i = 0; i < opt_len; i++)
     265              :         {
     266         1195 :             switch (opt_p[i])
     267              :             {
     268           78 :                 case '-':
     269              :                     /* allowed, no-op */
     270           78 :                     break;
     271            7 :                 case '!':
     272            7 :                     flags->partial = true;
     273            7 :                     break;
     274            1 :                 case '*':
     275              :                     /* test requires Unicode --- ignored here */
     276            1 :                     break;
     277           53 :                 case '0':
     278           53 :                     flags->indices = true;
     279           53 :                     break;
     280              : 
     281              :                     /* These flags correspond to user-exposed RE options: */
     282            0 :                 case 'g':       /* global match */
     283            0 :                     flags->glob = true;
     284            0 :                     break;
     285           20 :                 case 'i':       /* case insensitive */
     286           20 :                     cflags |= REG_ICASE;
     287           20 :                     break;
     288           35 :                 case 'n':       /* \n affects ^ $ . [^ */
     289           35 :                     cflags |= REG_NEWLINE;
     290           35 :                     break;
     291            2 :                 case 'p':       /* ~Perl, \n affects . [^ */
     292            2 :                     cflags |= REG_NLSTOP;
     293            2 :                     cflags &= ~REG_NLANCH;
     294            2 :                     break;
     295            2 :                 case 'w':       /* weird, \n affects ^ $ only */
     296            2 :                     cflags &= ~REG_NLSTOP;
     297            2 :                     cflags |= REG_NLANCH;
     298            2 :                     break;
     299           14 :                 case 'x':       /* expanded syntax */
     300           14 :                     cflags |= REG_EXPANDED;
     301           14 :                     break;
     302              : 
     303              :                     /* These flags correspond to Tcl's -xflags options: */
     304            2 :                 case 'a':
     305            2 :                     cflags |= REG_ADVF;
     306            2 :                     break;
     307          131 :                 case 'b':
     308          131 :                     cflags &= ~REG_ADVANCED;
     309          131 :                     break;
     310           11 :                 case 'c':
     311              : 
     312              :                     /*
     313              :                      * Tcl calls this TCL_REG_CANMATCH, but it's really
     314              :                      * REG_EXPECT.  In this implementation we must also set
     315              :                      * the partial and indices flags, so that
     316              :                      * setup_test_matches and build_test_match_result will
     317              :                      * emit the desired data.  (They'll emit more fields than
     318              :                      * Tcl would, but that's fine.)
     319              :                      */
     320           11 :                     cflags |= REG_EXPECT;
     321           11 :                     flags->partial = true;
     322           11 :                     flags->indices = true;
     323           11 :                     break;
     324           10 :                 case 'e':
     325           10 :                     cflags &= ~REG_ADVANCED;
     326           10 :                     cflags |= REG_EXTENDED;
     327           10 :                     break;
     328            6 :                 case 'q':
     329            6 :                     cflags &= ~REG_ADVANCED;
     330            6 :                     cflags |= REG_QUOTE;
     331            6 :                     break;
     332            2 :                 case 'o':       /* o for opaque */
     333            2 :                     cflags |= REG_NOSUB;
     334            2 :                     break;
     335            2 :                 case 's':       /* s for start */
     336            2 :                     cflags |= REG_BOSONLY;
     337            2 :                     break;
     338            6 :                 case '+':
     339            6 :                     cflags |= REG_FAKE;
     340            6 :                     break;
     341            0 :                 case ',':
     342            0 :                     cflags |= REG_PROGRESS;
     343            0 :                     break;
     344            0 :                 case '.':
     345            0 :                     cflags |= REG_DUMP;
     346            0 :                     break;
     347            0 :                 case ':':
     348            0 :                     eflags |= REG_MTRACE;
     349            0 :                     break;
     350            0 :                 case ';':
     351            0 :                     eflags |= REG_FTRACE;
     352            0 :                     break;
     353            6 :                 case '^':
     354            6 :                     eflags |= REG_NOTBOL;
     355            6 :                     break;
     356            4 :                 case '$':
     357            4 :                     eflags |= REG_NOTEOL;
     358            4 :                     break;
     359           17 :                 case 't':
     360           17 :                     cflags |= REG_EXPECT;
     361           17 :                     break;
     362            5 :                 case '%':
     363            5 :                     eflags |= REG_SMALL;
     364            5 :                     break;
     365              : 
     366              :                     /* These flags define expected info bits: */
     367            5 :                 case 'A':
     368            5 :                     info |= REG_UBSALNUM;
     369            5 :                     break;
     370            4 :                 case 'B':
     371            4 :                     info |= REG_UBRACES;
     372            4 :                     break;
     373           42 :                 case 'E':
     374           42 :                     info |= REG_UBBS;
     375           42 :                     break;
     376           34 :                 case 'H':
     377           34 :                     info |= REG_ULOOKAROUND;
     378           34 :                     break;
     379           11 :                 case 'I':
     380           11 :                     info |= REG_UIMPOSSIBLE;
     381           11 :                     break;
     382          164 :                 case 'L':
     383          164 :                     info |= REG_ULOCALE;
     384          164 :                     break;
     385           43 :                 case 'M':
     386           43 :                     info |= REG_UUNPORT;
     387           43 :                     break;
     388           47 :                 case 'N':
     389           47 :                     info |= REG_UEMPTYMATCH;
     390           47 :                     break;
     391          307 :                 case 'P':
     392          307 :                     info |= REG_UNONPOSIX;
     393          307 :                     break;
     394           36 :                 case 'Q':
     395           36 :                     info |= REG_UBOUNDS;
     396           36 :                     break;
     397           42 :                 case 'R':
     398           42 :                     info |= REG_UBACKREF;
     399           42 :                     break;
     400           25 :                 case 'S':
     401           25 :                     info |= REG_UUNSPEC;
     402           25 :                     break;
     403           20 :                 case 'T':
     404           20 :                     info |= REG_USHORTEST;
     405           20 :                     break;
     406            1 :                 case 'U':
     407            1 :                     info |= REG_UPBOTCH;
     408            1 :                     break;
     409              : 
     410            0 :                 default:
     411            0 :                     ereport(ERROR,
     412              :                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     413              :                              errmsg("invalid regular expression test option: \"%.*s\"",
     414              :                                     pg_mblen_range(opt_p + i, opt_p + opt_len),
     415              :                                     opt_p + i)));
     416              :                     break;
     417              :             }
     418              :         }
     419              :     }
     420          696 :     flags->cflags = cflags;
     421          696 :     flags->eflags = eflags;
     422          696 :     flags->info = info;
     423          696 : }
     424              : 
     425              : /*
     426              :  * setup_test_matches --- do the initial matching
     427              :  *
     428              :  * To simplify memory management, we do all the matching in one swoop.
     429              :  * The returned test_regex_ctx contains the locations of all the substrings
     430              :  * matching the pattern.
     431              :  */
     432              : static test_regex_ctx *
     433          590 : setup_test_matches(text *orig_str,
     434              :                    regex_t *cpattern, test_re_flags *re_flags,
     435              :                    Oid collation,
     436              :                    bool use_subpatterns)
     437              : {
     438          590 :     test_regex_ctx *matchctx = palloc0_object(test_regex_ctx);
     439          590 :     int         eml = pg_database_encoding_max_length();
     440              :     int         orig_len;
     441              :     pg_wchar   *wide_str;
     442              :     int         wide_len;
     443              :     regmatch_t *pmatch;
     444              :     int         pmatch_len;
     445              :     int         array_len;
     446              :     int         array_idx;
     447              :     int         prev_match_end;
     448              :     int         start_search;
     449          590 :     int         maxlen = 0;     /* largest fetch length in characters */
     450              : 
     451              :     /* save flags */
     452          590 :     matchctx->re_flags = *re_flags;
     453              : 
     454              :     /* save original string --- we'll extract result substrings from it */
     455          590 :     matchctx->orig_str = orig_str;
     456              : 
     457              :     /* convert string to pg_wchar form for matching */
     458          590 :     orig_len = VARSIZE_ANY_EXHDR(orig_str);
     459          590 :     wide_str = palloc_array(pg_wchar, orig_len + 1);
     460          590 :     wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
     461              : 
     462              :     /* do we want to remember subpatterns? */
     463          590 :     if (use_subpatterns && cpattern->re_nsub > 0)
     464              :     {
     465          127 :         matchctx->npatterns = cpattern->re_nsub + 1;
     466          127 :         pmatch_len = cpattern->re_nsub + 1;
     467              :     }
     468              :     else
     469              :     {
     470          463 :         use_subpatterns = false;
     471          463 :         matchctx->npatterns = 1;
     472          463 :         pmatch_len = 1;
     473              :     }
     474              : 
     475              :     /* temporary output space for RE package */
     476          590 :     pmatch = palloc_array(regmatch_t, pmatch_len);
     477              : 
     478              :     /*
     479              :      * the real output space (grown dynamically if needed)
     480              :      *
     481              :      * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
     482              :      * than at 2^27
     483              :      */
     484          590 :     array_len = re_flags->glob ? 255 : 31;
     485          590 :     matchctx->match_locs = palloc_array(int, array_len);
     486          590 :     array_idx = 0;
     487              : 
     488              :     /* search for the pattern, perhaps repeatedly */
     489          590 :     prev_match_end = 0;
     490          590 :     start_search = 0;
     491          590 :     while (test_re_execute(cpattern, wide_str, wide_len,
     492              :                            start_search,
     493              :                            &matchctx->details,
     494              :                            pmatch_len, pmatch,
     495              :                            re_flags->eflags))
     496              :     {
     497              :         /* enlarge output space if needed */
     498          463 :         while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
     499              :         {
     500            0 :             array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
     501            0 :             if (array_len > MaxAllocSize / sizeof(int))
     502            0 :                 ereport(ERROR,
     503              :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     504              :                          errmsg("too many regular expression matches")));
     505            0 :             matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
     506              :                                                     sizeof(int) * array_len);
     507              :         }
     508              : 
     509              :         /* save this match's locations */
     510         1094 :         for (int i = 0; i < matchctx->npatterns; i++)
     511              :         {
     512          631 :             int         so = pmatch[i].rm_so;
     513          631 :             int         eo = pmatch[i].rm_eo;
     514              : 
     515          631 :             matchctx->match_locs[array_idx++] = so;
     516          631 :             matchctx->match_locs[array_idx++] = eo;
     517          631 :             if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
     518          438 :                 maxlen = (eo - so);
     519              :         }
     520          463 :         matchctx->nmatches++;
     521          463 :         prev_match_end = pmatch[0].rm_eo;
     522              : 
     523              :         /* if not glob, stop after one match */
     524          463 :         if (!re_flags->glob)
     525          463 :             break;
     526              : 
     527              :         /*
     528              :          * Advance search position.  Normally we start the next search at the
     529              :          * end of the previous match; but if the match was of zero length, we
     530              :          * have to advance by one character, or we'd just find the same match
     531              :          * again.
     532              :          */
     533            0 :         start_search = prev_match_end;
     534            0 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
     535            0 :             start_search++;
     536            0 :         if (start_search > wide_len)
     537            0 :             break;
     538              :     }
     539              : 
     540              :     /*
     541              :      * If we had no match, but "partial" and "indices" are set, emit the
     542              :      * details.
     543              :      */
     544          590 :     if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices)
     545              :     {
     546              :         /* enlarge output space if needed */
     547           18 :         while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
     548              :         {
     549            0 :             array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
     550            0 :             if (array_len > MaxAllocSize / sizeof(int))
     551            0 :                 ereport(ERROR,
     552              :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     553              :                          errmsg("too many regular expression matches")));
     554            0 :             matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
     555              :                                                     sizeof(int) * array_len);
     556              :         }
     557              : 
     558           18 :         matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so;
     559           18 :         matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo;
     560              :         /* we don't have pmatch data, so emit -1 */
     561           20 :         for (int i = 1; i < matchctx->npatterns; i++)
     562              :         {
     563            2 :             matchctx->match_locs[array_idx++] = -1;
     564            2 :             matchctx->match_locs[array_idx++] = -1;
     565              :         }
     566           18 :         matchctx->nmatches++;
     567              :     }
     568              : 
     569              :     Assert(array_idx <= array_len);
     570              : 
     571          590 :     if (eml > 1)
     572              :     {
     573          590 :         int64       maxsiz = eml * (int64) maxlen;
     574              :         int         conv_bufsiz;
     575              : 
     576              :         /*
     577              :          * Make the conversion buffer large enough for any substring of
     578              :          * interest.
     579              :          *
     580              :          * Worst case: assume we need the maximum size (maxlen*eml), but take
     581              :          * advantage of the fact that the original string length in bytes is
     582              :          * an upper bound on the byte length of any fetched substring (and we
     583              :          * know that len+1 is safe to allocate because the varlena header is
     584              :          * longer than 1 byte).
     585              :          */
     586          590 :         if (maxsiz > orig_len)
     587          415 :             conv_bufsiz = orig_len + 1;
     588              :         else
     589          175 :             conv_bufsiz = maxsiz + 1;   /* safe since maxsiz < 2^30 */
     590              : 
     591          590 :         matchctx->conv_buf = palloc(conv_bufsiz);
     592          590 :         matchctx->conv_bufsiz = conv_bufsiz;
     593          590 :         matchctx->wide_str = wide_str;
     594              :     }
     595              :     else
     596              :     {
     597              :         /* No need to keep the wide string if we're in a single-byte charset. */
     598            0 :         pfree(wide_str);
     599            0 :         matchctx->wide_str = NULL;
     600            0 :         matchctx->conv_buf = NULL;
     601            0 :         matchctx->conv_bufsiz = 0;
     602              :     }
     603              : 
     604              :     /* Clean up temp storage */
     605          590 :     pfree(pmatch);
     606              : 
     607          590 :     return matchctx;
     608              : }
     609              : 
     610              : /*
     611              :  * build_test_info_result - build output array describing compiled regexp
     612              :  * Result is a 1-D text array: cpattern->re_nsub first, then one entry per re_info bit that is set and/or expected in flags->info.
     613              :  * This borrows some code from Tcl's TclRegAbout().
     614              :  */
     615              : static ArrayType *
     616          590 : build_test_info_result(regex_t *cpattern, test_re_flags *flags)
     617              : {
     618              :     /* Translation data for flag bits in regex_t.re_info */
     619              :     struct infoname
     620              :     {
     621              :         int         bit;            /* flag bit tested against re_info and flags->info */
     622              :         const char *text;           /* name emitted in the result array for that bit */
     623              :     };
     624              :     static const struct infoname infonames[] = {
     625              :         {REG_UBACKREF, "REG_UBACKREF"},
     626              :         {REG_ULOOKAROUND, "REG_ULOOKAROUND"},
     627              :         {REG_UBOUNDS, "REG_UBOUNDS"},
     628              :         {REG_UBRACES, "REG_UBRACES"},
     629              :         {REG_UBSALNUM, "REG_UBSALNUM"},
     630              :         {REG_UPBOTCH, "REG_UPBOTCH"},
     631              :         {REG_UBBS, "REG_UBBS"},
     632              :         {REG_UNONPOSIX, "REG_UNONPOSIX"},
     633              :         {REG_UUNSPEC, "REG_UUNSPEC"},
     634              :         {REG_UUNPORT, "REG_UUNPORT"},
     635              :         {REG_ULOCALE, "REG_ULOCALE"},
     636              :         {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
     637              :         {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
     638              :         {REG_USHORTEST, "REG_USHORTEST"},
     639              :         {0, NULL}                   /* terminator: the scan loop below stops at bit == 0 */
     640              :     };
     641              :     const struct infoname *inf;
     642              :     Datum       elems[lengthof(infonames) + 1]; /* room for the re_nsub entry plus at most one entry per table row */
     643          590 :     int         nresults = 0;   /* number of elems[] slots filled so far */
     644              :     char        buf[80];        /* scratch space for formatted entries */
     645              :     int         dims[1];
     646              :     int         lbs[1];
     647              : 
     648              :     /* Set up results: first, the number of subexpressions */
     649          590 :     snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub);
     650          590 :     elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     651              : 
     652              :     /* Report individual info bit states */
     653         8850 :     for (inf = infonames; inf->bit != 0; inf++)
     654              :     {
     655         8260 :         if (cpattern->re_info & inf->bit)
     656              :         {
     657          758 :             if (flags->info & inf->bit)     /* set and expected: report the plain name */
     658          758 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text));
     659              :             else                            /* set but not expected by the caller */
     660              :             {
     661            0 :                 snprintf(buf, sizeof(buf), "unexpected %s!", inf->text);
     662            0 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     663              :             }
     664              :         }
     665              :         else                                /* bit is not set in re_info */
     666              :         {
     667         7502 :             if (flags->info & inf->bit)     /* expected but not set; otherwise emit nothing */
     668              :             {
     669            0 :                 snprintf(buf, sizeof(buf), "missing %s!", inf->text);
     670            0 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     671              :             }
     672              :         }
     673              :     }
     674              : 
     675              :     /* And form an array */
     676          590 :     dims[0] = nresults;         /* single dimension holding nresults elements */
     677          590 :     lbs[0] = 1;                 /* lower bound of that dimension is 1 */
     678              :     /* XXX: this hardcodes assumptions about the text type */
     679          590 :     return construct_md_array(elems, NULL, 1, dims, lbs,   /* NULL nulls array: presumably means "no null elements" - confirm against construct_md_array */
     680              :                               TEXTOID, -1, false, TYPALIGN_INT);
     681              : }
     682              : 
     683              : /*
     684              :  * build_test_match_result - build output array for current match
     685              :  * Reads npatterns (so, eo) pairs for match number next_match from match_locs[] and emits one text element per pair.
     686              :  * Note that if the indices flag is set, we don't need any strings,
     687              :  * just the location data.
     688              :  */
     689              : static ArrayType *
     690          481 : build_test_match_result(test_regex_ctx *matchctx)
     691              : {
     692          481 :     char       *buf = matchctx->conv_buf;   /* conversion buffer; NULL selects the text_substr path below */
     693          481 :     Datum      *elems = matchctx->elems;
     694          481 :     bool       *nulls = matchctx->nulls;
     695          481 :     bool        indices = matchctx->re_flags.indices;
     696              :     char        bufstr[80];     /* scratch space for "so eo" index text */
     697              :     int         dims[1];
     698              :     int         lbs[1];
     699              :     int         loc;            /* running index into match_locs[] */
     700              :     int         i;              /* output element index; still used after the loop */
     701              : 
     702              :     /* Extract matching substrings from the original string */
     703          481 :     loc = matchctx->next_match * matchctx->npatterns * 2;   /* two ints (so, eo) per pattern per match */
     704         1132 :     for (i = 0; i < matchctx->npatterns; i++)
     705              :     {
     706          651 :         int         so = matchctx->match_locs[loc++];   /* start location for subpattern i */
     707          651 :         int         eo = matchctx->match_locs[loc++];   /* end location for subpattern i */
     708              : 
     709          651 :         if (indices)            /* indices mode: emit the locations as text, never a null */
     710              :         {
     711              :             /* Report eo this way for consistency with Tcl */
     712           84 :             snprintf(bufstr, sizeof(bufstr), "%d %d",
     713              :                      so, so < 0 ? eo : eo - 1);
     714           84 :             elems[i] = PointerGetDatum(cstring_to_text(bufstr));
     715           84 :             nulls[i] = false;
     716              :         }
     717          567 :         else if (so < 0 || eo < 0)  /* negative location: emit a null element */
     718              :         {
     719           12 :             elems[i] = (Datum) 0;
     720           12 :             nulls[i] = true;
     721              :         }
     722          555 :         else if (buf)           /* have a conversion buffer: convert the range from wide_str */
     723              :         {
     724          555 :             int         len = pg_wchar2mb_with_len(matchctx->wide_str + so,   /* len is used below as the byte length of buf's contents */
     725              :                                                    buf,
     726              :                                                    eo - so);
     727              : 
     728              :             Assert(len < matchctx->conv_bufsiz);
     729          555 :             elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
     730          555 :             nulls[i] = false;
     731              :         }
     732              :         else                    /* no conversion buffer: take the substring directly from orig_str */
     733              :         {
     734            0 :             elems[i] = DirectFunctionCall3(text_substr,
     735              :                                            PointerGetDatum(matchctx->orig_str),
     736              :                                            Int32GetDatum(so + 1),   /* presumably text_substr positions count from 1 - confirm */
     737              :                                            Int32GetDatum(eo - so));
     738            0 :             nulls[i] = false;
     739              :         }
     740              :     }
     741              : 
     742              :     /* In EXPECT indices mode, also report the "details" */
     743          481 :     if (indices && (matchctx->re_flags.cflags & REG_EXPECT))
     744              :     {
     745           28 :         int         so = matchctx->details.rm_extend.rm_so;
     746           28 :         int         eo = matchctx->details.rm_extend.rm_eo;
     747              : 
     748           28 :         snprintf(bufstr, sizeof(bufstr), "%d %d",
     749              :                  so, so < 0 ? eo : eo - 1);     /* same "so eo" formatting as the per-pattern entries above */
     750           28 :         elems[i] = PointerGetDatum(cstring_to_text(bufstr));
     751           28 :         nulls[i] = false;
     752           28 :         i++;                    /* the details entry becomes the last array element */
     753              :     }
     754              : 
     755              :     /* And form an array */
     756          481 :     dims[0] = i;                /* npatterns entries, plus one if the details entry was added */
     757          481 :     lbs[0] = 1;                 /* lower bound of the single dimension is 1 */
     758              :     /* XXX: this hardcodes assumptions about the text type */
     759          481 :     return construct_md_array(elems, nulls, 1, dims, lbs,
     760              :                               TEXTOID, -1, false, TYPALIGN_INT);
     761              : }
        

Generated by: LCOV version 2.0-1