LCOV - code coverage report
Current view: top level - src/test/modules/test_regex - test_regex.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 86.3 % 307 265
Test Date: 2026-03-24 02:15:55 Functions: 100.0 % 9 9
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*--------------------------------------------------------------------------
       2              :  *
       3              :  * test_regex.c
       4              :  *      Test harness for the regular expression package.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *      src/test/modules/test_regex/test_regex.c
      11              :  *
      12              :  * -------------------------------------------------------------------------
      13              :  */
      14              : 
      15              : #include "postgres.h"
      16              : 
      17              : #include "catalog/pg_type_d.h"
      18              : #include "funcapi.h"
      19              : #include "regex/regex.h"
      20              : #include "utils/array.h"
      21              : #include "utils/builtins.h"
      22              : 
      23            2 : PG_MODULE_MAGIC;
      24              : 
      25              : 
      26              : /* all the options of interest for regex functions */
      27              : typedef struct test_re_flags
      28              : {
      29              :     int         cflags;         /* compile flags for Spencer's regex code */
      30              :     int         eflags;         /* execute flags for Spencer's regex code */
      31              :     long        info;           /* expected re_info bits */
      32              :     bool        glob;           /* do it globally (for each occurrence) */
      33              :     bool        indices;        /* report indices not actual strings */
      34              :     bool        partial;        /* expect partial match */
      35              : } test_re_flags;
      36              : 
      37              : /* cross-call state for test_regex() */
      38              : typedef struct test_regex_ctx
      39              : {
      40              :     test_re_flags re_flags;     /* flags */
      41              :     rm_detail_t details;        /* "details" from execution */
      42              :     text       *orig_str;       /* data string in original TEXT form */
      43              :     int         nmatches;       /* number of places where pattern matched */
      44              :     int         npatterns;      /* number of capturing subpatterns */
      45              :     /* We store start char index and end+1 char index for each match */
      46              :     /* so the number of entries in match_locs is nmatches * npatterns * 2 */
      47              :     int        *match_locs;     /* 0-based character indexes */
      48              :     int         next_match;     /* 0-based index of next match to process */
      49              :     /* workspace for build_test_match_result() */
      50              :     Datum      *elems;          /* has npatterns+1 elements */
      51              :     bool       *nulls;          /* has npatterns+1 elements */
      52              :     pg_wchar   *wide_str;       /* wide-char version of original string */
      53              :     char       *conv_buf;       /* conversion buffer, if needed */
      54              :     int         conv_bufsiz;    /* size thereof */
      55              : } test_regex_ctx;
      56              : 
      57              : /* Local functions */
      58              : static void test_re_compile(text *text_re, int cflags, Oid collation,
      59              :                             regex_t *result_re);
      60              : static void parse_test_flags(test_re_flags *flags, text *opts);
      61              : static test_regex_ctx *setup_test_matches(text *orig_str,
      62              :                                           regex_t *cpattern,
      63              :                                           test_re_flags *re_flags,
      64              :                                           Oid collation,
      65              :                                           bool use_subpatterns);
      66              : static ArrayType *build_test_info_result(regex_t *cpattern,
      67              :                                          test_re_flags *flags);
      68              : static ArrayType *build_test_match_result(test_regex_ctx *matchctx);
      69              : 
      70              : 
      71              : /*
      72              :  * test_regex(pattern text, string text, flags text) returns setof text[]
      73              :  *
      74              :  * This is largely based on regexp.c's regexp_matches, with additions
      75              :  * for debugging purposes.
      76              :  */
      77            3 : PG_FUNCTION_INFO_V1(test_regex);
      78              : 
      79              : Datum
      80         1767 : test_regex(PG_FUNCTION_ARGS)
      81              : {
      82              :     FuncCallContext *funcctx;
      83              :     test_regex_ctx *matchctx;
      84              :     ArrayType  *result_ary;
      85              : 
      86         1767 :     if (SRF_IS_FIRSTCALL())
      87              :     {
      88          696 :         text       *pattern = PG_GETARG_TEXT_PP(0);
      89          696 :         text       *flags = PG_GETARG_TEXT_PP(2);
      90          696 :         Oid         collation = PG_GET_COLLATION();
      91              :         test_re_flags re_flags;
      92              :         regex_t     cpattern;
      93              :         MemoryContext oldcontext;
      94              : 
      95          696 :         funcctx = SRF_FIRSTCALL_INIT();
      96          696 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
      97              : 
      98              :         /* Determine options */
      99          696 :         parse_test_flags(&re_flags, flags);
     100              : 
     101              :         /* set up the compiled pattern */
     102          696 :         test_re_compile(pattern, re_flags.cflags, collation, &cpattern);
     103              : 
     104              :         /* be sure to copy the input string into the multi-call ctx */
     105          590 :         matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern,
     106              :                                       &re_flags,
     107              :                                       collation,
     108              :                                       true);
     109              : 
     110              :         /* Pre-create workspace that build_test_match_result needs */
     111          590 :         matchctx->elems = palloc_array(Datum, matchctx->npatterns + 1);
     112          590 :         matchctx->nulls = palloc_array(bool, matchctx->npatterns + 1);
     113              : 
     114          590 :         MemoryContextSwitchTo(oldcontext);
     115          590 :         funcctx->user_fctx = matchctx;
     116              : 
     117              :         /*
     118              :          * Return the first result row, which is info equivalent to Tcl's
     119              :          * "regexp -about" output
     120              :          */
     121          590 :         result_ary = build_test_info_result(&cpattern, &re_flags);
     122              : 
     123          590 :         pg_regfree(&cpattern);
     124              : 
     125          590 :         SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
     126              :     }
     127              :     else
     128              :     {
     129              :         /* Each subsequent row describes one match */
     130         1071 :         funcctx = SRF_PERCALL_SETUP();
     131         1071 :         matchctx = (test_regex_ctx *) funcctx->user_fctx;
     132              : 
     133         1071 :         if (matchctx->next_match < matchctx->nmatches)
     134              :         {
     135          481 :             result_ary = build_test_match_result(matchctx);
     136          481 :             matchctx->next_match++;
     137          481 :             SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
     138              :         }
     139              :     }
     140              : 
     141          590 :     SRF_RETURN_DONE(funcctx);
     142              : }
     143              : 
     144              : 
     145              : /*
     146              :  * test_re_compile - compile a RE
     147              :  *
     148              :  *  text_re --- the pattern, expressed as a TEXT object
     149              :  *  cflags --- compile options for the pattern
     150              :  *  collation --- collation to use for LC_CTYPE-dependent behavior
     151              :  *  result_re --- output, compiled RE is stored here
     152              :  *
     153              :  * Pattern is given in the database encoding.  We internally convert to
     154              :  * an array of pg_wchar, which is what Spencer's regex package wants.
     155              :  *
     156              :  * Caller must eventually pg_regfree the resulting RE to avoid memory leaks.
     157              :  */
     158              : static void
     159          696 : test_re_compile(text *text_re, int cflags, Oid collation,
     160              :                 regex_t *result_re)
     161              : {
     162          696 :     int         text_re_len = VARSIZE_ANY_EXHDR(text_re);
     163          696 :     char       *text_re_val = VARDATA_ANY(text_re);
     164              :     pg_wchar   *pattern;
     165              :     int         pattern_len;
     166              :     int         regcomp_result;
     167              :     char        errMsg[100];
     168              : 
     169              :     /* Convert pattern string to wide characters */
     170          696 :     pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
     171          696 :     pattern_len = pg_mb2wchar_with_len(text_re_val,
     172              :                                        pattern,
     173              :                                        text_re_len);
     174              : 
     175          696 :     regcomp_result = pg_regcomp(result_re,
     176              :                                 pattern,
     177              :                                 pattern_len,
     178              :                                 cflags,
     179              :                                 collation);
     180              : 
     181          696 :     pfree(pattern);
     182              : 
     183          696 :     if (regcomp_result != REG_OKAY)
     184              :     {
     185              :         /* re didn't compile (no need for pg_regfree, if so) */
     186          106 :         pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
     187          106 :         ereport(ERROR,
     188              :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     189              :                  errmsg("invalid regular expression: %s", errMsg)));
     190              :     }
     191          590 : }
     192              : 
     193              : /*
     194              :  * test_re_execute - execute a RE on pg_wchar data
     195              :  *
     196              :  * Returns true on match, false on no match
     197              :  * Arguments are as for pg_regexec
     198              :  */
     199              : static bool
     200          590 : test_re_execute(regex_t *re, pg_wchar *data, int data_len,
     201              :                 int start_search,
     202              :                 rm_detail_t *details,
     203              :                 int nmatch, regmatch_t *pmatch,
     204              :                 int eflags)
     205              : {
     206              :     int         regexec_result;
     207              :     char        errMsg[100];
     208              : 
     209              :     /* Initialize match locations in case engine doesn't */
     210          590 :     details->rm_extend.rm_so = -1;
     211          590 :     details->rm_extend.rm_eo = -1;
     212         1466 :     for (int i = 0; i < nmatch; i++)
     213              :     {
     214          876 :         pmatch[i].rm_so = -1;
     215          876 :         pmatch[i].rm_eo = -1;
     216              :     }
     217              : 
     218              :     /* Perform RE match and return result */
     219          590 :     regexec_result = pg_regexec(re,
     220              :                                 data,
     221              :                                 data_len,
     222              :                                 start_search,
     223              :                                 details,
     224              :                                 nmatch,
     225              :                                 pmatch,
     226              :                                 eflags);
     227              : 
     228          590 :     if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
     229              :     {
     230              :         /* re failed??? */
     231            0 :         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
     232            0 :         ereport(ERROR,
     233              :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     234              :                  errmsg("regular expression failed: %s", errMsg)));
     235              :     }
     236              : 
     237          590 :     return (regexec_result == REG_OKAY);
     238              : }
     239              : 
     240              : 
     241              : /*
     242              :  * parse_test_flags - parse the flags argument
     243              :  *
     244              :  *  flags --- output argument, filled with desired options
     245              :  *  opts --- TEXT object, or NULL for defaults
     246              :  */
     247              : static void
     248          696 : parse_test_flags(test_re_flags *flags, text *opts)
     249              : {
     250              :     /* these defaults must match Tcl's */
     251          696 :     int         cflags = REG_ADVANCED;
     252          696 :     int         eflags = 0;
     253          696 :     long        info = 0;
     254              : 
     255          696 :     flags->glob = false;
     256          696 :     flags->indices = false;
     257          696 :     flags->partial = false;
     258              : 
     259          696 :     if (opts)
     260              :     {
     261          696 :         char       *opt_p = VARDATA_ANY(opts);
     262          696 :         int         opt_len = VARSIZE_ANY_EXHDR(opts);
     263              :         int         i;
     264              : 
     265         1891 :         for (i = 0; i < opt_len; i++)
     266              :         {
     267         1195 :             switch (opt_p[i])
     268              :             {
     269           78 :                 case '-':
     270              :                     /* allowed, no-op */
     271           78 :                     break;
     272            7 :                 case '!':
     273            7 :                     flags->partial = true;
     274            7 :                     break;
     275            1 :                 case '*':
     276              :                     /* test requires Unicode --- ignored here */
     277            1 :                     break;
     278           53 :                 case '0':
     279           53 :                     flags->indices = true;
     280           53 :                     break;
     281              : 
     282              :                     /* These flags correspond to user-exposed RE options: */
     283            0 :                 case 'g':       /* global match */
     284            0 :                     flags->glob = true;
     285            0 :                     break;
     286           20 :                 case 'i':       /* case insensitive */
     287           20 :                     cflags |= REG_ICASE;
     288           20 :                     break;
     289           35 :                 case 'n':       /* \n affects ^ $ . [^ */
     290           35 :                     cflags |= REG_NEWLINE;
     291           35 :                     break;
     292            2 :                 case 'p':       /* ~Perl, \n affects . [^ */
     293            2 :                     cflags |= REG_NLSTOP;
     294            2 :                     cflags &= ~REG_NLANCH;
     295            2 :                     break;
     296            2 :                 case 'w':       /* weird, \n affects ^ $ only */
     297            2 :                     cflags &= ~REG_NLSTOP;
     298            2 :                     cflags |= REG_NLANCH;
     299            2 :                     break;
     300           14 :                 case 'x':       /* expanded syntax */
     301           14 :                     cflags |= REG_EXPANDED;
     302           14 :                     break;
     303              : 
     304              :                     /* These flags correspond to Tcl's -xflags options: */
     305            2 :                 case 'a':
     306            2 :                     cflags |= REG_ADVF;
     307            2 :                     break;
     308          131 :                 case 'b':
     309          131 :                     cflags &= ~REG_ADVANCED;
     310          131 :                     break;
     311           11 :                 case 'c':
     312              : 
     313              :                     /*
     314              :                      * Tcl calls this TCL_REG_CANMATCH, but it's really
     315              :                      * REG_EXPECT.  In this implementation we must also set
     316              :                      * the partial and indices flags, so that
     317              :                      * setup_test_matches and build_test_match_result will
     318              :                      * emit the desired data.  (They'll emit more fields than
     319              :                      * Tcl would, but that's fine.)
     320              :                      */
     321           11 :                     cflags |= REG_EXPECT;
     322           11 :                     flags->partial = true;
     323           11 :                     flags->indices = true;
     324           11 :                     break;
     325           10 :                 case 'e':
     326           10 :                     cflags &= ~REG_ADVANCED;
     327           10 :                     cflags |= REG_EXTENDED;
     328           10 :                     break;
     329            6 :                 case 'q':
     330            6 :                     cflags &= ~REG_ADVANCED;
     331            6 :                     cflags |= REG_QUOTE;
     332            6 :                     break;
     333            2 :                 case 'o':       /* o for opaque */
     334            2 :                     cflags |= REG_NOSUB;
     335            2 :                     break;
     336            2 :                 case 's':       /* s for start */
     337            2 :                     cflags |= REG_BOSONLY;
     338            2 :                     break;
     339            6 :                 case '+':
     340            6 :                     cflags |= REG_FAKE;
     341            6 :                     break;
     342            0 :                 case ',':
     343            0 :                     cflags |= REG_PROGRESS;
     344            0 :                     break;
     345            0 :                 case '.':
     346            0 :                     cflags |= REG_DUMP;
     347            0 :                     break;
     348            0 :                 case ':':
     349            0 :                     eflags |= REG_MTRACE;
     350            0 :                     break;
     351            0 :                 case ';':
     352            0 :                     eflags |= REG_FTRACE;
     353            0 :                     break;
     354            6 :                 case '^':
     355            6 :                     eflags |= REG_NOTBOL;
     356            6 :                     break;
     357            4 :                 case '$':
     358            4 :                     eflags |= REG_NOTEOL;
     359            4 :                     break;
     360           17 :                 case 't':
     361           17 :                     cflags |= REG_EXPECT;
     362           17 :                     break;
     363            5 :                 case '%':
     364            5 :                     eflags |= REG_SMALL;
     365            5 :                     break;
     366              : 
     367              :                     /* These flags define expected info bits: */
     368            5 :                 case 'A':
     369            5 :                     info |= REG_UBSALNUM;
     370            5 :                     break;
     371            4 :                 case 'B':
     372            4 :                     info |= REG_UBRACES;
     373            4 :                     break;
     374           42 :                 case 'E':
     375           42 :                     info |= REG_UBBS;
     376           42 :                     break;
     377           34 :                 case 'H':
     378           34 :                     info |= REG_ULOOKAROUND;
     379           34 :                     break;
     380           11 :                 case 'I':
     381           11 :                     info |= REG_UIMPOSSIBLE;
     382           11 :                     break;
     383          164 :                 case 'L':
     384          164 :                     info |= REG_ULOCALE;
     385          164 :                     break;
     386           43 :                 case 'M':
     387           43 :                     info |= REG_UUNPORT;
     388           43 :                     break;
     389           47 :                 case 'N':
     390           47 :                     info |= REG_UEMPTYMATCH;
     391           47 :                     break;
     392          307 :                 case 'P':
     393          307 :                     info |= REG_UNONPOSIX;
     394          307 :                     break;
     395           36 :                 case 'Q':
     396           36 :                     info |= REG_UBOUNDS;
     397           36 :                     break;
     398           42 :                 case 'R':
     399           42 :                     info |= REG_UBACKREF;
     400           42 :                     break;
     401           25 :                 case 'S':
     402           25 :                     info |= REG_UUNSPEC;
     403           25 :                     break;
     404           20 :                 case 'T':
     405           20 :                     info |= REG_USHORTEST;
     406           20 :                     break;
     407            1 :                 case 'U':
     408            1 :                     info |= REG_UPBOTCH;
     409            1 :                     break;
     410              : 
     411            0 :                 default:
     412            0 :                     ereport(ERROR,
     413              :                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     414              :                              errmsg("invalid regular expression test option: \"%.*s\"",
     415              :                                     pg_mblen_range(opt_p + i, opt_p + opt_len),
     416              :                                     opt_p + i)));
     417              :                     break;
     418              :             }
     419              :         }
     420              :     }
     421          696 :     flags->cflags = cflags;
     422          696 :     flags->eflags = eflags;
     423          696 :     flags->info = info;
     424          696 : }
     425              : 
     426              : /*
     427              :  * setup_test_matches --- do the initial matching
     428              :  *
     429              :  * To simplify memory management, we do all the matching in one swoop.
     430              :  * The returned test_regex_ctx contains the locations of all the substrings
     431              :  * matching the pattern.
     432              :  */
     433              : static test_regex_ctx *
     434          590 : setup_test_matches(text *orig_str,
     435              :                    regex_t *cpattern, test_re_flags *re_flags,
     436              :                    Oid collation,
     437              :                    bool use_subpatterns)
     438              : {
     439          590 :     test_regex_ctx *matchctx = palloc0_object(test_regex_ctx);
     440          590 :     int         eml = pg_database_encoding_max_length();
     441              :     int         orig_len;
     442              :     pg_wchar   *wide_str;
     443              :     int         wide_len;
     444              :     regmatch_t *pmatch;
     445              :     int         pmatch_len;
     446              :     int         array_len;
     447              :     int         array_idx;
     448              :     int         prev_match_end;
     449              :     int         start_search;
     450          590 :     int         maxlen = 0;     /* largest fetch length in characters */
     451              : 
     452              :     /* save flags */
     453          590 :     matchctx->re_flags = *re_flags;
     454              : 
     455              :     /* save original string --- we'll extract result substrings from it */
     456          590 :     matchctx->orig_str = orig_str;
     457              : 
     458              :     /* convert string to pg_wchar form for matching */
     459          590 :     orig_len = VARSIZE_ANY_EXHDR(orig_str);
     460          590 :     wide_str = palloc_array(pg_wchar, orig_len + 1);
     461          590 :     wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
     462              : 
     463              :     /* do we want to remember subpatterns? */
     464          590 :     if (use_subpatterns && cpattern->re_nsub > 0)
     465              :     {
     466          127 :         matchctx->npatterns = cpattern->re_nsub + 1;
     467          127 :         pmatch_len = cpattern->re_nsub + 1;
     468              :     }
     469              :     else
     470              :     {
     471          463 :         use_subpatterns = false;
     472          463 :         matchctx->npatterns = 1;
     473          463 :         pmatch_len = 1;
     474              :     }
     475              : 
     476              :     /* temporary output space for RE package */
     477          590 :     pmatch = palloc_array(regmatch_t, pmatch_len);
     478              : 
     479              :     /*
     480              :      * the real output space (grown dynamically if needed)
     481              :      *
     482              :      * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
     483              :      * than at 2^27
     484              :      */
     485          590 :     array_len = re_flags->glob ? 255 : 31;
     486          590 :     matchctx->match_locs = palloc_array(int, array_len);
     487          590 :     array_idx = 0;
     488              : 
     489              :     /* search for the pattern, perhaps repeatedly */
     490          590 :     prev_match_end = 0;
     491          590 :     start_search = 0;
     492          590 :     while (test_re_execute(cpattern, wide_str, wide_len,
     493              :                            start_search,
     494              :                            &matchctx->details,
     495              :                            pmatch_len, pmatch,
     496              :                            re_flags->eflags))
     497              :     {
     498              :         /* enlarge output space if needed */
     499          463 :         while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
     500              :         {
     501            0 :             array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
     502            0 :             if (array_len > MaxAllocSize / sizeof(int))
     503            0 :                 ereport(ERROR,
     504              :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     505              :                          errmsg("too many regular expression matches")));
     506            0 :             matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
     507              :                                                     sizeof(int) * array_len);
     508              :         }
     509              : 
     510              :         /* save this match's locations */
     511         1094 :         for (int i = 0; i < matchctx->npatterns; i++)
     512              :         {
     513          631 :             int         so = pmatch[i].rm_so;
     514          631 :             int         eo = pmatch[i].rm_eo;
     515              : 
     516          631 :             matchctx->match_locs[array_idx++] = so;
     517          631 :             matchctx->match_locs[array_idx++] = eo;
     518          631 :             if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
     519          438 :                 maxlen = (eo - so);
     520              :         }
     521          463 :         matchctx->nmatches++;
     522          463 :         prev_match_end = pmatch[0].rm_eo;
     523              : 
     524              :         /* if not glob, stop after one match */
     525          463 :         if (!re_flags->glob)
     526          463 :             break;
     527              : 
     528              :         /*
     529              :          * Advance search position.  Normally we start the next search at the
     530              :          * end of the previous match; but if the match was of zero length, we
     531              :          * have to advance by one character, or we'd just find the same match
     532              :          * again.
     533              :          */
     534            0 :         start_search = prev_match_end;
     535            0 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
     536            0 :             start_search++;
     537            0 :         if (start_search > wide_len)
     538            0 :             break;
     539              :     }
     540              : 
     541              :     /*
     542              :      * If we had no match, but "partial" and "indices" are set, emit the
     543              :      * details.
     544              :      */
     545          590 :     if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices)
     546              :     {
     547              :         /* enlarge output space if needed */
     548           18 :         while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
     549              :         {
     550            0 :             array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
     551            0 :             if (array_len > MaxAllocSize / sizeof(int))
     552            0 :                 ereport(ERROR,
     553              :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     554              :                          errmsg("too many regular expression matches")));
     555            0 :             matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
     556              :                                                     sizeof(int) * array_len);
     557              :         }
     558              : 
     559           18 :         matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so;
     560           18 :         matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo;
     561              :         /* we don't have pmatch data, so emit -1 */
     562           20 :         for (int i = 1; i < matchctx->npatterns; i++)
     563              :         {
     564            2 :             matchctx->match_locs[array_idx++] = -1;
     565            2 :             matchctx->match_locs[array_idx++] = -1;
     566              :         }
     567           18 :         matchctx->nmatches++;
     568              :     }
     569              : 
     570              :     Assert(array_idx <= array_len);
     571              : 
     572          590 :     if (eml > 1)
     573              :     {
     574          590 :         int64       maxsiz = eml * (int64) maxlen;
     575              :         int         conv_bufsiz;
     576              : 
     577              :         /*
     578              :          * Make the conversion buffer large enough for any substring of
     579              :          * interest.
     580              :          *
     581              :          * Worst case: assume we need the maximum size (maxlen*eml), but take
     582              :          * advantage of the fact that the original string length in bytes is
     583              :          * an upper bound on the byte length of any fetched substring (and we
     584              :          * know that len+1 is safe to allocate because the varlena header is
     585              :          * longer than 1 byte).
     586              :          */
     587          590 :         if (maxsiz > orig_len)
     588          415 :             conv_bufsiz = orig_len + 1;
     589              :         else
     590          175 :             conv_bufsiz = maxsiz + 1;   /* safe since maxsiz < 2^30 */
     591              : 
     592          590 :         matchctx->conv_buf = palloc(conv_bufsiz);
     593          590 :         matchctx->conv_bufsiz = conv_bufsiz;
     594          590 :         matchctx->wide_str = wide_str;
     595              :     }
     596              :     else
     597              :     {
     598              :         /* No need to keep the wide string if we're in a single-byte charset. */
     599            0 :         pfree(wide_str);
     600            0 :         matchctx->wide_str = NULL;
     601            0 :         matchctx->conv_buf = NULL;
     602            0 :         matchctx->conv_bufsiz = 0;
     603              :     }
     604              : 
     605              :     /* Clean up temp storage */
     606          590 :     pfree(pmatch);
     607              : 
     608          590 :     return matchctx;
     609              : }
     610              : 
     611              : /*
     612              :  * build_test_info_result - build output array describing compiled regexp
     613              :  *
     614              :  * This borrows some code from Tcl's TclRegAbout().
                      :  *
                      :  * The result is a one-dimensional text array with lower bound 1.  Its
                      :  * first element is cpattern->re_nsub printed as a decimal integer.  It is
                      :  * followed by at most one element per entry of infonames[], obtained by
                      :  * comparing the bits set in cpattern->re_info with the bits the caller
                      :  * expects (flags->info):
                      :  *
                      :  *      set and expected        the bit's name, e.g. "REG_UBACKREF"
                      :  *      set, not expected       "unexpected NAME!"
                      :  *      expected, not set       "missing NAME!"
                      :  *      neither                 nothing is emitted
                      :  *
                      :  * No nulls array is passed to construct_md_array(), and every element
                      :  * stored here is a text datum.
     615              :  */
     616              : static ArrayType *
     617          590 : build_test_info_result(regex_t *cpattern, test_re_flags *flags)
     618              : {
     619              :     /* Translation data for flag bits in regex_t.re_info */
     620              :     struct infoname
     621              :     {
     622              :         int         bit;        /* flag bit tested against re_info */
     623              :         const char *text;       /* name reported for that bit */
     624              :     };
     625              :     static const struct infoname infonames[] = {
     626              :         {REG_UBACKREF, "REG_UBACKREF"},
     627              :         {REG_ULOOKAROUND, "REG_ULOOKAROUND"},
     628              :         {REG_UBOUNDS, "REG_UBOUNDS"},
     629              :         {REG_UBRACES, "REG_UBRACES"},
     630              :         {REG_UBSALNUM, "REG_UBSALNUM"},
     631              :         {REG_UPBOTCH, "REG_UPBOTCH"},
     632              :         {REG_UBBS, "REG_UBBS"},
     633              :         {REG_UNONPOSIX, "REG_UNONPOSIX"},
     634              :         {REG_UUNSPEC, "REG_UUNSPEC"},
     635              :         {REG_UUNPORT, "REG_UUNPORT"},
     636              :         {REG_ULOCALE, "REG_ULOCALE"},
     637              :         {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
     638              :         {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
     639              :         {REG_USHORTEST, "REG_USHORTEST"},
     640              :         {0, NULL}               /* terminator: the scan stops at bit == 0 */
     641              :     };
     642              :     const struct infoname *inf;
     643              :     Datum       elems[lengthof(infonames) + 1]; /* re_nsub + one per entry */
     644          590 :     int         nresults = 0;
     645              :     char        buf[80];        /* scratch for formatted strings */
     646              :     int         dims[1];
     647              :     int         lbs[1];
     648              : 
     649              :     /* Set up results: first, the number of subexpressions */
     650          590 :     snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub);
     651          590 :     elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     652              : 
     653              :     /*
                      :      * Report individual info bit states.  Each table entry contributes at
                      :      * most one element, so nresults cannot exceed the size of elems[]
                      :      * (the table's terminator entry is counted by lengthof() but never
                      :      * produces an element).
                      :      */
     654         8850 :     for (inf = infonames; inf->bit != 0; inf++)
     655              :     {
     656         8260 :         if (cpattern->re_info & inf->bit)
     657              :         {
     658          758 :             if (flags->info & inf->bit)
     659          758 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text));
     660              :             else
     661              :             {
     662            0 :                 snprintf(buf, sizeof(buf), "unexpected %s!", inf->text);
     663            0 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     664              :             }
     665              :         }
     666              :         else
     667              :         {
     668         7502 :             if (flags->info & inf->bit)
     669              :             {
     670            0 :                 snprintf(buf, sizeof(buf), "missing %s!", inf->text);
     671            0 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     672              :             }
     673              :         }
     674              :     }
     675              : 
     676              :     /* And form an array: one dimension, nresults elements, lower bound 1 */
     677          590 :     dims[0] = nresults;
     678          590 :     lbs[0] = 1;
     679              :     /*
                      :      * XXX: this hardcodes assumptions about the text type: the -1, false
                      :      * and TYPALIGN_INT arguments are written as literals here rather than
                      :      * looked up for TEXTOID (presumably its length, by-value and alignment
                      :      * properties; see construct_md_array()).
                      :      */
     680          590 :     return construct_md_array(elems, NULL, 1, dims, lbs,
     681              :                               TEXTOID, -1, false, TYPALIGN_INT);
     682              : }
     683              : 
     684              : /*
     685              :  * build_test_match_result - build output array for current match
     686              :  *
     687              :  * Note that if the indices flag is set, we don't need any strings,
     688              :  * just the location data.
                      :  *
                      :  * The match reported is number matchctx->next_match; that counter is only
                      :  * read here, not advanced.  The result is a one-dimensional text array
                      :  * with lower bound 1 holding one element per pattern slot
                      :  * (matchctx->npatterns of them), each derived from a (start, end) pair in
                      :  * matchctx->match_locs:
                      :  *
                      :  *      indices flag set        text "START END", where END is printed as
                      :  *                              end - 1 unless start is negative
                      :  *      start or end negative   a NULL element
                      :  *      otherwise               the matched substring as text
                      :  *
                      :  * When the indices flag is set and cflags includes REG_EXPECT, one more
                      :  * "START END" element built from matchctx->details.rm_extend is appended.
                      :  *
                      :  * The element and null-flag storage is matchctx->elems / matchctx->nulls,
                      :  * which this function overwrites.  NOTE(review): those arrays need room
                      :  * for npatterns + 1 entries in the REG_EXPECT indices case; they are
                      :  * allocated outside this function, so confirm that at the allocation site.
     689              :  */
     690              : static ArrayType *
     691          481 : build_test_match_result(test_regex_ctx *matchctx)
     692              : {
     693          481 :     char       *buf = matchctx->conv_buf;
     694          481 :     Datum      *elems = matchctx->elems;
     695          481 :     bool       *nulls = matchctx->nulls;
     696          481 :     bool        indices = matchctx->re_flags.indices;
     697              :     char        bufstr[80];     /* scratch for "START END" strings */
     698              :     int         dims[1];
     699              :     int         lbs[1];
     700              :     int         loc;            /* read position within match_locs */
     701              :     int         i;
     702              : 
     703              :     /*
                      :      * Extract matching substrings from the original string.
                      :      *
                      :      * match_locs holds two ints (start, end) per pattern slot per match,
                      :      * so the data for this match begins at next_match * npatterns * 2.
                      :      */
     704          481 :     loc = matchctx->next_match * matchctx->npatterns * 2;
     705         1132 :     for (i = 0; i < matchctx->npatterns; i++)
     706              :     {
     707          651 :         int         so = matchctx->match_locs[loc++];
     708          651 :         int         eo = matchctx->match_locs[loc++];
     709              : 
     710          651 :         if (indices)
     711              :         {
     712              :             /* Report eo this way for consistency with Tcl */
     713           84 :             snprintf(bufstr, sizeof(bufstr), "%d %d",
     714              :                      so, so < 0 ? eo : eo - 1);
     715           84 :             elems[i] = PointerGetDatum(cstring_to_text(bufstr));
     716           84 :             nulls[i] = false;
     717              :         }
     718          567 :         else if (so < 0 || eo < 0)  /* no location data: emit NULL */
     719              :         {
     720           12 :             elems[i] = (Datum) 0;
     721           12 :             nulls[i] = true;
     722              :         }
     723          555 :         else if (buf)           /* conversion buffer exists: use wide_str */
     724              :         {
     725          555 :             int         len = pg_wchar2mb_with_len(matchctx->wide_str + so,
     726              :                                                    buf,
     727              :                                                    eo - so);
     728              : 
     729              :             Assert(len < matchctx->conv_bufsiz);    /* must fit in conv_buf */
     730          555 :             elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
     731          555 :             nulls[i] = false;
     732              :         }
     733              :         else                    /* no conv_buf: take substring of orig_str */
     734              :         {
     735            0 :             elems[i] = DirectFunctionCall3(text_substr,
     736              :                                            PointerGetDatum(matchctx->orig_str),
     737              :                                            Int32GetDatum(so + 1),   /* presumably 1-based start; confirm */
     738              :                                            Int32GetDatum(eo - so)); /* length */
     739            0 :             nulls[i] = false;
     740              :         }
     741              :     }
     742              : 
     743              :     /*
                      :      * In EXPECT indices mode, also report the "details".  At this point
                      :      * i == npatterns, so the extra element goes just past the per-pattern
                      :      * entries and is formatted the same way as they are.
                      :      */
     744          481 :     if (indices && (matchctx->re_flags.cflags & REG_EXPECT))
     745              :     {
     746           28 :         int         so = matchctx->details.rm_extend.rm_so;
     747           28 :         int         eo = matchctx->details.rm_extend.rm_eo;
     748              : 
     749           28 :         snprintf(bufstr, sizeof(bufstr), "%d %d",
     750              :                  so, so < 0 ? eo : eo - 1);
     751           28 :         elems[i] = PointerGetDatum(cstring_to_text(bufstr));
     752           28 :         nulls[i] = false;
     753           28 :         i++;
     754              :     }
     755              : 
     756              :     /* And form an array; i counts the "details" element when present */
     757          481 :     dims[0] = i;
     758          481 :     lbs[0] = 1;
     759              :     /* XXX: this hardcodes assumptions about the text type */
     760          481 :     return construct_md_array(elems, nulls, 1, dims, lbs,
     761              :                               TEXTOID, -1, false, TYPALIGN_INT);
     762              : }
        

Generated by: LCOV version 2.0-1