LCOV - code coverage report
Current view: top level - src/test/modules/test_regex - test_regex.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 268 311 86.2 %
Date: 2021-09-17 15:07:27 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*--------------------------------------------------------------------------
       2             :  *
       3             :  * test_regex.c
       4             :  *      Test harness for the regular expression package.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *      src/test/modules/test_regex/test_regex.c
      11             :  *
      12             :  * -------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "funcapi.h"
      18             : #include "miscadmin.h"
      19             : #include "regex/regex.h"
      20             : #include "utils/array.h"
      21             : #include "utils/builtins.h"
      22             : 
      23           4 : PG_MODULE_MAGIC;
      24             : 
      25             : 
      26             : /* all the options of interest for regex functions */
      27             : typedef struct test_re_flags
      28             : {
      29             :     int         cflags;         /* compile flags for Spencer's regex code */
      30             :     int         eflags;         /* execute flags for Spencer's regex code */
      31             :     long        info;           /* expected re_info bits */
      32             :     bool        glob;           /* do it globally (for each occurrence) */
      33             :     bool        indices;        /* report indices not actual strings */
      34             :     bool        partial;        /* expect partial match */
      35             : } test_re_flags;
      36             : 
      37             : /* cross-call state for test_regex() */
      38             : typedef struct test_regex_ctx
      39             : {
      40             :     test_re_flags re_flags;     /* flags */
      41             :     rm_detail_t details;        /* "details" from execution */
      42             :     text       *orig_str;       /* data string in original TEXT form */
      43             :     int         nmatches;       /* number of places where pattern matched */
      44             :     int         npatterns;      /* number of capturing subpatterns */
      45             :     /* We store start char index and end+1 char index for each match */
      46             :     /* so the number of entries in match_locs is nmatches * npatterns * 2 */
      47             :     int        *match_locs;     /* 0-based character indexes */
      48             :     int         next_match;     /* 0-based index of next match to process */
      49             :     /* workspace for build_test_match_result() */
      50             :     Datum      *elems;          /* has npatterns+1 elements */
      51             :     bool       *nulls;          /* has npatterns+1 elements */
      52             :     pg_wchar   *wide_str;       /* wide-char version of original string */
      53             :     char       *conv_buf;       /* conversion buffer, if needed */
      54             :     int         conv_bufsiz;    /* size thereof */
      55             : } test_regex_ctx;
      56             : 
      57             : /* Local functions */
      58             : static void test_re_compile(text *text_re, int cflags, Oid collation,
      59             :                             regex_t *result_re);
      60             : static void parse_test_flags(test_re_flags *flags, text *opts);
      61             : static test_regex_ctx *setup_test_matches(text *orig_str,
      62             :                                           regex_t *cpattern,
      63             :                                           test_re_flags *flags,
      64             :                                           Oid collation,
      65             :                                           bool use_subpatterns);
      66             : static ArrayType *build_test_info_result(regex_t *cpattern,
      67             :                                          test_re_flags *flags);
      68             : static ArrayType *build_test_match_result(test_regex_ctx *matchctx);
      69             : 
      70             : 
      71             : /*
      72             :  * test_regex(pattern text, string text, flags text) returns setof text[]
      73             :  *
      74             :  * This is largely based on regexp.c's regexp_matches, with additions
      75             :  * for debugging purposes.
      76             :  */
      77           6 : PG_FUNCTION_INFO_V1(test_regex);
      78             : 
      79             : Datum
      80        3524 : test_regex(PG_FUNCTION_ARGS)
      81             : {
      82             :     FuncCallContext *funcctx;
      83             :     test_regex_ctx *matchctx;
      84             :     ArrayType  *result_ary;
      85             : 
      86        3524 :     if (SRF_IS_FIRSTCALL())
      87             :     {
      88        1388 :         text       *pattern = PG_GETARG_TEXT_PP(0);
      89        1388 :         text       *flags = PG_GETARG_TEXT_PP(2);
      90        1388 :         Oid         collation = PG_GET_COLLATION();
      91             :         test_re_flags re_flags;
      92             :         regex_t     cpattern;
      93             :         MemoryContext oldcontext;
      94             : 
      95        1388 :         funcctx = SRF_FIRSTCALL_INIT();
      96        1388 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
      97             : 
      98             :         /* Determine options */
      99        1388 :         parse_test_flags(&re_flags, flags);
     100             : 
     101             :         /* set up the compiled pattern */
     102        1388 :         test_re_compile(pattern, re_flags.cflags, collation, &cpattern);
     103             : 
     104             :         /* be sure to copy the input string into the multi-call ctx */
     105        1176 :         matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern,
     106             :                                       &re_flags,
     107             :                                       collation,
     108             :                                       true);
     109             : 
     110             :         /* Pre-create workspace that build_test_match_result needs */
     111        2352 :         matchctx->elems = (Datum *) palloc(sizeof(Datum) *
     112        1176 :                                            (matchctx->npatterns + 1));
     113        2352 :         matchctx->nulls = (bool *) palloc(sizeof(bool) *
     114        1176 :                                           (matchctx->npatterns + 1));
     115             : 
     116        1176 :         MemoryContextSwitchTo(oldcontext);
     117        1176 :         funcctx->user_fctx = (void *) matchctx;
     118             : 
     119             :         /*
     120             :          * Return the first result row, which is info equivalent to Tcl's
     121             :          * "regexp -about" output
     122             :          */
     123        1176 :         result_ary = build_test_info_result(&cpattern, &re_flags);
     124             : 
     125        1176 :         pg_regfree(&cpattern);
     126             : 
     127        1176 :         SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
     128             :     }
     129             :     else
     130             :     {
     131             :         /* Each subsequent row describes one match */
     132        2136 :         funcctx = SRF_PERCALL_SETUP();
     133        2136 :         matchctx = (test_regex_ctx *) funcctx->user_fctx;
     134             : 
     135        2136 :         if (matchctx->next_match < matchctx->nmatches)
     136             :         {
     137         960 :             result_ary = build_test_match_result(matchctx);
     138         960 :             matchctx->next_match++;
     139         960 :             SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
     140             :         }
     141             :     }
     142             : 
     143        1176 :     SRF_RETURN_DONE(funcctx);
     144             : }
     145             : 
     146             : 
     147             : /*
     148             :  * test_re_compile - compile a RE
     149             :  *
     150             :  *  text_re --- the pattern, expressed as a TEXT object
     151             :  *  cflags --- compile options for the pattern
     152             :  *  collation --- collation to use for LC_CTYPE-dependent behavior
     153             :  *  result_re --- output, compiled RE is stored here
     154             :  *
     155             :  * Pattern is given in the database encoding.  We internally convert to
     156             :  * an array of pg_wchar, which is what Spencer's regex package wants.
     157             :  *
     158             :  * Caller must eventually pg_regfree the resulting RE to avoid memory leaks.
     159             :  */
     160             : static void
     161        1388 : test_re_compile(text *text_re, int cflags, Oid collation,
     162             :                 regex_t *result_re)
     163             : {
     164        1388 :     int         text_re_len = VARSIZE_ANY_EXHDR(text_re);
     165        1388 :     char       *text_re_val = VARDATA_ANY(text_re);
     166             :     pg_wchar   *pattern;
     167             :     int         pattern_len;
     168             :     int         regcomp_result;
     169             :     char        errMsg[100];
     170             : 
     171             :     /* Convert pattern string to wide characters */
     172        1388 :     pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
     173        1388 :     pattern_len = pg_mb2wchar_with_len(text_re_val,
     174             :                                        pattern,
     175             :                                        text_re_len);
     176             : 
     177        1388 :     regcomp_result = pg_regcomp(result_re,
     178             :                                 pattern,
     179             :                                 pattern_len,
     180             :                                 cflags,
     181             :                                 collation);
     182             : 
     183        1388 :     pfree(pattern);
     184             : 
     185        1388 :     if (regcomp_result != REG_OKAY)
     186             :     {
     187             :         /* re didn't compile (no need for pg_regfree, if so) */
     188             : 
     189             :         /*
     190             :          * Here and in other places in this file, do CHECK_FOR_INTERRUPTS
     191             :          * before reporting a regex error.  This is so that if the regex
     192             :          * library aborts and returns REG_CANCEL, we don't print an error
     193             :          * message that implies the regex was invalid.
     194             :          */
     195         212 :         CHECK_FOR_INTERRUPTS();
     196             : 
     197         212 :         pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
     198         212 :         ereport(ERROR,
     199             :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     200             :                  errmsg("invalid regular expression: %s", errMsg)));
     201             :     }
     202        1176 : }
     203             : 
     204             : /*
     205             :  * test_re_execute - execute a RE on pg_wchar data
     206             :  *
     207             :  * Returns true on match, false on no match
     208             :  * Arguments are as for pg_regexec
     209             :  */
     210             : static bool
     211        1176 : test_re_execute(regex_t *re, pg_wchar *data, int data_len,
     212             :                 int start_search,
     213             :                 rm_detail_t *details,
     214             :                 int nmatch, regmatch_t *pmatch,
     215             :                 int eflags)
     216             : {
     217             :     int         regexec_result;
     218             :     char        errMsg[100];
     219             : 
     220             :     /* Initialize match locations in case engine doesn't */
     221        1176 :     details->rm_extend.rm_so = -1;
     222        1176 :     details->rm_extend.rm_eo = -1;
     223        2924 :     for (int i = 0; i < nmatch; i++)
     224             :     {
     225        1748 :         pmatch[i].rm_so = -1;
     226        1748 :         pmatch[i].rm_eo = -1;
     227             :     }
     228             : 
     229             :     /* Perform RE match and return result */
     230        1176 :     regexec_result = pg_regexec(re,
     231             :                                 data,
     232             :                                 data_len,
     233             :                                 start_search,
     234             :                                 details,
     235             :                                 nmatch,
     236             :                                 pmatch,
     237             :                                 eflags);
     238             : 
     239        1176 :     if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
     240             :     {
     241             :         /* re failed??? */
     242           0 :         CHECK_FOR_INTERRUPTS();
     243           0 :         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
     244           0 :         ereport(ERROR,
     245             :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     246             :                  errmsg("regular expression failed: %s", errMsg)));
     247             :     }
     248             : 
     249        1176 :     return (regexec_result == REG_OKAY);
     250             : }
     251             : 
     252             : 
     253             : /*
     254             :  * parse_test_flags - parse the flags argument
     255             :  *
     256             :  *  flags --- output argument, filled with desired options
     257             :  *  opts --- TEXT object, or NULL for defaults
     258             :  */
     259             : static void
     260        1388 : parse_test_flags(test_re_flags *flags, text *opts)
     261             : {
     262             :     /* these defaults must match Tcl's */
     263        1388 :     int         cflags = REG_ADVANCED;
     264        1388 :     int         eflags = 0;
     265        1388 :     long        info = 0;
     266             : 
     267        1388 :     flags->glob = false;
     268        1388 :     flags->indices = false;
     269        1388 :     flags->partial = false;
     270             : 
     271        1388 :     if (opts)
     272             :     {
     273        1388 :         char       *opt_p = VARDATA_ANY(opts);
     274        1388 :         int         opt_len = VARSIZE_ANY_EXHDR(opts);
     275             :         int         i;
     276             : 
     277        3760 :         for (i = 0; i < opt_len; i++)
     278             :         {
     279        2372 :             switch (opt_p[i])
     280             :             {
     281         156 :                 case '-':
     282             :                     /* allowed, no-op */
     283         156 :                     break;
     284          14 :                 case '!':
     285          14 :                     flags->partial = true;
     286          14 :                     break;
     287           2 :                 case '*':
     288             :                     /* test requires Unicode --- ignored here */
     289           2 :                     break;
     290         106 :                 case '0':
     291         106 :                     flags->indices = true;
     292         106 :                     break;
     293             : 
     294             :                     /* These flags correspond to user-exposed RE options: */
     295           0 :                 case 'g':       /* global match */
     296           0 :                     flags->glob = true;
     297           0 :                     break;
     298          40 :                 case 'i':       /* case insensitive */
     299          40 :                     cflags |= REG_ICASE;
     300          40 :                     break;
     301          70 :                 case 'n':       /* \n affects ^ $ . [^ */
     302          70 :                     cflags |= REG_NEWLINE;
     303          70 :                     break;
     304           4 :                 case 'p':       /* ~Perl, \n affects . [^ */
     305           4 :                     cflags |= REG_NLSTOP;
     306           4 :                     cflags &= ~REG_NLANCH;
     307           4 :                     break;
     308           4 :                 case 'w':       /* weird, \n affects ^ $ only */
     309           4 :                     cflags &= ~REG_NLSTOP;
     310           4 :                     cflags |= REG_NLANCH;
     311           4 :                     break;
     312          28 :                 case 'x':       /* expanded syntax */
     313          28 :                     cflags |= REG_EXPANDED;
     314          28 :                     break;
     315             : 
     316             :                     /* These flags correspond to Tcl's -xflags options: */
     317           4 :                 case 'a':
     318           4 :                     cflags |= REG_ADVF;
     319           4 :                     break;
     320         262 :                 case 'b':
     321         262 :                     cflags &= ~REG_ADVANCED;
     322         262 :                     break;
     323          22 :                 case 'c':
     324             : 
     325             :                     /*
     326             :                      * Tcl calls this TCL_REG_CANMATCH, but it's really
     327             :                      * REG_EXPECT.  In this implementation we must also set
     328             :                      * the partial and indices flags, so that
     329             :                      * setup_test_matches and build_test_match_result will
     330             :                      * emit the desired data.  (They'll emit more fields than
     331             :                      * Tcl would, but that's fine.)
     332             :                      */
     333          22 :                     cflags |= REG_EXPECT;
     334          22 :                     flags->partial = true;
     335          22 :                     flags->indices = true;
     336          22 :                     break;
     337          20 :                 case 'e':
     338          20 :                     cflags &= ~REG_ADVANCED;
     339          20 :                     cflags |= REG_EXTENDED;
     340          20 :                     break;
     341          12 :                 case 'q':
     342          12 :                     cflags &= ~REG_ADVANCED;
     343          12 :                     cflags |= REG_QUOTE;
     344          12 :                     break;
     345           4 :                 case 'o':       /* o for opaque */
     346           4 :                     cflags |= REG_NOSUB;
     347           4 :                     break;
     348           4 :                 case 's':       /* s for start */
     349           4 :                     cflags |= REG_BOSONLY;
     350           4 :                     break;
     351          12 :                 case '+':
     352          12 :                     cflags |= REG_FAKE;
     353          12 :                     break;
     354           0 :                 case ',':
     355           0 :                     cflags |= REG_PROGRESS;
     356           0 :                     break;
     357           0 :                 case '.':
     358           0 :                     cflags |= REG_DUMP;
     359           0 :                     break;
     360           0 :                 case ':':
     361           0 :                     eflags |= REG_MTRACE;
     362           0 :                     break;
     363           0 :                 case ';':
     364           0 :                     eflags |= REG_FTRACE;
     365           0 :                     break;
     366          12 :                 case '^':
     367          12 :                     eflags |= REG_NOTBOL;
     368          12 :                     break;
     369           8 :                 case '$':
     370           8 :                     eflags |= REG_NOTEOL;
     371           8 :                     break;
     372          34 :                 case 't':
     373          34 :                     cflags |= REG_EXPECT;
     374          34 :                     break;
     375          10 :                 case '%':
     376          10 :                     eflags |= REG_SMALL;
     377          10 :                     break;
     378             : 
     379             :                     /* These flags define expected info bits: */
     380          10 :                 case 'A':
     381          10 :                     info |= REG_UBSALNUM;
     382          10 :                     break;
     383           8 :                 case 'B':
     384           8 :                     info |= REG_UBRACES;
     385           8 :                     break;
     386          80 :                 case 'E':
     387          80 :                     info |= REG_UBBS;
     388          80 :                     break;
     389          68 :                 case 'H':
     390          68 :                     info |= REG_ULOOKAROUND;
     391          68 :                     break;
     392          20 :                 case 'I':
     393          20 :                     info |= REG_UIMPOSSIBLE;
     394          20 :                     break;
     395         324 :                 case 'L':
     396         324 :                     info |= REG_ULOCALE;
     397         324 :                     break;
     398          86 :                 case 'M':
     399          86 :                     info |= REG_UUNPORT;
     400          86 :                     break;
     401          92 :                 case 'N':
     402          92 :                     info |= REG_UEMPTYMATCH;
     403          92 :                     break;
     404         610 :                 case 'P':
     405         610 :                     info |= REG_UNONPOSIX;
     406         610 :                     break;
     407          70 :                 case 'Q':
     408          70 :                     info |= REG_UBOUNDS;
     409          70 :                     break;
     410          84 :                 case 'R':
     411          84 :                     info |= REG_UBACKREF;
     412          84 :                     break;
     413          50 :                 case 'S':
     414          50 :                     info |= REG_UUNSPEC;
     415          50 :                     break;
     416          40 :                 case 'T':
     417          40 :                     info |= REG_USHORTEST;
     418          40 :                     break;
     419           2 :                 case 'U':
     420           2 :                     info |= REG_UPBOTCH;
     421           2 :                     break;
     422             : 
     423           0 :                 default:
     424           0 :                     ereport(ERROR,
     425             :                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     426             :                              errmsg("invalid regular expression test option: \"%.*s\"",
     427             :                                     pg_mblen(opt_p + i), opt_p + i)));
     428             :                     break;
     429             :             }
     430             :         }
     431             :     }
     432        1388 :     flags->cflags = cflags;
     433        1388 :     flags->eflags = eflags;
     434        1388 :     flags->info = info;
     435        1388 : }
     436             : 
     437             : /*
     438             :  * setup_test_matches --- do the initial matching
     439             :  *
     440             :  * To simplify memory management, we do all the matching in one swoop.
     441             :  * The returned test_regex_ctx contains the locations of all the substrings
     442             :  * matching the pattern.
     443             :  */
     444             : static test_regex_ctx *
     445        1176 : setup_test_matches(text *orig_str,
     446             :                    regex_t *cpattern, test_re_flags *re_flags,
     447             :                    Oid collation,
     448             :                    bool use_subpatterns)
     449             : {
     450        1176 :     test_regex_ctx *matchctx = palloc0(sizeof(test_regex_ctx));
     451        1176 :     int         eml = pg_database_encoding_max_length();
     452             :     int         orig_len;
     453             :     pg_wchar   *wide_str;
     454             :     int         wide_len;
     455             :     regmatch_t *pmatch;
     456             :     int         pmatch_len;
     457             :     int         array_len;
     458             :     int         array_idx;
     459             :     int         prev_match_end;
     460             :     int         start_search;
     461        1176 :     int         maxlen = 0;     /* largest fetch length in characters */
     462             : 
     463             :     /* save flags */
     464        1176 :     matchctx->re_flags = *re_flags;
     465             : 
     466             :     /* save original string --- we'll extract result substrings from it */
     467        1176 :     matchctx->orig_str = orig_str;
     468             : 
     469             :     /* convert string to pg_wchar form for matching */
     470        1176 :     orig_len = VARSIZE_ANY_EXHDR(orig_str);
     471        1176 :     wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
     472        1176 :     wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
     473             : 
     474             :     /* do we want to remember subpatterns? */
     475        1176 :     if (use_subpatterns && cpattern->re_nsub > 0)
     476             :     {
     477         254 :         matchctx->npatterns = cpattern->re_nsub + 1;
     478         254 :         pmatch_len = cpattern->re_nsub + 1;
     479             :     }
     480             :     else
     481             :     {
     482         922 :         use_subpatterns = false;
     483         922 :         matchctx->npatterns = 1;
     484         922 :         pmatch_len = 1;
     485             :     }
     486             : 
     487             :     /* temporary output space for RE package */
     488        1176 :     pmatch = palloc(sizeof(regmatch_t) * pmatch_len);
     489             : 
     490             :     /*
     491             :      * the real output space (grown dynamically if needed)
     492             :      *
     493             :      * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
     494             :      * than at 2^27
     495             :      */
     496        1176 :     array_len = re_flags->glob ? 255 : 31;
     497        1176 :     matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
     498        1176 :     array_idx = 0;
     499             : 
     500             :     /* search for the pattern, perhaps repeatedly */
     501        1176 :     prev_match_end = 0;
     502        1176 :     start_search = 0;
     503        1176 :     while (test_re_execute(cpattern, wide_str, wide_len,
     504             :                            start_search,
     505             :                            &matchctx->details,
     506             :                            pmatch_len, pmatch,
     507             :                            re_flags->eflags))
     508             :     {
     509             :         /* enlarge output space if needed */
     510         924 :         while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
     511             :         {
     512           0 :             array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
     513           0 :             if (array_len > MaxAllocSize / sizeof(int))
     514           0 :                 ereport(ERROR,
     515             :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     516             :                          errmsg("too many regular expression matches")));
     517           0 :             matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
     518             :                                                     sizeof(int) * array_len);
     519             :         }
     520             : 
     521             :         /* save this match's locations */
     522        2184 :         for (int i = 0; i < matchctx->npatterns; i++)
     523             :         {
     524        1260 :             int         so = pmatch[i].rm_so;
     525        1260 :             int         eo = pmatch[i].rm_eo;
     526             : 
     527        1260 :             matchctx->match_locs[array_idx++] = so;
     528        1260 :             matchctx->match_locs[array_idx++] = eo;
     529        1260 :             if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
     530         876 :                 maxlen = (eo - so);
     531             :         }
     532         924 :         matchctx->nmatches++;
     533         924 :         prev_match_end = pmatch[0].rm_eo;
     534             : 
     535             :         /* if not glob, stop after one match */
     536         924 :         if (!re_flags->glob)
     537         924 :             break;
     538             : 
     539             :         /*
     540             :          * Advance search position.  Normally we start the next search at the
     541             :          * end of the previous match; but if the match was of zero length, we
     542             :          * have to advance by one character, or we'd just find the same match
     543             :          * again.
     544             :          */
     545           0 :         start_search = prev_match_end;
     546           0 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
     547           0 :             start_search++;
     548           0 :         if (start_search > wide_len)
     549           0 :             break;
     550             :     }
     551             : 
     552             :     /*
     553             :      * If we had no match, but "partial" and "indices" are set, emit the
     554             :      * details.
     555             :      */
     556        1176 :     if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices)
     557             :     {
     558             :         /* enlarge output space if needed */
     559          36 :         while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
     560             :         {
     561           0 :             array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
     562           0 :             if (array_len > MaxAllocSize / sizeof(int))
     563           0 :                 ereport(ERROR,
     564             :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     565             :                          errmsg("too many regular expression matches")));
     566           0 :             matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
     567             :                                                     sizeof(int) * array_len);
     568             :         }
     569             : 
     570          36 :         matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so;
     571          36 :         matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo;
     572             :         /* we don't have pmatch data, so emit -1 */
     573          40 :         for (int i = 1; i < matchctx->npatterns; i++)
     574             :         {
     575           4 :             matchctx->match_locs[array_idx++] = -1;
     576           4 :             matchctx->match_locs[array_idx++] = -1;
     577             :         }
     578          36 :         matchctx->nmatches++;
     579             :     }
     580             : 
     581             :     Assert(array_idx <= array_len);
     582             : 
     583        1176 :     if (eml > 1)
     584             :     {
     585        1176 :         int64       maxsiz = eml * (int64) maxlen;
     586             :         int         conv_bufsiz;
     587             : 
     588             :         /*
     589             :          * Make the conversion buffer large enough for any substring of
     590             :          * interest.
     591             :          *
     592             :          * Worst case: assume we need the maximum size (maxlen*eml), but take
     593             :          * advantage of the fact that the original string length in bytes is
     594             :          * an upper bound on the byte length of any fetched substring (and we
     595             :          * know that len+1 is safe to allocate because the varlena header is
     596             :          * longer than 1 byte).
     597             :          */
     598        1176 :         if (maxsiz > orig_len)
     599         830 :             conv_bufsiz = orig_len + 1;
     600             :         else
     601         346 :             conv_bufsiz = maxsiz + 1;   /* safe since maxsiz < 2^30 */
     602             : 
     603        1176 :         matchctx->conv_buf = palloc(conv_bufsiz);
     604        1176 :         matchctx->conv_bufsiz = conv_bufsiz;
     605        1176 :         matchctx->wide_str = wide_str;
     606             :     }
     607             :     else
     608             :     {
     609             :         /* No need to keep the wide string if we're in a single-byte charset. */
     610           0 :         pfree(wide_str);
     611           0 :         matchctx->wide_str = NULL;
     612           0 :         matchctx->conv_buf = NULL;
     613           0 :         matchctx->conv_bufsiz = 0;
     614             :     }
     615             : 
     616             :     /* Clean up temp storage */
     617        1176 :     pfree(pmatch);
     618             : 
     619        1176 :     return matchctx;
     620             : }
     621             : 
     622             : /*
     623             :  * build_test_info_result - build output array describing compiled regexp
     624             :  *
     625             :  * This borrows some code from Tcl's TclRegAbout().
                     :  *
                     :  * cpattern: compiled regex; re_nsub and re_info are read from it.
                     :  * flags: test options; only flags->info (the expected re_info bits) is
                     :  * used here.
                     :  *
                     :  * The result is a one-dimensional text array with lower bound 1.
                     :  * Element 1 is re_nsub printed as a decimal integer.  It is followed by
                     :  * at most one element per infonames[] entry, in table order:
                     :  *   bit set in re_info and set in flags->info:    the flag name
                     :  *   bit set in re_info, clear in flags->info:     "unexpected NAME!"
                     :  *   bit clear in re_info, set in flags->info:     "missing NAME!"
                     :  *   bit clear in both:                            no element
                     :  * so a result without any "!" entry means the compiled flags matched the
                     :  * expectation exactly (for the bits listed in the table).
     626             :  */
     627             : static ArrayType *
     628        1176 : build_test_info_result(regex_t *cpattern, test_re_flags *flags)
     629             : {
     630             :     /* Translation data for flag bits in regex_t.re_info */
     631             :     struct infoname
     632             :     {
     633             :         int         bit;    /* REG_Uxxx mask tested against re_info and flags->info */
     634             :         const char *text;   /* name reported for that bit */
     635             :     };
     636             :     static const struct infoname infonames[] = {
     637             :         {REG_UBACKREF, "REG_UBACKREF"},
     638             :         {REG_ULOOKAROUND, "REG_ULOOKAROUND"},
     639             :         {REG_UBOUNDS, "REG_UBOUNDS"},
     640             :         {REG_UBRACES, "REG_UBRACES"},
     641             :         {REG_UBSALNUM, "REG_UBSALNUM"},
     642             :         {REG_UPBOTCH, "REG_UPBOTCH"},
     643             :         {REG_UBBS, "REG_UBBS"},
     644             :         {REG_UNONPOSIX, "REG_UNONPOSIX"},
     645             :         {REG_UUNSPEC, "REG_UUNSPEC"},
     646             :         {REG_UUNPORT, "REG_UUNPORT"},
     647             :         {REG_ULOCALE, "REG_ULOCALE"},
     648             :         {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
     649             :         {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
     650             :         {REG_USHORTEST, "REG_USHORTEST"},
     651             :         {0, NULL}   /* terminator: the scan loop below stops at bit == 0 */
     652             :     };
     653             :     const struct infoname *inf;
     654             :     Datum       elems[lengthof(infonames) + 1]; /* table rows (terminator included) + 1; the loop adds at most one element per real row, plus the re_nsub element */
     655        1176 :     int         nresults = 0;
     656             :     char        buf[80];    /* scratch for snprintf, which is bounded by sizeof(buf) */
     657             :     int         dims[1];
     658             :     int         lbs[1];
     659             : 
     660             :     /* Set up results: first, the number of subexpressions */
     661        1176 :     snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub);
     662        1176 :     elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     663             : 
     664             :     /* Report individual info bit states */
     665       17640 :     for (inf = infonames; inf->bit != 0; inf++)
     666             :     {
     667       16464 :         if (cpattern->re_info & inf->bit)
     668             :         {
     669        1498 :             if (flags->info & inf->bit) /* set and expected: report the bare name */
     670        1498 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text));
     671             :             else
     672             :             {
     673           0 :                 snprintf(buf, sizeof(buf), "unexpected %s!", inf->text);
     674           0 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     675             :             }
     676             :         }
     677             :         else
     678             :         {
     679       14966 :             if (flags->info & inf->bit) /* expected but not set; clear-and-unexpected bits add nothing */
     680             :             {
     681           0 :                 snprintf(buf, sizeof(buf), "missing %s!", inf->text);
     682           0 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     683             :             }
     684             :         }
     685             :     }
     686             : 
     687             :     /* And form an array */
     688        1176 :     dims[0] = nresults;
     689        1176 :     lbs[0] = 1;
     690             :     /* XXX: this hardcodes assumptions about the text type */
     691        1176 :     return construct_md_array(elems, NULL, 1, dims, lbs, /* NOTE(review): NULL is presumably the nulls array, i.e. no null elements -- confirm */
     692             :                               TEXTOID, -1, false, TYPALIGN_INT);
     693             : }
     694             : 
     695             : /*
     696             :  * build_test_match_result - build output array for current match
     697             :  *
     698             :  * Note that if the indices flag is set, we don't need any strings,
     699             :  * just the location data.
                     :  *
                     :  * The current match is selected by matchctx->next_match, which is only
                     :  * read here.  match_locs[] is read as consecutive (so, eo) pairs,
                     :  * npatterns pairs per match, so this match starts at index
                     :  * next_match * npatterns * 2.
                     :  *
                     :  * One array element is produced per pattern, by the first rule that
                     :  * applies:
                     :  *   indices mode:       the text "so eo", where eo is printed as eo - 1
                     :  *                       unless so < 0, in which case eo is printed as is
                     :  *   so < 0 or eo < 0:   a null element
                     :  *   conv_buf not NULL:  eo - so wide characters starting at wide_str + so,
                     :  *                       passed through pg_wchar2mb_with_len()
                     :  *   otherwise:          text_substr() applied to orig_str
                     :  * In indices mode with REG_EXPECT set in cflags, one more "so eo" element
                     :  * (same formatting) is appended for details.rm_extend.
                     :  *
                     :  * The result is a one-dimensional text array with lower bound 1.
                     :  * matchctx->elems and matchctx->nulls are used as workspace and are
                     :  * overwritten.  NOTE(review): they need room for npatterns + 1 entries in
                     :  * the REG_EXPECT case; the allocation is not visible here -- confirm.
     700             :  */
     701             : static ArrayType *
     702         960 : build_test_match_result(test_regex_ctx *matchctx)
     703             : {
     704         960 :     char       *buf = matchctx->conv_buf; /* NULL selects the text_substr() branch below */
     705         960 :     Datum      *elems = matchctx->elems;
     706         960 :     bool       *nulls = matchctx->nulls;
     707         960 :     bool        indices = matchctx->re_flags.indices;
     708             :     char        bufstr[80]; /* scratch for the "so eo" text */
     709             :     int         dims[1];
     710             :     int         lbs[1];
     711             :     int         loc;
     712             :     int         i;
     713             : 
     714             :     /* Extract matching substrings from the original string */
     715         960 :     loc = matchctx->next_match * matchctx->npatterns * 2; /* first so/eo slot of the current match */
     716        2260 :     for (i = 0; i < matchctx->npatterns; i++)
     717             :     {
     718        1300 :         int         so = matchctx->match_locs[loc++];
     719        1300 :         int         eo = matchctx->match_locs[loc++];
     720             : 
     721        1300 :         if (indices)
     722             :         {
     723             :             /* Report eo this way for consistency with Tcl */
     724         168 :             snprintf(bufstr, sizeof(bufstr), "%d %d",
     725             :                      so, so < 0 ? eo : eo - 1);
     726         168 :             elems[i] = PointerGetDatum(cstring_to_text(bufstr));
     727         168 :             nulls[i] = false;
     728             :         }
     729        1132 :         else if (so < 0 || eo < 0) /* a negative offset yields a null element */
     730             :         {
     731          24 :             elems[i] = (Datum) 0;
     732          24 :             nulls[i] = true;
     733             :         }
     734        1108 :         else if (buf)
     735             :         {
     736        1108 :             int         len = pg_wchar2mb_with_len(matchctx->wide_str + so,
     737             :                                                    buf,
     738             :                                                    eo - so);
     739             : 
     740             :             Assert(len < matchctx->conv_bufsiz);
     741        1108 :             elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
     742        1108 :             nulls[i] = false;
     743             :         }
     744             :         else
     745             :         {
     746           0 :             elems[i] = DirectFunctionCall3(text_substr,
     747             :                                            PointerGetDatum(matchctx->orig_str),
     748             :                                            Int32GetDatum(so + 1), /* so is used as a 0-based offset above; NOTE(review): +1 presumably because text_substr counts from 1 -- confirm */
     749             :                                            Int32GetDatum(eo - so));
     750           0 :             nulls[i] = false;
     751             :         }
     752             :     }
     753             : 
     754             :     /* In EXPECT indices mode, also report the "details" */
     755         960 :     if (indices && (matchctx->re_flags.cflags & REG_EXPECT))
     756             :     {
     757          56 :         int         so = matchctx->details.rm_extend.rm_so;
     758          56 :         int         eo = matchctx->details.rm_extend.rm_eo;
     759             : 
     760          56 :         snprintf(bufstr, sizeof(bufstr), "%d %d",
     761             :                  so, so < 0 ? eo : eo - 1);
     762          56 :         elems[i] = PointerGetDatum(cstring_to_text(bufstr)); /* i == npatterns here: the slot after the per-pattern elements */
     763          56 :         nulls[i] = false;
     764          56 :         i++;
     765             :     }
     766             : 
     767             :     /* And form an array */
     768         960 :     dims[0] = i; /* npatterns, or npatterns + 1 when the details element was appended */
     769         960 :     lbs[0] = 1;
     770             :     /* XXX: this hardcodes assumptions about the text type */
     771         960 :     return construct_md_array(elems, nulls, 1, dims, lbs,
     772             :                               TEXTOID, -1, false, TYPALIGN_INT);
     773             : }

Generated by: LCOV version 1.13