LCOV - code coverage report
Current view: top level - src/backend/regex - regexec.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 85.2 % 506 431
Test Date: 2026-02-17 17:20:33 Functions: 100.0 % 16 16
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*
       2              :  * re_*exec and friends - match REs
       3              :  *
       4              :  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
       5              :  *
       6              :  * Development of this software was funded, in part, by Cray Research Inc.,
       7              :  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
       8              :  * Corporation, none of whom are responsible for the results.  The author
       9              :  * thanks all of them.
      10              :  *
      11              :  * Redistribution and use in source and binary forms -- with or without
      12              :  * modification -- are permitted for any purpose, provided that
      13              :  * redistributions in source form retain this entire copyright notice and
      14              :  * indicate the origin and nature of any modifications.
      15              :  *
      16              :  * I'd appreciate being given credit for this package in the documentation
      17              :  * of software which uses it, but that is not a requirement.
      18              :  *
      19              :  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
      20              :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      21              :  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
      22              :  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      23              :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      24              :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
      25              :  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      26              :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      27              :  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
      28              :  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      29              :  *
      30              :  * src/backend/regex/regexec.c
      31              :  *
      32              :  */
      33              : 
      34              : #include "regex/regguts.h"
      35              : 
      36              : 
      37              : 
      38              : /* lazy-DFA representation */
      39              : struct arcp
      40              : {                               /* "pointer" to an outarc */
      41              :     struct sset *ss;
      42              :     color       co;
      43              : };
      44              : 
      45              : struct sset
      46              : {                               /* state set */
      47              :     unsigned   *states;         /* pointer to bitvector */
      48              :     unsigned    hash;           /* hash of bitvector */
      49              : #define  HASH(bv, nw)    (((nw) == 1) ? *(bv) : hash(bv, nw))
      50              : #define  HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
      51              :         memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
      52              :     int         flags;
      53              : #define  STARTER     01         /* the initial state set */
      54              : #define  POSTSTATE   02         /* includes the goal state */
      55              : #define  LOCKED      04         /* locked in cache */
      56              : #define  NOPROGRESS  010        /* zero-progress state set */
      57              :     struct arcp ins;            /* chain of inarcs pointing here */
      58              :     chr        *lastseen;       /* last entered on arrival here */
      59              :     struct sset **outs;         /* outarc vector indexed by color */
      60              :     struct arcp *inchain;       /* chain-pointer vector for outarcs */
      61              : };
      62              : 
      63              : struct dfa
      64              : {
      65              :     int         nssets;         /* size of cache */
      66              :     int         nssused;        /* how many entries occupied yet */
      67              :     int         nstates;        /* number of states */
      68              :     int         ncolors;        /* length of outarc and inchain vectors */
      69              :     int         wordsper;       /* length of state-set bitvectors */
      70              :     struct sset *ssets;         /* state-set cache */
      71              :     unsigned   *statesarea;     /* bitvector storage */
      72              :     unsigned   *work;           /* pointer to work area within statesarea */
      73              :     struct sset **outsarea;     /* outarc-vector storage */
      74              :     struct arcp *incarea;       /* inchain storage */
      75              :     struct cnfa *cnfa;
      76              :     struct colormap *cm;
      77              :     chr        *lastpost;       /* location of last cache-flushed success */
      78              :     chr        *lastnopr;       /* location of last cache-flushed NOPROGRESS */
      79              :     struct sset *search;        /* replacement-search-pointer memory */
      80              :     int         backno;         /* if DFA for a backref, subno it refers to */
      81              :     short       backmin;        /* min repetitions for backref */
      82              :     short       backmax;        /* max repetitions for backref */
      83              :     bool        ismalloced;     /* should this struct dfa be freed? */
      84              :     bool        arraysmalloced; /* should its subsidiary arrays be freed? */
      85              : };
      86              : 
      87              : #define WORK    1               /* number of work bitvectors needed */
      88              : 
      89              : /* setup for non-malloc allocation for small cases */
      90              : #define FEWSTATES   20          /* must be less than UBITS */
      91              : #define FEWCOLORS   15
      92              : struct smalldfa
      93              : {
      94              :     struct dfa  dfa;            /* must be first */
      95              :     struct sset ssets[FEWSTATES * 2];
      96              :     unsigned    statesarea[FEWSTATES * 2 + WORK];
      97              :     struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
      98              :     struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
      99              : };
     100              : 
     101              : #define DOMALLOC    ((struct smalldfa *)NULL)   /* force malloc */
     102              : 
     103              : 
     104              : 
     105              : /* internal variables, bundled for easy passing around */
     106              : struct vars
     107              : {
     108              :     regex_t    *re;
     109              :     struct guts *g;
     110              :     int         eflags;         /* copies of arguments */
     111              :     size_t      nmatch;
     112              :     regmatch_t *pmatch;
     113              :     rm_detail_t *details;
     114              :     chr        *start;          /* start of string */
     115              :     chr        *search_start;   /* search start of string */
     116              :     chr        *stop;           /* just past end of string */
     117              :     int         err;            /* error code if any (0 none) */
     118              :     struct dfa **subdfas;       /* per-tree-subre DFAs */
     119              :     struct dfa **ladfas;        /* per-lacon-subre DFAs */
     120              :     struct sset **lblastcss;    /* per-lacon-subre lookbehind restart data */
     121              :     chr       **lblastcp;       /* per-lacon-subre lookbehind restart data */
     122              :     struct smalldfa dfa1;
     123              :     struct smalldfa dfa2;
     124              : };
     125              : 
     126              : #define VISERR(vv)  ((vv)->err != 0) /* have we seen an error yet? */
     127              : #define ISERR() VISERR(v)
     128              : #define VERR(vv,e)  ((vv)->err = ((vv)->err ? (vv)->err : (e)))
     129              : #define ERR(e)  VERR(v, e)      /* record an error */
     130              : #define NOERR() {if (ISERR()) return v->err;}    /* if error seen, return it */
     131              : #define OFF(p)  ((p) - v->start)
     132              : #define LOFF(p) ((long)OFF(p))
     133              : 
     134              : 
     135              : 
     136              : /*
     137              :  * forward declarations
     138              :  */
     139              : /* === regexec.c === */
     140              : static struct dfa *getsubdfa(struct vars *v, struct subre *t);
     141              : static struct dfa *getladfa(struct vars *v, int n);
     142              : static int  find(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
     143              : static int  cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
     144              : static int  cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm,
     145              :                       struct dfa *d, struct dfa *s, chr **coldp);
     146              : static void zapallsubs(regmatch_t *p, size_t n);
     147              : static void zaptreesubs(struct vars *v, struct subre *t);
     148              : static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end);
     149              : static int  cdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     150              : static int  ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     151              : static int  crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     152              : static int  cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     153              : static int  caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     154              : static int  citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     155              : static int  creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     156              : 
     157              : /* === rege_dfa.c === */
     158              : static chr *longest(struct vars *v, struct dfa *d,
     159              :                     chr *start, chr *stop, int *hitstopp);
     160              : static chr *shortest(struct vars *v, struct dfa *d, chr *start, chr *min,
     161              :                      chr *max, chr **coldp, int *hitstopp);
     162              : static int  matchuntil(struct vars *v, struct dfa *d, chr *probe,
     163              :                        struct sset **lastcss, chr **lastcp);
     164              : static chr *dfa_backref(struct vars *v, struct dfa *d, chr *start,
     165              :                         chr *min, chr *max, bool shortest);
     166              : static chr *lastcold(struct vars *v, struct dfa *d);
     167              : static struct dfa *newdfa(struct vars *v, struct cnfa *cnfa,
     168              :                           struct colormap *cm, struct smalldfa *sml);
     169              : static void freedfa(struct dfa *d);
     170              : static unsigned hash(unsigned *uv, int n);
     171              : static struct sset *initialize(struct vars *v, struct dfa *d, chr *start);
     172              : static struct sset *miss(struct vars *v, struct dfa *d, struct sset *css,
     173              :                          color co, chr *cp, chr *start);
     174              : static int  lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co);
     175              : static struct sset *getvacant(struct vars *v, struct dfa *d, chr *cp,
     176              :                               chr *start);
     177              : static struct sset *pickss(struct vars *v, struct dfa *d, chr *cp,
     178              :                            chr *start);
     179              : 
     180              : 
     181              : /*
     182              :  * pg_regexec - match regular expression
     183              :  */
     184              : int
     185      4135127 : pg_regexec(regex_t *re,
     186              :            const chr *string,
     187              :            size_t len,
     188              :            size_t search_start,
     189              :            rm_detail_t *details,
     190              :            size_t nmatch,
     191              :            regmatch_t pmatch[],
     192              :            int flags)
     193              : {
     194              :     struct vars var;
     195      4135127 :     struct vars *v = &var;
     196              :     int         st;
     197              :     size_t      n;
     198              :     size_t      i;
     199              :     int         backref;
     200              : 
     201              : #define  LOCALMAT    20
     202              :     regmatch_t  mat[LOCALMAT];
     203              : 
     204              : #define  LOCALDFAS   40
     205              :     struct dfa *subdfas[LOCALDFAS];
     206              : 
     207              :     /* sanity checks */
     208      4135127 :     if (re == NULL || string == NULL || re->re_magic != REMAGIC)
     209            0 :         return REG_INVARG;
     210      4135127 :     if (re->re_csize != sizeof(chr))
     211            0 :         return REG_MIXED;
     212      4135127 :     if (search_start > len)
     213            3 :         return REG_NOMATCH;
     214              : 
     215              :     /* Initialize locale-dependent support */
     216      4135124 :     pg_set_regex_collation(re->re_collation);
     217              : 
     218              :     /* setup */
     219      4135124 :     v->re = re;
     220      4135124 :     v->g = (struct guts *) re->re_guts;
     221      4135124 :     if ((v->g->cflags & REG_EXPECT) && details == NULL)
     222            0 :         return REG_INVARG;
     223      4135124 :     if (v->g->info & REG_UIMPOSSIBLE)
     224          716 :         return REG_NOMATCH;
     225      4134408 :     backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
     226      4134408 :     v->eflags = flags;
     227      4134408 :     if (backref && nmatch <= v->g->nsub)
     228              :     {
     229              :         /* need larger work area */
     230           91 :         v->nmatch = v->g->nsub + 1;
     231           91 :         if (v->nmatch <= LOCALMAT)
     232           90 :             v->pmatch = mat;
     233              :         else
     234            1 :             v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t));
     235           91 :         if (v->pmatch == NULL)
     236            0 :             return REG_ESPACE;
     237           91 :         zapallsubs(v->pmatch, v->nmatch);
     238              :     }
     239              :     else
     240              :     {
     241              :         /* we can store results directly in caller's array */
     242      4134317 :         v->pmatch = pmatch;
     243              :         /* ensure any extra entries in caller's array are filled with -1 */
     244      4134317 :         if (nmatch > 0)
     245       562465 :             zapallsubs(pmatch, nmatch);
     246              :         /* then forget about extra entries, to avoid useless work in find() */
     247      4134317 :         if (nmatch > v->g->nsub + 1)
     248          524 :             nmatch = v->g->nsub + 1;
     249      4134317 :         v->nmatch = nmatch;
     250              :     }
     251      4134408 :     v->details = details;
     252      4134408 :     v->start = (chr *) string;
     253      4134408 :     v->search_start = (chr *) string + search_start;
     254      4134408 :     v->stop = (chr *) string + len;
     255      4134408 :     v->err = 0;
     256      4134408 :     v->subdfas = NULL;
     257      4134408 :     v->ladfas = NULL;
     258      4134408 :     v->lblastcss = NULL;
     259      4134408 :     v->lblastcp = NULL;
     260              :     /* below this point, "goto cleanup" will behave sanely */
     261              : 
     262              :     assert(v->g->ntree >= 0);
     263      4134408 :     n = (size_t) v->g->ntree;
     264      4134408 :     if (n <= LOCALDFAS)
     265      4134406 :         v->subdfas = subdfas;
     266              :     else
     267              :     {
     268            2 :         v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
     269            2 :         if (v->subdfas == NULL)
     270              :         {
     271            0 :             st = REG_ESPACE;
     272            0 :             goto cleanup;
     273              :         }
     274              :     }
     275     12419366 :     for (i = 0; i < n; i++)
     276      8284958 :         v->subdfas[i] = NULL;
     277              : 
     278              :     assert(v->g->nlacons >= 0);
     279      4134408 :     n = (size_t) v->g->nlacons;
     280      4134408 :     if (n > 0)
     281              :     {
     282          630 :         v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
     283          630 :         if (v->ladfas == NULL)
     284              :         {
     285            0 :             st = REG_ESPACE;
     286            0 :             goto cleanup;
     287              :         }
     288         1922 :         for (i = 0; i < n; i++)
     289         1292 :             v->ladfas[i] = NULL;
     290          630 :         v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
     291          630 :         v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
     292          630 :         if (v->lblastcss == NULL || v->lblastcp == NULL)
     293              :         {
     294            0 :             st = REG_ESPACE;
     295            0 :             goto cleanup;
     296              :         }
     297         1922 :         for (i = 0; i < n; i++)
     298              :         {
     299         1292 :             v->lblastcss[i] = NULL;
     300         1292 :             v->lblastcp[i] = NULL;
     301              :         }
     302              :     }
     303              : 
     304              :     /* do it */
     305              :     assert(v->g->tree != NULL);
     306      4134408 :     if (backref)
     307          145 :         st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
     308              :     else
     309      4134263 :         st = find(v, &v->g->tree->cnfa, &v->g->cmap);
     310              : 
     311              :     /* on success, ensure caller's match vector is filled correctly */
     312      4134408 :     if (st == REG_OKAY && nmatch > 0)
     313              :     {
     314       452897 :         if (v->pmatch != pmatch)
     315              :         {
     316              :             /* copy portion of match vector over from (larger) work area */
     317              :             assert(nmatch <= v->nmatch);
     318           21 :             memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t));
     319              :         }
     320       452897 :         if (v->g->cflags & REG_NOSUB)
     321              :         {
     322              :             /* don't expose possibly-partial sub-match results to caller */
     323       449917 :             zapallsubs(pmatch, nmatch);
     324              :         }
     325              :     }
     326              : 
     327              :     /* clean up */
     328      3684491 : cleanup:
     329      4134408 :     if (v->pmatch != pmatch && v->pmatch != mat)
     330            1 :         FREE(v->pmatch);
     331      4134408 :     if (v->subdfas != NULL)
     332              :     {
     333      4134408 :         n = (size_t) v->g->ntree;
     334     12419366 :         for (i = 0; i < n; i++)
     335              :         {
     336      8284958 :             if (v->subdfas[i] != NULL)
     337        12484 :                 freedfa(v->subdfas[i]);
     338              :         }
     339      4134408 :         if (v->subdfas != subdfas)
     340            2 :             FREE(v->subdfas);
     341              :     }
     342      4134408 :     if (v->ladfas != NULL)
     343              :     {
     344          630 :         n = (size_t) v->g->nlacons;
     345         1922 :         for (i = 0; i < n; i++)
     346              :         {
     347         1292 :             if (v->ladfas[i] != NULL)
     348           53 :                 freedfa(v->ladfas[i]);
     349              :         }
     350          630 :         FREE(v->ladfas);
     351              :     }
     352      4134408 :     if (v->lblastcss != NULL)
     353          630 :         FREE(v->lblastcss);
     354      4134408 :     if (v->lblastcp != NULL)
     355          630 :         FREE(v->lblastcp);
     356              : 
     357              : #ifdef REG_DEBUG
     358              :     if (v->eflags & (REG_FTRACE | REG_MTRACE))
     359              :         fflush(stdout);
     360              : #endif
     361              : 
     362      4134408 :     return st;
     363              : }
     364              : 
     365              : /*
     366              :  * getsubdfa - create or re-fetch the DFA for a tree subre node
     367              :  *
     368              :  * We only need to create the DFA once per overall regex execution.
     369              :  * The DFA will be freed by the cleanup step in pg_regexec().
     370              :  */
     371              : static struct dfa *
     372        13170 : getsubdfa(struct vars *v,
     373              :           struct subre *t)
     374              : {
     375        13170 :     struct dfa *d = v->subdfas[t->id];
     376              : 
     377        13170 :     if (d == NULL)
     378              :     {
     379        12484 :         d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
     380        12484 :         if (d == NULL)
     381            0 :             return NULL;
     382              :         /* set up additional info if this is a backref node */
     383        12484 :         if (t->op == 'b')
     384              :         {
     385          140 :             d->backno = t->backno;
     386          140 :             d->backmin = t->min;
     387          140 :             d->backmax = t->max;
     388              :         }
     389        12484 :         v->subdfas[t->id] = d;
     390              :     }
     391        13170 :     return d;
     392              : }
     393              : 
     394              : /*
     395              :  * getladfa - create or re-fetch the DFA for a LACON subre node
     396              :  *
     397              :  * Same as above, but for LACONs.
     398              :  */
     399              : static struct dfa *
     400          120 : getladfa(struct vars *v,
     401              :          int n)
     402              : {
     403              :     assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
     404              : 
     405          120 :     if (v->ladfas[n] == NULL)
     406              :     {
     407           53 :         struct subre *sub = &v->g->lacons[n];
     408              : 
     409           53 :         v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
     410              :         /* a LACON can't contain a backref, so nothing else to do */
     411              :     }
     412          120 :     return v->ladfas[n];
     413              : }
     414              : 
     415              : /*
     416              :  * find - find a match for the main NFA (no-complications case)
     417              :  */
     418              : static int
     419      4134263 : find(struct vars *v,
     420              :      struct cnfa *cnfa,
     421              :      struct colormap *cm)
     422              : {
     423              :     struct dfa *s;
     424              :     struct dfa *d;
     425              :     chr        *begin;
     426      4134263 :     chr        *end = NULL;
     427              :     chr        *cold;
     428              :     chr        *open;           /* open and close of range of possible starts */
     429              :     chr        *close;
     430              :     int         hitend;
     431      4134263 :     int         shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
     432              : 
     433              :     /* first, a shot with the search RE */
     434      4134263 :     s = newdfa(v, &v->g->search, cm, &v->dfa1);
     435      4134263 :     if (s == NULL)
     436            0 :         return v->err;
     437              :     MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
     438      4134263 :     cold = NULL;
     439      4134263 :     close = shortest(v, s, v->search_start, v->search_start, v->stop,
     440              :                      &cold, (int *) NULL);
     441      4134263 :     freedfa(s);
     442      4134263 :     NOERR();
     443      4134263 :     if (v->g->cflags & REG_EXPECT)
     444              :     {
     445              :         assert(v->details != NULL);
     446           27 :         if (cold != NULL)
     447           27 :             v->details->rm_extend.rm_so = OFF(cold);
     448              :         else
     449            0 :             v->details->rm_extend.rm_so = OFF(v->stop);
     450           27 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
     451              :     }
     452      4134263 :     if (close == NULL)          /* not found */
     453      3558661 :         return REG_NOMATCH;
     454       575602 :     if (v->nmatch == 0)          /* found, don't need exact location */
     455       122763 :         return REG_OKAY;
     456              : 
     457              :     /* find starting point and match */
     458              :     assert(cold != NULL);
     459       452839 :     open = cold;
     460       452839 :     cold = NULL;
     461              :     MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
     462       452839 :     d = newdfa(v, cnfa, cm, &v->dfa1);
     463       452839 :     if (d == NULL)
     464            0 :         return v->err;
     465       452868 :     for (begin = open; begin <= close; begin++)
     466              :     {
     467              :         MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
     468       452868 :         if (shorter)
     469           62 :             end = shortest(v, d, begin, begin, v->stop,
     470              :                            (chr **) NULL, &hitend);
     471              :         else
     472       452806 :             end = longest(v, d, begin, v->stop, &hitend);
     473       452868 :         if (ISERR())
     474              :         {
     475            0 :             freedfa(d);
     476            0 :             return v->err;
     477              :         }
     478       452868 :         if (hitend && cold == NULL)
     479         3585 :             cold = begin;
     480       452868 :         if (end != NULL)
     481       452839 :             break;              /* NOTE BREAK OUT */
     482              :     }
     483              :     assert(end != NULL);        /* search RE succeeded so loop should */
     484       452839 :     freedfa(d);
     485              : 
     486              :     /* and pin down details */
     487              :     assert(v->nmatch > 0);
     488       452839 :     v->pmatch[0].rm_so = OFF(begin);
     489       452839 :     v->pmatch[0].rm_eo = OFF(end);
     490       452839 :     if (v->g->cflags & REG_EXPECT)
     491              :     {
     492           10 :         if (cold != NULL)
     493            5 :             v->details->rm_extend.rm_so = OFF(cold);
     494              :         else
     495            5 :             v->details->rm_extend.rm_so = OFF(v->stop);
     496           10 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
     497              :     }
     498       452839 :     if (v->nmatch == 1)          /* no need for submatches */
     499       450616 :         return REG_OKAY;
     500              : 
     501              :     /* find submatches */
     502         2223 :     return cdissect(v, v->g->tree, begin, end);
     503              : }
     504              : 
     505              : /*
     506              :  * cfind - find a match for the main NFA (with complications)
     507              :  */
     508              : static int
     509          145 : cfind(struct vars *v,
     510              :       struct cnfa *cnfa,
     511              :       struct colormap *cm)
     512              : {
     513              :     struct dfa *s;
     514              :     struct dfa *d;
     515              :     chr        *cold;
     516              :     int         ret;
     517              : 
     518          145 :     s = newdfa(v, &v->g->search, cm, &v->dfa1);
     519          145 :     if (s == NULL)
     520            0 :         return v->err;
     521          145 :     d = newdfa(v, cnfa, cm, &v->dfa2);
     522          145 :     if (d == NULL)
     523              :     {
     524            0 :         freedfa(s);
     525            0 :         return v->err;
     526              :     }
     527              : 
     528          145 :     ret = cfindloop(v, cnfa, cm, d, s, &cold);
     529              : 
     530          145 :     freedfa(d);
     531          145 :     freedfa(s);
     532          145 :     NOERR();
     533          145 :     if (v->g->cflags & REG_EXPECT)
     534              :     {
     535              :         assert(v->details != NULL);
     536            0 :         if (cold != NULL)
     537            0 :             v->details->rm_extend.rm_so = OFF(cold);
     538              :         else
     539            0 :             v->details->rm_extend.rm_so = OFF(v->stop);
     540            0 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
     541              :     }
     542          145 :     return ret;
     543              : }
     544              : 
     545              : /*
     546              :  * cfindloop - the heart of cfind
     547              :  */
     548              : static int
     549          145 : cfindloop(struct vars *v,
     550              :           struct cnfa *cnfa,
     551              :           struct colormap *cm,
     552              :           struct dfa *d,
     553              :           struct dfa *s,
     554              :           chr **coldp)          /* where to put coldstart pointer */
     555              : {
     556              :     chr        *begin;
     557              :     chr        *end;
     558              :     chr        *cold;
     559              :     chr        *open;           /* open and close of range of possible starts */
     560              :     chr        *close;
     561              :     chr        *estart;
     562              :     chr        *estop;
     563              :     int         er;
     564          145 :     int         shorter = v->g->tree->flags & SHORTER;
     565              :     int         hitend;
     566              : 
     567              :     assert(d != NULL && s != NULL);
     568          145 :     cold = NULL;
     569          145 :     close = v->search_start;
     570              :     do
     571              :     {
     572              :         /* Search with the search RE for match range at/beyond "close" */
     573              :         MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
     574          161 :         close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
     575          161 :         if (ISERR())
     576              :         {
     577            0 :             *coldp = cold;
     578            0 :             return v->err;
     579              :         }
     580          161 :         if (close == NULL)
     581           15 :             break;              /* no more possible match anywhere */
     582              :         assert(cold != NULL);
     583          146 :         open = cold;
     584          146 :         cold = NULL;
     585              :         /* Search for matches starting between "open" and "close" inclusive */
     586              :         MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
     587          513 :         for (begin = open; begin <= close; begin++)
     588              :         {
     589              :             MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
     590          452 :             estart = begin;
     591          452 :             estop = v->stop;
     592              :             for (;;)
     593              :             {
     594              :                 /* Here we use the top node's detailed RE */
     595          646 :                 if (shorter)
     596           97 :                     end = shortest(v, d, begin, estart,
     597              :                                    estop, (chr **) NULL, &hitend);
     598              :                 else
     599          549 :                     end = longest(v, d, begin, estop,
     600              :                                   &hitend);
     601          646 :                 if (ISERR())
     602              :                 {
     603            0 :                     *coldp = cold;
     604            0 :                     return v->err;
     605              :                 }
     606          646 :                 if (hitend && cold == NULL)
     607          104 :                     cold = begin;
     608          646 :                 if (end == NULL)
     609          349 :                     break;      /* no match with this begin point, try next */
     610              :                 MDEBUG(("tentative end %ld\n", LOFF(end)));
     611              :                 /* Dissect the potential match to see if it really matches */
     612          297 :                 er = cdissect(v, v->g->tree, begin, end);
     613          297 :                 if (er == REG_OKAY)
     614              :                 {
     615           85 :                     if (v->nmatch > 0)
     616              :                     {
     617           85 :                         v->pmatch[0].rm_so = OFF(begin);
     618           85 :                         v->pmatch[0].rm_eo = OFF(end);
     619              :                     }
     620           85 :                     *coldp = cold;
     621           85 :                     return REG_OKAY;
     622              :                 }
     623          212 :                 if (er != REG_NOMATCH)
     624              :                 {
     625            0 :                     ERR(er);
     626            0 :                     *coldp = cold;
     627            0 :                     return er;
     628              :                 }
     629              :                 /* Try next longer/shorter match with same begin point */
     630          212 :                 if (shorter)
     631              :                 {
     632           81 :                     if (end == estop)
     633           12 :                         break;  /* no more, so try next begin point */
     634           69 :                     estart = end + 1;
     635              :                 }
     636              :                 else
     637              :                 {
     638          131 :                     if (end == begin)
     639            6 :                         break;  /* no more, so try next begin point */
     640          125 :                     estop = end - 1;
     641              :                 }
     642              :             }                   /* end loop over endpoint positions */
     643              :         }                       /* end loop over beginning positions */
     644              : 
     645              :         /*
     646              :          * If we get here, there is no possible match starting at or before
     647              :          * "close", so consider matches beyond that.  We'll do a fresh search
     648              :          * with the search RE to find a new promising match range.
     649              :          */
     650           61 :         close++;
     651           61 :     } while (close < v->stop);
     652              : 
     653           60 :     *coldp = cold;
     654           60 :     return REG_NOMATCH;
     655              : }
     656              : 
     657              : /*
     658              :  * zapallsubs - initialize all subexpression matches to "no match"
     659              :  *
     660              :  * Note that p[0], the overall-match location, is not touched.
     661              :  */
     662              : static void
     663      1012473 : zapallsubs(regmatch_t *p,
     664              :            size_t n)
     665              : {
     666              :     size_t      i;
     667              : 
     668      1020808 :     for (i = n - 1; i > 0; i--)
     669              :     {
     670         8335 :         p[i].rm_so = -1;
     671         8335 :         p[i].rm_eo = -1;
     672              :     }
     673      1012473 : }
     674              : 
     675              : /*
     676              :  * zaptreesubs - initialize subexpressions within subtree to "no match"
     677              :  */
     678              : static void
     679          520 : zaptreesubs(struct vars *v,
     680              :             struct subre *t)
     681              : {
     682          520 :     int         n = t->capno;
     683              :     struct subre *t2;
     684              : 
     685          520 :     if (n > 0)
     686              :     {
     687          243 :         if ((size_t) n < v->nmatch)
     688              :         {
     689          243 :             v->pmatch[n].rm_so = -1;
     690          243 :             v->pmatch[n].rm_eo = -1;
     691              :         }
     692              :     }
     693              : 
     694          695 :     for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
     695          175 :         zaptreesubs(v, t2);
     696          520 : }
     697              : 
     698              : /*
     699              :  * subset - set subexpression match data for a successful subre
     700              :  */
     701              : static void
     702         4088 : subset(struct vars *v,
     703              :        struct subre *sub,
     704              :        chr *begin,
     705              :        chr *end)
     706              : {
     707         4088 :     int         n = sub->capno;
     708              : 
     709              :     assert(n > 0);
     710         4088 :     if ((size_t) n >= v->nmatch)
     711            9 :         return;
     712              : 
     713              :     MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
     714         4079 :     v->pmatch[n].rm_so = OFF(begin);
     715         4079 :     v->pmatch[n].rm_eo = OFF(end);
     716              : }
     717              : 
     718              : /*
     719              :  * cdissect - check backrefs and determine subexpression matches
     720              :  *
     721              :  * cdissect recursively processes a subre tree to check matching of backrefs
     722              :  * and/or identify submatch boundaries for capture nodes.  The proposed match
     723              :  * runs from "begin" to "end" (not including "end"), and we are basically
     724              :  * "dissecting" it to see where the submatches are.
     725              :  *
     726              :  * Before calling any level of cdissect, the caller must have run the node's
     727              :  * DFA and found that the proposed substring satisfies the DFA.  (We make
     728              :  * the caller do that because in concatenation and iteration nodes, it's
     729              :  * much faster to check all the substrings against the child DFAs before we
     730              :  * recurse.)
     731              :  *
     732              :  * A side-effect of a successful match is to save match locations for
     733              :  * capturing subexpressions in v->pmatch[].  This is a little bit tricky,
     734              :  * so we make the following rules:
     735              :  * 1. Before initial entry to cdissect, all match data must have been
     736              :  *    cleared (this is seen to by zapallsubs).
     737              :  * 2. Before any recursive entry to cdissect, the match data for that
     738              :  *    subexpression tree must be guaranteed clear (see zaptreesubs).
     739              :  * 3. When returning REG_OKAY, each level of cdissect will have saved
     740              :  *    any relevant match locations.
     741              :  * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
     742              :  *    that its subexpression match locations are again clear.
     743              :  * 5. No guarantees are made for error cases (i.e., other result codes).
     744              :  * 6. When a level of cdissect abandons a successful sub-match, it will
     745              :  *    clear that subtree's match locations with zaptreesubs before trying
     746              :  *    any new DFA match or cdissect call for that subtree or any subtree
     747              :  *    to its right (that is, any subtree that could have a backref into the
     748              :  *    abandoned match).
     749              :  * This may seem overly complicated, but it's difficult to simplify it
     750              :  * because of the provision that match locations must be reset before
     751              :  * any fresh DFA match (a rule that is needed to make dfa_backref safe).
     752              :  * That means it won't work to just reset relevant match locations at the
     753              :  * start of each cdissect level.
     754              :  */
     755              : static int                      /* regexec return code */
     756        15369 : cdissect(struct vars *v,
     757              :          struct subre *t,
     758              :          chr *begin,            /* beginning of relevant substring */
     759              :          chr *end)              /* end of same */
     760              : {
     761              :     int         er;
     762              : 
     763              :     assert(t != NULL);
     764              :     MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
     765              : 
     766              :     /* handy place to check for operation cancel */
     767        15369 :     INTERRUPT(v->re);
     768              :     /* ... and stack overrun */
     769        15369 :     if (STACK_TOO_DEEP(v->re))
     770            0 :         return REG_ETOOBIG;
     771              : 
     772        15369 :     switch (t->op)
     773              :     {
     774         8458 :         case '=':               /* terminal node */
     775              :             assert(t->child == NULL);
     776         8458 :             er = REG_OKAY;      /* no action, parent did the work */
     777         8458 :             break;
     778          209 :         case 'b':               /* back reference */
     779              :             assert(t->child == NULL);
     780          209 :             er = cbrdissect(v, t, begin, end);
     781          209 :             break;
     782         6468 :         case '.':               /* concatenation */
     783              :             assert(t->child != NULL);
     784         6468 :             if (t->child->flags & SHORTER)    /* reverse scan */
     785          164 :                 er = crevcondissect(v, t, begin, end);
     786              :             else
     787         6304 :                 er = ccondissect(v, t, begin, end);
     788         6468 :             break;
     789           80 :         case '|':               /* alternation */
     790              :             assert(t->child != NULL);
     791           80 :             er = caltdissect(v, t, begin, end);
     792           80 :             break;
     793          124 :         case '*':               /* iteration */
     794              :             assert(t->child != NULL);
     795          124 :             if (t->child->flags & SHORTER)    /* reverse scan */
     796            7 :                 er = creviterdissect(v, t, begin, end);
     797              :             else
     798          117 :                 er = citerdissect(v, t, begin, end);
     799          124 :             break;
     800           30 :         case '(':               /* no-op capture node */
     801              :             assert(t->child != NULL);
     802           30 :             er = cdissect(v, t->child, begin, end);
     803           30 :             break;
     804            0 :         default:
     805            0 :             er = REG_ASSERT;
     806            0 :             break;
     807              :     }
     808              : 
     809              :     /*
     810              :      * We should never have a match failure unless backrefs lurk below;
     811              :      * otherwise, either caller failed to check the DFA, or there's some
     812              :      * inconsistency between the DFA and the node's innards.
     813              :      */
     814              :     assert(er != REG_NOMATCH || (t->flags & BACKR));
     815              : 
     816              :     /*
     817              :      * If this node is marked as capturing, save successful match's location.
     818              :      */
     819        15369 :     if (t->capno > 0 && er == REG_OKAY)
     820         4088 :         subset(v, t, begin, end);
     821              : 
     822        15369 :     return er;
     823              : }
     824              : 
     825              : /*
     826              :  * ccondissect - dissect match for concatenation node
     827              :  */
     828              : static int                      /* regexec return code */
     829         6304 : ccondissect(struct vars *v,
     830              :             struct subre *t,
     831              :             chr *begin,         /* beginning of relevant substring */
     832              :             chr *end)           /* end of same */
     833              : {
     834         6304 :     struct subre *left = t->child;
     835         6304 :     struct subre *right = left->sibling;
     836              :     struct dfa *d;
     837              :     struct dfa *d2;
     838              :     chr        *mid;
     839              :     int         er;
     840              : 
     841              :     assert(t->op == '.');
     842              :     assert(left != NULL && left->cnfa.nstates > 0);
     843              :     assert(right != NULL && right->cnfa.nstates > 0);
     844              :     assert(right->sibling == NULL);
     845              :     assert(!(left->flags & SHORTER));
     846              : 
     847         6304 :     d = getsubdfa(v, left);
     848         6304 :     NOERR();
     849         6304 :     d2 = getsubdfa(v, right);
     850         6304 :     NOERR();
     851              :     MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
     852              : 
     853              :     /* pick a tentative midpoint */
     854         6304 :     mid = longest(v, d, begin, end, (int *) NULL);
     855         6304 :     NOERR();
     856         6304 :     if (mid == NULL)
     857            8 :         return REG_NOMATCH;
     858              :     MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
     859              : 
     860              :     /* iterate until satisfaction or failure */
     861              :     for (;;)
     862              :     {
     863              :         /* try this midpoint on for size */
     864         7632 :         if (longest(v, d2, mid, end, (int *) NULL) == end)
     865              :         {
     866         6255 :             er = cdissect(v, left, begin, mid);
     867         6255 :             if (er == REG_OKAY)
     868              :             {
     869         6205 :                 er = cdissect(v, right, mid, end);
     870         6205 :                 if (er == REG_OKAY)
     871              :                 {
     872              :                     /* satisfaction */
     873              :                     MDEBUG(("%d: successful\n", t->id));
     874         5957 :                     return REG_OKAY;
     875              :                 }
     876              :                 /* Reset left's matches (right should have done so itself) */
     877          248 :                 zaptreesubs(v, left);
     878              :             }
     879          298 :             if (er != REG_NOMATCH)
     880            0 :                 return er;
     881              :         }
     882         1675 :         NOERR();
     883              : 
     884              :         /* that midpoint didn't work, find a new one */
     885         1675 :         if (mid == begin)
     886              :         {
     887              :             /* all possibilities exhausted */
     888              :             MDEBUG(("%d: no midpoint\n", t->id));
     889           75 :             return REG_NOMATCH;
     890              :         }
     891         1600 :         mid = longest(v, d, begin, mid - 1, (int *) NULL);
     892         1600 :         NOERR();
     893         1600 :         if (mid == NULL)
     894              :         {
     895              :             /* failed to find a new one */
     896              :             MDEBUG(("%d: failed midpoint\n", t->id));
     897          264 :             return REG_NOMATCH;
     898              :         }
     899              :         MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
     900              :     }
     901              : 
     902              :     /* can't get here */
     903              :     return REG_ASSERT;
     904              : }
     905              : 
     906              : /*
     907              :  * crevcondissect - dissect match for concatenation node, shortest-first
     908              :  */
     909              : static int                      /* regexec return code */
     910          164 : crevcondissect(struct vars *v,
     911              :                struct subre *t,
     912              :                chr *begin,      /* beginning of relevant substring */
     913              :                chr *end)        /* end of same */
     914              : {
     915          164 :     struct subre *left = t->child;
     916          164 :     struct subre *right = left->sibling;
     917              :     struct dfa *d;
     918              :     struct dfa *d2;
     919              :     chr        *mid;
     920              :     int         er;
     921              : 
     922              :     assert(t->op == '.');
     923              :     assert(left != NULL && left->cnfa.nstates > 0);
     924              :     assert(right != NULL && right->cnfa.nstates > 0);
     925              :     assert(right->sibling == NULL);
     926              :     assert(left->flags & SHORTER);
     927              : 
     928          164 :     d = getsubdfa(v, left);
     929          164 :     NOERR();
     930          164 :     d2 = getsubdfa(v, right);
     931          164 :     NOERR();
     932              :     MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
     933              : 
     934              :     /* pick a tentative midpoint */
     935          164 :     mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
     936          164 :     NOERR();
     937          164 :     if (mid == NULL)
     938            0 :         return REG_NOMATCH;
     939              :     MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
     940              : 
     941              :     /* iterate until satisfaction or failure */
     942              :     for (;;)
     943              :     {
     944              :         /* try this midpoint on for size */
     945          605 :         if (longest(v, d2, mid, end, (int *) NULL) == end)
     946              :         {
     947           93 :             er = cdissect(v, left, begin, mid);
     948           93 :             if (er == REG_OKAY)
     949              :             {
     950           93 :                 er = cdissect(v, right, mid, end);
     951           93 :                 if (er == REG_OKAY)
     952              :                 {
     953              :                     /* satisfaction */
     954              :                     MDEBUG(("%d: successful\n", t->id));
     955           81 :                     return REG_OKAY;
     956              :                 }
     957              :                 /* Reset left's matches (right should have done so itself) */
     958           12 :                 zaptreesubs(v, left);
     959              :             }
     960           12 :             if (er != REG_NOMATCH)
     961            0 :                 return er;
     962              :         }
     963          524 :         NOERR();
     964              : 
     965              :         /* that midpoint didn't work, find a new one */
     966          524 :         if (mid == end)
     967              :         {
     968              :             /* all possibilities exhausted */
     969              :             MDEBUG(("%d: no midpoint\n", t->id));
     970           83 :             return REG_NOMATCH;
     971              :         }
     972          441 :         mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
     973          441 :         NOERR();
     974          441 :         if (mid == NULL)
     975              :         {
     976              :             /* failed to find a new one */
     977              :             MDEBUG(("%d: failed midpoint\n", t->id));
     978            0 :             return REG_NOMATCH;
     979              :         }
     980              :         MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
     981              :     }
     982              : 
     983              :     /* can't get here */
     984              :     return REG_ASSERT;
     985              : }
     986              : 
     987              : /*
     988              :  * cbrdissect - dissect match for backref node
     989              :  *
     990              :  * The backref match might already have been verified by dfa_backref(),
     991              :  * but we don't know that for sure so must check it here.
     992              :  */
     993              : static int                      /* regexec return code */
     994          209 : cbrdissect(struct vars *v,
     995              :            struct subre *t,
     996              :            chr *begin,          /* beginning of relevant substring */
     997              :            chr *end)            /* end of same */
     998              : {
     999          209 :     int         n = t->backno;
    1000              :     size_t      numreps;
    1001              :     size_t      tlen;
    1002              :     size_t      brlen;
    1003              :     chr        *brstring;
    1004              :     chr        *p;
    1005          209 :     int         min = t->min;
    1006          209 :     int         max = t->max;
    1007              : 
    1008              :     assert(t != NULL);
    1009              :     assert(t->op == 'b');
    1010              :     assert(n >= 0);
    1011              :     assert((size_t) n < v->nmatch);
    1012              : 
    1013              :     MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
    1014              :             LOFF(begin), LOFF(end)));
    1015              : 
    1016              :     /* get the backreferenced string */
    1017          209 :     if (v->pmatch[n].rm_so == -1)
    1018           55 :         return REG_NOMATCH;
    1019          154 :     brstring = v->start + v->pmatch[n].rm_so;
    1020          154 :     brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
    1021              : 
    1022              :     /* special cases for zero-length strings */
    1023          154 :     if (brlen == 0)
    1024              :     {
    1025              :         /*
    1026              :          * matches only if target is zero length, but any number of
    1027              :          * repetitions can be considered to be present
    1028              :          */
    1029            7 :         if (begin == end && min <= max)
    1030              :         {
    1031              :             MDEBUG(("%d: backref matched trivially\n", t->id));
    1032            7 :             return REG_OKAY;
    1033              :         }
    1034            0 :         return REG_NOMATCH;
    1035              :     }
    1036          147 :     if (begin == end)
    1037              :     {
    1038              :         /* matches only if zero repetitions are okay */
    1039            6 :         if (min == 0)
    1040              :         {
    1041              :             MDEBUG(("%d: backref matched trivially\n", t->id));
    1042            5 :             return REG_OKAY;
    1043              :         }
    1044            1 :         return REG_NOMATCH;
    1045              :     }
    1046              : 
    1047              :     /*
    1048              :      * check target length to see if it could possibly be an allowed number of
    1049              :      * repetitions of brstring
    1050              :      */
    1051              :     assert(end > begin);
    1052          141 :     tlen = end - begin;
    1053          141 :     if (tlen % brlen != 0)
    1054            5 :         return REG_NOMATCH;
    1055          136 :     numreps = tlen / brlen;
    1056          136 :     if (numreps < min || (numreps > max && max != DUPINF))
    1057            3 :         return REG_NOMATCH;
    1058              : 
    1059              :     /* okay, compare the actual string contents */
    1060          133 :     p = begin;
    1061          236 :     while (numreps-- > 0)
    1062              :     {
    1063          151 :         if ((*v->g->compare) (brstring, p, brlen) != 0)
    1064           48 :             return REG_NOMATCH;
    1065          103 :         p += brlen;
    1066              :     }
    1067              : 
    1068              :     MDEBUG(("%d: backref matched\n", t->id));
    1069           85 :     return REG_OKAY;
    1070              : }
    1071              : 
    1072              : /*
    1073              :  * caltdissect - dissect match for alternation node
    1074              :  */
    1075              : static int                      /* regexec return code */
    1076           80 : caltdissect(struct vars *v,
    1077              :             struct subre *t,
    1078              :             chr *begin,         /* beginning of relevant substring */
    1079              :             chr *end)           /* end of same */
    1080              : {
    1081              :     struct dfa *d;
    1082              :     int         er;
    1083              : 
    1084              :     assert(t->op == '|');
    1085              : 
    1086           80 :     t = t->child;
    1087              :     /* there should be at least 2 alternatives */
    1088              :     assert(t != NULL && t->sibling != NULL);
    1089              : 
    1090          137 :     while (t != NULL)
    1091              :     {
    1092              :         assert(t->cnfa.nstates > 0);
    1093              : 
    1094              :         MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
    1095              : 
    1096          111 :         d = getsubdfa(v, t);
    1097          111 :         NOERR();
    1098          111 :         if (longest(v, d, begin, end, (int *) NULL) == end)
    1099              :         {
    1100              :             MDEBUG(("%d: caltdissect matched\n", t->id));
    1101           88 :             er = cdissect(v, t, begin, end);
    1102           88 :             if (er != REG_NOMATCH)
    1103           54 :                 return er;
    1104              :         }
    1105           57 :         NOERR();
    1106              : 
    1107           57 :         t = t->sibling;
    1108              :     }
    1109              : 
    1110           26 :     return REG_NOMATCH;
    1111              : }
    1112              : 
    1113              : /*
    1114              :  * citerdissect - dissect match for iteration node
    1115              :  */
    1116              : static int                      /* regexec return code */
    1117          117 : citerdissect(struct vars *v,
    1118              :              struct subre *t,
    1119              :              chr *begin,        /* beginning of relevant substring */
    1120              :              chr *end)          /* end of same */
    1121              : {
    1122              :     struct dfa *d;
    1123              :     chr       **endpts;
    1124              :     chr        *limit;
    1125              :     int         min_matches;
    1126              :     size_t      max_matches;
    1127              :     int         nverified;
    1128              :     int         k;
    1129              :     int         i;
    1130              :     int         er;
    1131              : 
    1132              :     assert(t->op == '*');
    1133              :     assert(t->child != NULL && t->child->cnfa.nstates > 0);
    1134              :     assert(!(t->child->flags & SHORTER));
    1135              :     assert(begin <= end);
    1136              : 
    1137              :     MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
    1138              : 
    1139              :     /*
    1140              :      * For the moment, assume the minimum number of matches is 1.  If zero
    1141              :      * matches are allowed, and the target string is empty, we are allowed to
    1142              :      * match regardless of the contents of the iter node --- but we would
    1143              :      * prefer to match once, so that capturing parens get set.  (An example of
    1144              :      * the concern here is a pattern like "()*\1", which historically this
    1145              :      * code has allowed to succeed.)  Therefore, we deal with the zero-matches
    1146              :      * case at the bottom, after failing to find any other way to match.
    1147              :      */
    1148          117 :     min_matches = t->min;
    1149          117 :     if (min_matches <= 0)
    1150           81 :         min_matches = 1;
    1151              : 
    1152              :     /*
    1153              :      * We need workspace to track the endpoints of each sub-match.  Normally
    1154              :      * we consider only nonzero-length sub-matches, so there can be at most
    1155              :      * end-begin of them.  However, if min is larger than that, we will also
    1156              :      * consider zero-length sub-matches in order to find enough matches.
    1157              :      *
    1158              :      * For convenience, endpts[0] contains the "begin" pointer and we store
    1159              :      * sub-match endpoints in endpts[1..max_matches].
    1160              :      */
    1161          117 :     max_matches = end - begin;
    1162          117 :     if (max_matches > t->max && t->max != DUPINF)
    1163            0 :         max_matches = t->max;
    1164          117 :     if (max_matches < min_matches)
    1165           73 :         max_matches = min_matches;
    1166          117 :     endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
    1167          117 :     if (endpts == NULL)
    1168            0 :         return REG_ESPACE;
    1169          117 :     endpts[0] = begin;
    1170              : 
    1171          117 :     d = getsubdfa(v, t->child);
    1172          117 :     if (ISERR())
    1173              :     {
    1174            0 :         FREE(endpts);
    1175            0 :         return v->err;
    1176              :     }
    1177              : 
    1178              :     /*
    1179              :      * Our strategy is to first find a set of sub-match endpoints that are
    1180              :      * valid according to the child node's DFA, and then recursively dissect
    1181              :      * each sub-match to confirm validity.  If any validity check fails,
    1182              :      * backtrack that sub-match and try again.  And, when we next try for a
    1183              :      * validity check, we need not recheck any successfully verified
    1184              :      * sub-matches that we didn't move the endpoints of.  nverified remembers
    1185              :      * how many sub-matches are currently known okay.
    1186              :      */
    1187              : 
    1188              :     /* initialize to consider first sub-match */
    1189          117 :     nverified = 0;
    1190          117 :     k = 1;
    1191          117 :     limit = end;
    1192              : 
    1193              :     /* iterate until satisfaction or failure */
    1194          503 :     while (k > 0)
    1195              :     {
    1196              :         /* try to find an endpoint for the k'th sub-match */
    1197          407 :         endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
    1198          407 :         if (ISERR())
    1199              :         {
    1200            0 :             FREE(endpts);
    1201            0 :             return v->err;
    1202              :         }
    1203          407 :         if (endpts[k] == NULL)
    1204              :         {
    1205              :             /* no match possible, so see if we can shorten previous one */
    1206          212 :             k--;
    1207          212 :             goto backtrack;
    1208              :         }
    1209              :         MDEBUG(("%d: working endpoint %d: %ld\n",
    1210              :                 t->id, k, LOFF(endpts[k])));
    1211              : 
    1212              :         /* k'th sub-match can no longer be considered verified */
    1213          195 :         if (nverified >= k)
    1214            8 :             nverified = k - 1;
    1215              : 
    1216          195 :         if (endpts[k] != end)
    1217              :         {
    1218              :             /* haven't reached end yet, try another iteration if allowed */
    1219          134 :             if (k >= max_matches)
    1220              :             {
    1221              :                 /* must try to shorten some previous match */
    1222            0 :                 k--;
    1223            0 :                 goto backtrack;
    1224              :             }
    1225              : 
    1226              :             /* reject zero-length match unless necessary to achieve min */
    1227          134 :             if (endpts[k] == endpts[k - 1] &&
    1228            0 :                 (k >= min_matches || min_matches - k < end - endpts[k]))
    1229            0 :                 goto backtrack;
    1230              : 
    1231          134 :             k++;
    1232          134 :             limit = end;
    1233          134 :             continue;
    1234              :         }
    1235              : 
    1236              :         /*
    1237              :          * We've identified a way to divide the string into k sub-matches that
    1238              :          * works so far as the child DFA can tell.  If k is an allowed number
    1239              :          * of matches, start the slow part: recurse to verify each sub-match.
    1240              :          * We always have k <= max_matches, needn't check that.
    1241              :          */
    1242           61 :         if (k < min_matches)
    1243            0 :             goto backtrack;
    1244              : 
    1245              :         MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
    1246              : 
    1247          100 :         for (i = nverified + 1; i <= k; i++)
    1248              :         {
    1249              :             /* zap any match data from a non-last iteration */
    1250           79 :             zaptreesubs(v, t->child);
    1251           79 :             er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
    1252           79 :             if (er == REG_OKAY)
    1253              :             {
    1254           39 :                 nverified = i;
    1255           39 :                 continue;
    1256              :             }
    1257           40 :             if (er == REG_NOMATCH)
    1258           40 :                 break;
    1259              :             /* oops, something failed */
    1260            0 :             FREE(endpts);
    1261            0 :             return er;
    1262              :         }
    1263              : 
    1264           61 :         if (i > k)
    1265              :         {
    1266              :             /* satisfaction */
    1267              :             MDEBUG(("%d: successful\n", t->id));
    1268           21 :             FREE(endpts);
    1269           21 :             return REG_OKAY;
    1270              :         }
    1271              : 
    1272              :         /* i'th match failed to verify, so backtrack it */
    1273           40 :         k = i;
    1274              : 
    1275          252 : backtrack:
    1276              : 
    1277              :         /*
    1278              :          * Must consider shorter versions of the k'th sub-match.  However,
    1279              :          * we'll only ask for a zero-length match if necessary.
    1280              :          */
    1281          252 :         while (k > 0)
    1282              :         {
    1283          156 :             chr        *prev_end = endpts[k - 1];
    1284              : 
    1285          156 :             if (endpts[k] > prev_end)
    1286              :             {
    1287          156 :                 limit = endpts[k] - 1;
    1288          156 :                 if (limit > prev_end ||
    1289            0 :                     (k < min_matches && min_matches - k >= end - prev_end))
    1290              :                 {
    1291              :                     /* break out of backtrack loop, continue the outer one */
    1292              :                     break;
    1293              :                 }
    1294              :             }
    1295              :             /* can't shorten k'th sub-match any more, consider previous one */
    1296            0 :             k--;
    1297              :         }
    1298              :     }
    1299              : 
    1300              :     /* all possibilities exhausted */
    1301           96 :     FREE(endpts);
    1302              : 
    1303              :     /*
    1304              :      * Now consider the possibility that we can match to a zero-length string
    1305              :      * by using zero repetitions.
    1306              :      */
    1307           96 :     if (t->min == 0 && begin == end)
    1308              :     {
    1309              :         MDEBUG(("%d: allowing zero matches\n", t->id));
    1310           68 :         return REG_OKAY;
    1311              :     }
    1312              : 
    1313              :     MDEBUG(("%d: failed\n", t->id));
    1314           28 :     return REG_NOMATCH;
    1315              : }
    1316              : 
    1317              : /*
    1318              :  * creviterdissect - dissect match for iteration node, shortest-first
    1319              :  */
    1320              : static int                      /* regexec return code */
    1321            7 : creviterdissect(struct vars *v,
    1322              :                 struct subre *t,
    1323              :                 chr *begin,     /* beginning of relevant substring */
    1324              :                 chr *end)       /* end of same */
    1325              : {
    1326              :     struct dfa *d;
    1327              :     chr       **endpts;
    1328              :     chr        *limit;
    1329              :     int         min_matches;
    1330              :     size_t      max_matches;
    1331              :     int         nverified;
    1332              :     int         k;
    1333              :     int         i;
    1334              :     int         er;
    1335              : 
    1336              :     assert(t->op == '*');
    1337              :     assert(t->child != NULL && t->child->cnfa.nstates > 0);
    1338              :     assert(t->child->flags & SHORTER);
    1339              :     assert(begin <= end);
    1340              : 
    1341              :     MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
    1342              : 
    1343              :     /*
    1344              :      * If zero matches are allowed, and target string is empty, just declare
    1345              :      * victory.  OTOH, if target string isn't empty, zero matches can't work
    1346              :      * so we pretend the min is 1.
    1347              :      */
    1348            7 :     min_matches = t->min;
    1349            7 :     if (min_matches <= 0)
    1350              :     {
    1351            7 :         if (begin == end)
    1352              :         {
    1353              :             MDEBUG(("%d: allowing zero matches\n", t->id));
    1354            1 :             return REG_OKAY;
    1355              :         }
    1356            6 :         min_matches = 1;
    1357              :     }
    1358              : 
    1359              :     /*
    1360              :      * We need workspace to track the endpoints of each sub-match.  Normally
    1361              :      * we consider only nonzero-length sub-matches, so there can be at most
    1362              :      * end-begin of them.  However, if min is larger than that, we will also
    1363              :      * consider zero-length sub-matches in order to find enough matches.
    1364              :      *
    1365              :      * For convenience, endpts[0] contains the "begin" pointer and we store
    1366              :      * sub-match endpoints in endpts[1..max_matches].
    1367              :      */
    1368            6 :     max_matches = end - begin;
    1369            6 :     if (max_matches > t->max && t->max != DUPINF)
    1370            6 :         max_matches = t->max;
    1371            6 :     if (max_matches < min_matches)
    1372            0 :         max_matches = min_matches;
    1373            6 :     endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
    1374            6 :     if (endpts == NULL)
    1375            0 :         return REG_ESPACE;
    1376            6 :     endpts[0] = begin;
    1377              : 
    1378            6 :     d = getsubdfa(v, t->child);
    1379            6 :     if (ISERR())
    1380              :     {
    1381            0 :         FREE(endpts);
    1382            0 :         return v->err;
    1383              :     }
    1384              : 
    1385              :     /*
    1386              :      * Our strategy is to first find a set of sub-match endpoints that are
    1387              :      * valid according to the child node's DFA, and then recursively dissect
    1388              :      * each sub-match to confirm validity.  If any validity check fails,
    1389              :      * backtrack that sub-match and try again.  And, when we next try for a
    1390              :      * validity check, we need not recheck any successfully verified
    1391              :      * sub-matches that we didn't move the endpoints of.  nverified remembers
    1392              :      * how many sub-matches are currently known okay.
    1393              :      */
    1394              : 
    1395              :     /* initialize to consider first sub-match */
    1396            6 :     nverified = 0;
    1397            6 :     k = 1;
    1398            6 :     limit = begin;
    1399              : 
    1400              :     /* iterate until satisfaction or failure */
    1401            8 :     while (k > 0)
    1402              :     {
    1403              :         /* disallow zero-length match unless necessary to achieve min */
    1404            6 :         if (limit == endpts[k - 1] &&
    1405            6 :             limit != end &&
    1406            0 :             (k >= min_matches || min_matches - k < end - limit))
    1407            6 :             limit++;
    1408              : 
    1409              :         /* if this is the last allowed sub-match, it must reach to the end */
    1410            6 :         if (k >= max_matches)
    1411            6 :             limit = end;
    1412              : 
    1413              :         /* try to find an endpoint for the k'th sub-match */
    1414            6 :         endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
    1415              :                              (chr **) NULL, (int *) NULL);
    1416            6 :         if (ISERR())
    1417              :         {
    1418            0 :             FREE(endpts);
    1419            0 :             return v->err;
    1420              :         }
    1421            6 :         if (endpts[k] == NULL)
    1422              :         {
    1423              :             /* no match possible, so see if we can lengthen previous one */
    1424            0 :             k--;
    1425            0 :             goto backtrack;
    1426              :         }
    1427              :         MDEBUG(("%d: working endpoint %d: %ld\n",
    1428              :                 t->id, k, LOFF(endpts[k])));
    1429              : 
    1430              :         /* k'th sub-match can no longer be considered verified */
    1431            6 :         if (nverified >= k)
    1432            0 :             nverified = k - 1;
    1433              : 
    1434            6 :         if (endpts[k] != end)
    1435              :         {
    1436              :             /* haven't reached end yet, try another iteration if allowed */
    1437            0 :             if (k >= max_matches)
    1438              :             {
    1439              :                 /* must try to lengthen some previous match */
    1440            0 :                 k--;
    1441            0 :                 goto backtrack;
    1442              :             }
    1443              : 
    1444            0 :             k++;
    1445            0 :             limit = endpts[k - 1];
    1446            0 :             continue;
    1447              :         }
    1448              : 
    1449              :         /*
    1450              :          * We've identified a way to divide the string into k sub-matches that
    1451              :          * works so far as the child DFA can tell.  If k is an allowed number
    1452              :          * of matches, start the slow part: recurse to verify each sub-match.
    1453              :          * We always have k <= max_matches, needn't check that.
    1454              :          */
    1455            6 :         if (k < min_matches)
    1456            0 :             goto backtrack;
    1457              : 
    1458              :         MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
    1459              : 
    1460           10 :         for (i = nverified + 1; i <= k; i++)
    1461              :         {
    1462              :             /* zap any match data from a non-last iteration */
    1463            6 :             zaptreesubs(v, t->child);
    1464            6 :             er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
    1465            6 :             if (er == REG_OKAY)
    1466              :             {
    1467            4 :                 nverified = i;
    1468            4 :                 continue;
    1469              :             }
    1470            2 :             if (er == REG_NOMATCH)
    1471            2 :                 break;
    1472              :             /* oops, something failed */
    1473            0 :             FREE(endpts);
    1474            0 :             return er;
    1475              :         }
    1476              : 
    1477            6 :         if (i > k)
    1478              :         {
    1479              :             /* satisfaction */
    1480              :             MDEBUG(("%d: successful\n", t->id));
    1481            4 :             FREE(endpts);
    1482            4 :             return REG_OKAY;
    1483              :         }
    1484              : 
    1485              :         /* i'th match failed to verify, so backtrack it */
    1486            2 :         k = i;
    1487              : 
    1488            2 : backtrack:
    1489              : 
    1490              :         /*
    1491              :          * Must consider longer versions of the k'th sub-match.
    1492              :          */
    1493            4 :         while (k > 0)
    1494              :         {
    1495            2 :             if (endpts[k] < end)
    1496              :             {
    1497            0 :                 limit = endpts[k] + 1;
    1498              :                 /* break out of backtrack loop, continue the outer one */
    1499            0 :                 break;
    1500              :             }
    1501              :             /* can't lengthen k'th sub-match any more, consider previous one */
    1502            2 :             k--;
    1503              :         }
    1504              :     }
    1505              : 
    1506              :     /* all possibilities exhausted */
    1507              :     MDEBUG(("%d: failed\n", t->id));
    1508            2 :     FREE(endpts);
    1509            2 :     return REG_NOMATCH;
    1510              : }
    1511              : 
    1512              : 
    1513              : 
    1514              : #include "rege_dfa.c"
        

Generated by: LCOV version 2.0-1