LCOV - code coverage report
Current view: top level - src/backend/regex - regexec.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 85.2 % 506 431
Test Date: 2026-05-24 12:17:26 Functions: 100.0 % 16 16
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*
       2              :  * re_*exec and friends - match REs
       3              :  *
       4              :  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
       5              :  *
       6              :  * Development of this software was funded, in part, by Cray Research Inc.,
       7              :  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
       8              :  * Corporation, none of whom are responsible for the results.  The author
       9              :  * thanks all of them.
      10              :  *
      11              :  * Redistribution and use in source and binary forms -- with or without
      12              :  * modification -- are permitted for any purpose, provided that
      13              :  * redistributions in source form retain this entire copyright notice and
      14              :  * indicate the origin and nature of any modifications.
      15              :  *
      16              :  * I'd appreciate being given credit for this package in the documentation
      17              :  * of software which uses it, but that is not a requirement.
      18              :  *
      19              :  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
      20              :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      21              :  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
      22              :  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      23              :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      24              :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
      25              :  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      26              :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      27              :  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
      28              :  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      29              :  *
      30              :  * src/backend/regex/regexec.c
      31              :  *
      32              :  */
      33              : 
      34              : #include "regex/regguts.h"
      35              : 
      36              : 
      37              : 
      38              : /* lazy-DFA representation */
      39              : struct arcp
      40              : {                               /* "pointer" to an outarc */
      41              :     struct sset *ss;
      42              :     color       co;
      43              : };
      44              : 
      45              : struct sset
      46              : {                               /* state set */
      47              :     unsigned   *states;         /* pointer to bitvector */
      48              :     unsigned    hash;           /* hash of bitvector */
      49              : #define  HASH(bv, nw)    (((nw) == 1) ? *(bv) : hash(bv, nw))
      50              : #define  HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
      51              :         memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
      52              :     int         flags;
      53              : #define  STARTER     01         /* the initial state set */
      54              : #define  POSTSTATE   02         /* includes the goal state */
      55              : #define  LOCKED      04         /* locked in cache */
      56              : #define  NOPROGRESS  010        /* zero-progress state set */
      57              :     struct arcp ins;            /* chain of inarcs pointing here */
      58              :     chr        *lastseen;       /* last entered on arrival here */
      59              :     struct sset **outs;         /* outarc vector indexed by color */
      60              :     struct arcp *inchain;       /* chain-pointer vector for outarcs */
      61              : };
      62              : 
      63              : struct dfa
      64              : {
      65              :     int         nssets;         /* size of cache */
      66              :     int         nssused;        /* how many entries occupied yet */
      67              :     int         nstates;        /* number of states */
      68              :     int         ncolors;        /* length of outarc and inchain vectors */
      69              :     int         wordsper;       /* length of state-set bitvectors */
      70              :     struct sset *ssets;         /* state-set cache */
      71              :     unsigned   *statesarea;     /* bitvector storage */
      72              :     unsigned   *work;           /* pointer to work area within statesarea */
      73              :     struct sset **outsarea;     /* outarc-vector storage */
      74              :     struct arcp *incarea;       /* inchain storage */
      75              :     struct cnfa *cnfa;
      76              :     struct colormap *cm;
      77              :     chr        *lastpost;       /* location of last cache-flushed success */
      78              :     chr        *lastnopr;       /* location of last cache-flushed NOPROGRESS */
      79              :     struct sset *search;        /* replacement-search-pointer memory */
      80              :     int         backno;         /* if DFA for a backref, subno it refers to */
      81              :     short       backmin;        /* min repetitions for backref */
      82              :     short       backmax;        /* max repetitions for backref */
      83              :     bool        ismalloced;     /* should this struct dfa be freed? */
      84              :     bool        arraysmalloced; /* should its subsidiary arrays be freed? */
      85              : };
      86              : 
      87              : #define WORK    1               /* number of work bitvectors needed */
      88              : 
      89              : /* setup for non-malloc allocation for small cases */
      90              : #define FEWSTATES   20          /* must be less than UBITS */
      91              : #define FEWCOLORS   15
      92              : struct smalldfa
      93              : {
      94              :     struct dfa  dfa;            /* must be first */
      95              :     struct sset ssets[FEWSTATES * 2];
      96              :     unsigned    statesarea[FEWSTATES * 2 + WORK];
      97              :     struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
      98              :     struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
      99              : };
     100              : 
     101              : #define DOMALLOC    ((struct smalldfa *)NULL)   /* force malloc */
     102              : 
     103              : 
     104              : 
     105              : /* internal variables, bundled for easy passing around */
     106              : struct vars
     107              : {
     108              :     regex_t    *re;
     109              :     struct guts *g;
     110              :     int         eflags;         /* copies of arguments */
     111              :     size_t      nmatch;
     112              :     regmatch_t *pmatch;
     113              :     rm_detail_t *details;
     114              :     chr        *start;          /* start of string */
     115              :     chr        *search_start;   /* search start of string */
     116              :     chr        *stop;           /* just past end of string */
     117              :     int         err;            /* error code if any (0 none) */
     118              :     struct dfa **subdfas;       /* per-tree-subre DFAs */
     119              :     struct dfa **ladfas;        /* per-lacon-subre DFAs */
     120              :     struct sset **lblastcss;    /* per-lacon-subre lookbehind restart data */
     121              :     chr       **lblastcp;       /* per-lacon-subre lookbehind restart data */
     122              :     struct smalldfa dfa1;
     123              :     struct smalldfa dfa2;
     124              : };
     125              : 
     126              : #define VISERR(vv)  ((vv)->err != 0) /* have we seen an error yet? */
     127              : #define ISERR() VISERR(v)
     128              : #define VERR(vv,e)  ((vv)->err = ((vv)->err ? (vv)->err : (e)))
     129              : #define ERR(e)  VERR(v, e)      /* record an error */
     130              : #define NOERR() {if (ISERR()) return v->err;}    /* if error seen, return it */
     131              : #define OFF(p)  ((p) - v->start)
     132              : #define LOFF(p) ((long)OFF(p))
     133              : 
     134              : 
     135              : 
     136              : /*
     137              :  * forward declarations
     138              :  */
     139              : /* === regexec.c === */
     140              : static struct dfa *getsubdfa(struct vars *v, struct subre *t);
     141              : static struct dfa *getladfa(struct vars *v, int n);
     142              : static int  find(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
     143              : static int  cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
     144              : static int  cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm,
     145              :                       struct dfa *d, struct dfa *s, chr **coldp);
     146              : static void zapallsubs(regmatch_t *p, size_t n);
     147              : static void zaptreesubs(struct vars *v, struct subre *t);
     148              : static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end);
     149              : static int  cdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     150              : static int  ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     151              : static int  crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     152              : static int  cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     153              : static int  caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     154              : static int  citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     155              : static int  creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
     156              : 
     157              : /* === rege_dfa.c === */
     158              : static chr *longest(struct vars *v, struct dfa *d,
     159              :                     chr *start, chr *stop, int *hitstopp);
     160              : static chr *shortest(struct vars *v, struct dfa *d, chr *start, chr *min,
     161              :                      chr *max, chr **coldp, int *hitstopp);
     162              : static int  matchuntil(struct vars *v, struct dfa *d, chr *probe,
     163              :                        struct sset **lastcss, chr **lastcp);
     164              : static chr *dfa_backref(struct vars *v, struct dfa *d, chr *start,
     165              :                         chr *min, chr *max, bool shortest);
     166              : static chr *lastcold(struct vars *v, struct dfa *d);
     167              : static struct dfa *newdfa(struct vars *v, struct cnfa *cnfa,
     168              :                           struct colormap *cm, struct smalldfa *sml);
     169              : static void freedfa(struct dfa *d);
     170              : static unsigned hash(unsigned *uv, int n);
     171              : static struct sset *initialize(struct vars *v, struct dfa *d, chr *start);
     172              : static struct sset *miss(struct vars *v, struct dfa *d, struct sset *css,
     173              :                          color co, chr *cp, chr *start);
     174              : static int  lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co);
     175              : static struct sset *getvacant(struct vars *v, struct dfa *d, chr *cp,
     176              :                               chr *start);
     177              : static struct sset *pickss(struct vars *v, struct dfa *d, chr *cp,
     178              :                            chr *start);
     179              : 
     180              : 
     181              : /*
     182              :  * pg_regexec - match regular expression
     183              :  */
     184              : int
     185      5762030 : pg_regexec(regex_t *re,
     186              :            const chr *string,
     187              :            size_t len,
     188              :            size_t search_start,
     189              :            rm_detail_t *details,
     190              :            size_t nmatch,
     191              :            regmatch_t pmatch[],
     192              :            int flags)
     193              : {
     194              :     struct vars var;
     195      5762030 :     struct vars *v = &var;
     196              :     int         st;
     197              :     size_t      n;
     198              :     size_t      i;
     199              :     int         backref;
     200              : 
     201              : #define  LOCALMAT    20
     202              :     regmatch_t  mat[LOCALMAT];
     203              : 
     204              : #define  LOCALDFAS   40
     205              :     struct dfa *subdfas[LOCALDFAS];
     206              : 
     207              :     /* sanity checks */
     208      5762030 :     if (re == NULL || string == NULL || re->re_magic != REMAGIC)
     209            0 :         return REG_INVARG;
     210      5762030 :     if (re->re_csize != sizeof(chr))
     211            0 :         return REG_MIXED;
     212      5762030 :     if (search_start > len)
     213            5 :         return REG_NOMATCH;
     214              : 
     215              :     /* Initialize locale-dependent support */
     216      5762025 :     pg_set_regex_collation(re->re_collation);
     217              : 
     218              :     /* setup */
     219      5762025 :     v->re = re;
     220      5762025 :     v->g = (struct guts *) re->re_guts;
     221      5762025 :     if ((v->g->cflags & REG_EXPECT) && details == NULL)
     222            0 :         return REG_INVARG;
     223      5762025 :     if (v->g->info & REG_UIMPOSSIBLE)
     224          721 :         return REG_NOMATCH;
     225      5761304 :     backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
     226      5761304 :     v->eflags = flags;
     227      5761304 :     if (backref && nmatch <= v->g->nsub)
     228              :     {
     229              :         /* need larger work area */
     230          149 :         v->nmatch = v->g->nsub + 1;
     231          149 :         if (v->nmatch <= LOCALMAT)
     232          148 :             v->pmatch = mat;
     233              :         else
     234            1 :             v->pmatch = MALLOC_ARRAY(regmatch_t, v->nmatch);
     235          149 :         if (v->pmatch == NULL)
     236            0 :             return REG_ESPACE;
     237          149 :         zapallsubs(v->pmatch, v->nmatch);
     238              :     }
     239              :     else
     240              :     {
     241              :         /* we can store results directly in caller's array */
     242      5761155 :         v->pmatch = pmatch;
     243              :         /* ensure any extra entries in caller's array are filled with -1 */
     244      5761155 :         if (nmatch > 0)
     245       583052 :             zapallsubs(pmatch, nmatch);
     246              :         /* then forget about extra entries, to avoid useless work in find() */
     247      5761155 :         if (nmatch > v->g->nsub + 1)
     248          670 :             nmatch = v->g->nsub + 1;
     249      5761155 :         v->nmatch = nmatch;
     250              :     }
     251      5761304 :     v->details = details;
     252      5761304 :     v->start = (chr *) string;
     253      5761304 :     v->search_start = (chr *) string + search_start;
     254      5761304 :     v->stop = (chr *) string + len;
     255      5761304 :     v->err = 0;
     256      5761304 :     v->subdfas = NULL;
     257      5761304 :     v->ladfas = NULL;
     258      5761304 :     v->lblastcss = NULL;
     259      5761304 :     v->lblastcp = NULL;
     260              :     /* below this point, "goto cleanup" will behave sanely */
     261              : 
     262              :     assert(v->g->ntree >= 0);
     263      5761304 :     n = (size_t) v->g->ntree;
     264      5761304 :     if (n <= LOCALDFAS)
     265      5761302 :         v->subdfas = subdfas;
     266              :     else
     267              :     {
     268              :         /* ntree is surely less than the number of states, so this is safe: */
     269            2 :         v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
     270            2 :         if (v->subdfas == NULL)
     271              :         {
     272            0 :             st = REG_ESPACE;
     273            0 :             goto cleanup;
     274              :         }
     275              :     }
     276     17305646 :     for (i = 0; i < n; i++)
     277     11544342 :         v->subdfas[i] = NULL;
     278              : 
     279              :     assert(v->g->nlacons >= 0);
     280      5761304 :     n = (size_t) v->g->nlacons;
     281      5761304 :     if (n > 0)
     282              :     {
     283              :         /* nlacons is surely less than the number of arcs, so this is safe: */
     284         1040 :         v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
     285         1040 :         if (v->ladfas == NULL)
     286              :         {
     287            0 :             st = REG_ESPACE;
     288            0 :             goto cleanup;
     289              :         }
     290         3168 :         for (i = 0; i < n; i++)
     291         2128 :             v->ladfas[i] = NULL;
     292         1040 :         v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
     293         1040 :         v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
     294         1040 :         if (v->lblastcss == NULL || v->lblastcp == NULL)
     295              :         {
     296            0 :             st = REG_ESPACE;
     297            0 :             goto cleanup;
     298              :         }
     299         3168 :         for (i = 0; i < n; i++)
     300              :         {
     301         2128 :             v->lblastcss[i] = NULL;
     302         2128 :             v->lblastcp[i] = NULL;
     303              :         }
     304              :     }
     305              : 
     306              :     /* do it */
     307              :     assert(v->g->tree != NULL);
     308      5761304 :     if (backref)
     309          208 :         st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
     310              :     else
     311      5761096 :         st = find(v, &v->g->tree->cnfa, &v->g->cmap);
     312              : 
     313              :     /* on success, ensure caller's match vector is filled correctly */
     314      5761304 :     if (st == REG_OKAY && nmatch > 0)
     315              :     {
     316       470605 :         if (v->pmatch != pmatch)
     317              :         {
     318              :             /* copy portion of match vector over from (larger) work area */
     319              :             assert(nmatch <= v->nmatch);
     320           33 :             memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t));
     321              :         }
     322       470605 :         if (v->g->cflags & REG_NOSUB)
     323              :         {
     324              :             /* don't expose possibly-partial sub-match results to caller */
     325       466736 :             zapallsubs(pmatch, nmatch);
     326              :         }
     327              :     }
     328              : 
     329              :     /* clean up */
     330      5294568 : cleanup:
     331      5761304 :     if (v->pmatch != pmatch && v->pmatch != mat)
     332            1 :         FREE(v->pmatch);
     333      5761304 :     if (v->subdfas != NULL)
     334              :     {
     335      5761304 :         n = (size_t) v->g->ntree;
     336     17305646 :         for (i = 0; i < n; i++)
     337              :         {
     338     11544342 :             if (v->subdfas[i] != NULL)
     339        16880 :                 freedfa(v->subdfas[i]);
     340              :         }
     341      5761304 :         if (v->subdfas != subdfas)
     342            2 :             FREE(v->subdfas);
     343              :     }
     344      5761304 :     if (v->ladfas != NULL)
     345              :     {
     346         1040 :         n = (size_t) v->g->nlacons;
     347         3168 :         for (i = 0; i < n; i++)
     348              :         {
     349         2128 :             if (v->ladfas[i] != NULL)
     350           75 :                 freedfa(v->ladfas[i]);
     351              :         }
     352         1040 :         FREE(v->ladfas);
     353              :     }
     354      5761304 :     if (v->lblastcss != NULL)
     355         1040 :         FREE(v->lblastcss);
     356      5761304 :     if (v->lblastcp != NULL)
     357         1040 :         FREE(v->lblastcp);
     358              : 
     359              : #ifdef REG_DEBUG
     360              :     if (v->eflags & (REG_FTRACE | REG_MTRACE))
     361              :         fflush(stdout);
     362              : #endif
     363              : 
     364      5761304 :     return st;
     365              : }
     366              : 
     367              : /*
     368              :  * getsubdfa - create or re-fetch the DFA for a tree subre node
     369              :  *
     370              :  * We only need to create the DFA once per overall regex execution.
     371              :  * The DFA will be freed by the cleanup step in pg_regexec().
     372              :  */
     373              : static struct dfa *
     374        17864 : getsubdfa(struct vars *v,
     375              :           struct subre *t)
     376              : {
     377        17864 :     struct dfa *d = v->subdfas[t->id];
     378              : 
     379        17864 :     if (d == NULL)
     380              :     {
     381        16880 :         d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
     382        16880 :         if (d == NULL)
     383            0 :             return NULL;
     384              :         /* set up additional info if this is a backref node */
     385        16880 :         if (t->op == 'b')
     386              :         {
     387          199 :             d->backno = t->backno;
     388          199 :             d->backmin = t->min;
     389          199 :             d->backmax = t->max;
     390              :         }
     391        16880 :         v->subdfas[t->id] = d;
     392              :     }
     393        17864 :     return d;
     394              : }
     395              : 
     396              : /*
     397              :  * getladfa - create or re-fetch the DFA for a LACON subre node
     398              :  *
     399              :  * Same as above, but for LACONs.
     400              :  */
     401              : static struct dfa *
     402          158 : getladfa(struct vars *v,
     403              :          int n)
     404              : {
     405              :     assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
     406              : 
     407          158 :     if (v->ladfas[n] == NULL)
     408              :     {
     409           75 :         struct subre *sub = &v->g->lacons[n];
     410              : 
     411           75 :         v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
     412              :         /* a LACON can't contain a backref, so nothing else to do */
     413              :     }
     414          158 :     return v->ladfas[n];
     415              : }
     416              : 
     417              : /*
     418              :  * find - find a match for the main NFA (no-complications case)
     419              :  */
     420              : static int
     421      5761096 : find(struct vars *v,
     422              :      struct cnfa *cnfa,
     423              :      struct colormap *cm)
     424              : {
     425              :     struct dfa *s;
     426              :     struct dfa *d;
     427              :     chr        *begin;
     428      5761096 :     chr        *end = NULL;
     429              :     chr        *cold;
     430              :     chr        *open;           /* open and close of range of possible starts */
     431              :     chr        *close;
     432              :     int         hitend;
     433      5761096 :     int         shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
     434              : 
     435              :     /* first, a shot with the search RE */
     436      5761096 :     s = newdfa(v, &v->g->search, cm, &v->dfa1);
     437      5761096 :     if (s == NULL)
     438            0 :         return v->err;
     439              :     MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
     440      5761096 :     cold = NULL;
     441      5761096 :     close = shortest(v, s, v->search_start, v->search_start, v->stop,
     442              :                      &cold, (int *) NULL);
     443      5761096 :     freedfa(s);
     444      5761096 :     NOERR();
     445      5761096 :     if (v->g->cflags & REG_EXPECT)
     446              :     {
     447              :         assert(v->details != NULL);
     448           27 :         if (cold != NULL)
     449           27 :             v->details->rm_extend.rm_so = OFF(cold);
     450              :         else
     451            0 :             v->details->rm_extend.rm_so = OFF(v->stop);
     452           27 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
     453              :     }
     454      5761096 :     if (close == NULL)          /* not found */
     455      5160786 :         return REG_NOMATCH;
     456       600310 :     if (v->nmatch == 0)          /* found, don't need exact location */
     457       129779 :         return REG_OKAY;
     458              : 
     459              :     /* find starting point and match */
     460              :     assert(cold != NULL);
     461       470531 :     open = cold;
     462       470531 :     cold = NULL;
     463              :     MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
     464       470531 :     d = newdfa(v, cnfa, cm, &v->dfa1);
     465       470531 :     if (d == NULL)
     466            0 :         return v->err;
     467       470562 :     for (begin = open; begin <= close; begin++)
     468              :     {
     469              :         MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
     470       470562 :         if (shorter)
     471           86 :             end = shortest(v, d, begin, begin, v->stop,
     472              :                            (chr **) NULL, &hitend);
     473              :         else
     474       470476 :             end = longest(v, d, begin, v->stop, &hitend);
     475       470562 :         if (ISERR())
     476              :         {
     477            0 :             freedfa(d);
     478            0 :             return v->err;
     479              :         }
     480       470562 :         if (hitend && cold == NULL)
     481         4775 :             cold = begin;
     482       470562 :         if (end != NULL)
     483       470531 :             break;              /* NOTE BREAK OUT */
     484              :     }
     485              :     assert(end != NULL);        /* search RE succeeded so loop should */
     486       470531 :     freedfa(d);
     487              : 
     488              :     /* and pin down details */
     489              :     assert(v->nmatch > 0);
     490       470531 :     v->pmatch[0].rm_so = OFF(begin);
     491       470531 :     v->pmatch[0].rm_eo = OFF(end);
     492       470531 :     if (v->g->cflags & REG_EXPECT)
     493              :     {
     494           10 :         if (cold != NULL)
     495            5 :             v->details->rm_extend.rm_so = OFF(cold);
     496              :         else
     497            5 :             v->details->rm_extend.rm_so = OFF(v->stop);
     498           10 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
     499              :     }
     500       470531 :     if (v->nmatch == 1)          /* no need for submatches */
     501       467537 :         return REG_OKAY;
     502              : 
     503              :     /* find submatches */
     504         2994 :     return cdissect(v, v->g->tree, begin, end);
     505              : }
     506              : 
     507              : /*
     508              :  * cfind - find a match for the main NFA (with complications)
     509              :  */
     510              : static int
     511          208 : cfind(struct vars *v,
     512              :       struct cnfa *cnfa,
     513              :       struct colormap *cm)
     514              : {
     515              :     struct dfa *s;
     516              :     struct dfa *d;
     517              :     chr        *cold;
     518              :     int         ret;
     519              : 
     520          208 :     s = newdfa(v, &v->g->search, cm, &v->dfa1);
     521          208 :     if (s == NULL)
     522            0 :         return v->err;
     523          208 :     d = newdfa(v, cnfa, cm, &v->dfa2);
     524          208 :     if (d == NULL)
     525              :     {
     526            0 :         freedfa(s);
     527            0 :         return v->err;
     528              :     }
     529              : 
     530          208 :     ret = cfindloop(v, cnfa, cm, d, s, &cold);
     531              : 
     532          208 :     freedfa(d);
     533          208 :     freedfa(s);
     534          208 :     NOERR();
     535          208 :     if (v->g->cflags & REG_EXPECT)
     536              :     {
     537              :         assert(v->details != NULL);
     538            0 :         if (cold != NULL)
     539            0 :             v->details->rm_extend.rm_so = OFF(cold);
     540              :         else
     541            0 :             v->details->rm_extend.rm_so = OFF(v->stop);
     542            0 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
     543              :     }
     544          208 :     return ret;
     545              : }
     546              : 
     547              : /*
     548              :  * cfindloop - the heart of cfind
     549              :  */
     550              : static int
     551          208 : cfindloop(struct vars *v,
     552              :           struct cnfa *cnfa,
     553              :           struct colormap *cm,
     554              :           struct dfa *d,
     555              :           struct dfa *s,
     556              :           chr **coldp)          /* where to put coldstart pointer */
     557              : {
     558              :     chr        *begin;
     559              :     chr        *end;
     560              :     chr        *cold;
     561              :     chr        *open;           /* open and close of range of possible starts */
     562              :     chr        *close;
     563              :     chr        *estart;
     564              :     chr        *estop;
     565              :     int         er;
     566          208 :     int         shorter = v->g->tree->flags & SHORTER;
     567              :     int         hitend;
     568              : 
     569              :     assert(d != NULL && s != NULL);
     570          208 :     cold = NULL;
     571          208 :     close = v->search_start;
     572              :     do
     573              :     {
     574              :         /* Search with the search RE for match range at/beyond "close" */
     575              :         MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
     576          232 :         close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
     577          232 :         if (ISERR())
     578              :         {
     579            0 :             *coldp = cold;
     580            0 :             return v->err;
     581              :         }
     582          232 :         if (close == NULL)
     583           23 :             break;              /* no more possible match anywhere */
     584              :         assert(cold != NULL);
     585          209 :         open = cold;
     586          209 :         cold = NULL;
     587              :         /* Search for matches starting between "open" and "close" inclusive */
     588              :         MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
     589          749 :         for (begin = open; begin <= close; begin++)
     590              :         {
     591              :             MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
     592          659 :             estart = begin;
     593          659 :             estop = v->stop;
     594              :             for (;;)
     595              :             {
     596              :                 /* Here we use the top node's detailed RE */
     597          942 :                 if (shorter)
     598          141 :                     end = shortest(v, d, begin, estart,
     599              :                                    estop, (chr **) NULL, &hitend);
     600              :                 else
     601          801 :                     end = longest(v, d, begin, estop,
     602              :                                   &hitend);
     603          942 :                 if (ISERR())
     604              :                 {
     605            0 :                     *coldp = cold;
     606            0 :                     return v->err;
     607              :                 }
     608          942 :                 if (hitend && cold == NULL)
     609          147 :                     cold = begin;
     610          942 :                 if (end == NULL)
     611          513 :                     break;      /* no match with this begin point, try next */
     612              :                 MDEBUG(("tentative end %ld\n", LOFF(end)));
     613              :                 /* Dissect the potential match to see if it really matches */
     614          429 :                 er = cdissect(v, v->g->tree, begin, end);
     615          429 :                 if (er == REG_OKAY)
     616              :                 {
     617          119 :                     if (v->nmatch > 0)
     618              :                     {
     619          119 :                         v->pmatch[0].rm_so = OFF(begin);
     620          119 :                         v->pmatch[0].rm_eo = OFF(end);
     621              :                     }
     622          119 :                     *coldp = cold;
     623          119 :                     return REG_OKAY;
     624              :                 }
     625          310 :                 if (er != REG_NOMATCH)
     626              :                 {
     627            0 :                     ERR(er);
     628            0 :                     *coldp = cold;
     629            0 :                     return er;
     630              :                 }
     631              :                 /* Try next longer/shorter match with same begin point */
     632          310 :                 if (shorter)
     633              :                 {
     634          119 :                     if (end == estop)
     635           17 :                         break;  /* no more, so try next begin point */
     636          102 :                     estart = end + 1;
     637              :                 }
     638              :                 else
     639              :                 {
     640          191 :                     if (end == begin)
     641           10 :                         break;  /* no more, so try next begin point */
     642          181 :                     estop = end - 1;
     643              :                 }
     644              :             }                   /* end loop over endpoint positions */
     645              :         }                       /* end loop over beginning positions */
     646              : 
     647              :         /*
     648              :          * If we get here, there is no possible match starting at or before
     649              :          * "close", so consider matches beyond that.  We'll do a fresh search
     650              :          * with the search RE to find a new promising match range.
     651              :          */
     652           90 :         close++;
     653           90 :     } while (close < v->stop);
     654              : 
     655           89 :     *coldp = cold;
     656           89 :     return REG_NOMATCH;
     657              : }
     658              : 
     659              : /*
     660              :  * zapallsubs - initialize all subexpression matches to "no match"
     661              :  *
     662              :  * Note that p[0], the overall-match location, is not touched.
     663              :  */
     664              : static void
     665      1049937 : zapallsubs(regmatch_t *p,
     666              :            size_t n)
     667              : {
     668              :     size_t      i;
     669              : 
     670      1060993 :     for (i = n - 1; i > 0; i--)
     671              :     {
     672        11056 :         p[i].rm_so = -1;
     673        11056 :         p[i].rm_eo = -1;
     674              :     }
     675      1049937 : }
     676              : 
     677              : /*
     678              :  * zaptreesubs - initialize subexpressions within subtree to "no match"
     679              :  */
     680              : static void
     681          739 : zaptreesubs(struct vars *v,
     682              :             struct subre *t)
     683              : {
     684          739 :     int         n = t->capno;
     685              :     struct subre *t2;
     686              : 
     687          739 :     if (n > 0)
     688              :     {
     689          328 :         if ((size_t) n < v->nmatch)
     690              :         {
     691          328 :             v->pmatch[n].rm_so = -1;
     692          328 :             v->pmatch[n].rm_eo = -1;
     693              :         }
     694              :     }
     695              : 
     696          988 :     for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
     697          249 :         zaptreesubs(v, t2);
     698          739 : }
     699              : 
     700              : /*
     701              :  * subset - set subexpression match data for a successful subre
     702              :  */
     703              : static void
     704         5539 : subset(struct vars *v,
     705              :        struct subre *sub,
     706              :        chr *begin,
     707              :        chr *end)
     708              : {
     709         5539 :     int         n = sub->capno;
     710              : 
     711              :     assert(n > 0);
     712         5539 :     if ((size_t) n >= v->nmatch)
     713           15 :         return;
     714              : 
     715              :     MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
     716         5524 :     v->pmatch[n].rm_so = OFF(begin);
     717         5524 :     v->pmatch[n].rm_eo = OFF(end);
     718              : }
     719              : 
     720              : /*
     721              :  * cdissect - check backrefs and determine subexpression matches
     722              :  *
     723              :  * cdissect recursively processes a subre tree to check matching of backrefs
     724              :  * and/or identify submatch boundaries for capture nodes.  The proposed match
     725              :  * runs from "begin" to "end" (not including "end"), and we are basically
     726              :  * "dissecting" it to see where the submatches are.
     727              :  *
     728              :  * Before calling any level of cdissect, the caller must have run the node's
     729              :  * DFA and found that the proposed substring satisfies the DFA.  (We make
     730              :  * the caller do that because in concatenation and iteration nodes, it's
     731              :  * much faster to check all the substrings against the child DFAs before we
     732              :  * recurse.)
     733              :  *
     734              :  * A side-effect of a successful match is to save match locations for
     735              :  * capturing subexpressions in v->pmatch[].  This is a little bit tricky,
     736              :  * so we make the following rules:
     737              :  * 1. Before initial entry to cdissect, all match data must have been
     738              :  *    cleared (this is seen to by zapallsubs).
     739              :  * 2. Before any recursive entry to cdissect, the match data for that
     740              :  *    subexpression tree must be guaranteed clear (see zaptreesubs).
     741              :  * 3. When returning REG_OKAY, each level of cdissect will have saved
     742              :  *    any relevant match locations.
     743              :  * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
     744              :  *    that its subexpression match locations are again clear.
     745              :  * 5. No guarantees are made for error cases (i.e., other result codes).
     746              :  * 6. When a level of cdissect abandons a successful sub-match, it will
     747              :  *    clear that subtree's match locations with zaptreesubs before trying
     748              :  *    any new DFA match or cdissect call for that subtree or any subtree
     749              :  *    to its right (that is, any subtree that could have a backref into the
     750              :  *    abandoned match).
     751              :  * This may seem overly complicated, but it's difficult to simplify it
     752              :  * because of the provision that match locations must be reset before
     753              :  * any fresh DFA match (a rule that is needed to make dfa_backref safe).
     754              :  * That means it won't work to just reset relevant match locations at the
     755              :  * start of each cdissect level.
     756              :  */
     757              : static int                      /* regexec return code */
     758        20816 : cdissect(struct vars *v,
     759              :          struct subre *t,
     760              :          chr *begin,            /* beginning of relevant substring */
     761              :          chr *end)              /* end of same */
     762              : {
     763              :     int         er;
     764              : 
     765              :     assert(t != NULL);
     766              :     MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
     767              : 
     768              :     /* handy place to check for operation cancel */
     769        20816 :     INTERRUPT(v->re);
     770              :     /* ... and stack overrun */
     771        20816 :     if (STACK_TOO_DEEP(v->re))
     772            0 :         return REG_ETOOBIG;
     773              : 
     774        20816 :     switch (t->op)
     775              :     {
     776        11432 :         case '=':               /* terminal node */
     777              :             assert(t->child == NULL);
     778        11432 :             er = REG_OKAY;      /* no action, parent did the work */
     779        11432 :             break;
     780          303 :         case 'b':               /* back reference */
     781              :             assert(t->child == NULL);
     782          303 :             er = cbrdissect(v, t, begin, end);
     783          303 :             break;
     784         8769 :         case '.':               /* concatenation */
     785              :             assert(t->child != NULL);
     786         8769 :             if (t->child->flags & SHORTER)    /* reverse scan */
     787          237 :                 er = crevcondissect(v, t, begin, end);
     788              :             else
     789         8532 :                 er = ccondissect(v, t, begin, end);
     790         8769 :             break;
     791          113 :         case '|':               /* alternation */
     792              :             assert(t->child != NULL);
     793          113 :             er = caltdissect(v, t, begin, end);
     794          113 :             break;
     795          167 :         case '*':               /* iteration */
     796              :             assert(t->child != NULL);
     797          167 :             if (t->child->flags & SHORTER)    /* reverse scan */
     798            8 :                 er = creviterdissect(v, t, begin, end);
     799              :             else
     800          159 :                 er = citerdissect(v, t, begin, end);
     801          167 :             break;
     802           32 :         case '(':               /* no-op capture node */
     803              :             assert(t->child != NULL);
     804           32 :             er = cdissect(v, t->child, begin, end);
     805           32 :             break;
     806            0 :         default:
     807            0 :             er = REG_ASSERT;
     808            0 :             break;
     809              :     }
     810              : 
     811              :     /*
     812              :      * We should never have a match failure unless backrefs lurk below;
     813              :      * otherwise, either caller failed to check the DFA, or there's some
     814              :      * inconsistency between the DFA and the node's innards.
     815              :      */
     816              :     assert(er != REG_NOMATCH || (t->flags & BACKR));
     817              : 
     818              :     /*
     819              :      * If this node is marked as capturing, save successful match's location.
     820              :      */
     821        20816 :     if (t->capno > 0 && er == REG_OKAY)
     822         5539 :         subset(v, t, begin, end);
     823              : 
     824        20816 :     return er;
     825              : }
     826              : 
     827              : /*
     828              :  * ccondissect - dissect match for concatenation node
     829              :  */
     830              : static int                      /* regexec return code */
     831         8532 : ccondissect(struct vars *v,
     832              :             struct subre *t,
     833              :             chr *begin,         /* beginning of relevant substring */
     834              :             chr *end)           /* end of same */
     835              : {
     836         8532 :     struct subre *left = t->child;
     837         8532 :     struct subre *right = left->sibling;
     838              :     struct dfa *d;
     839              :     struct dfa *d2;
     840              :     chr        *mid;
     841              :     int         er;
     842              : 
     843              :     assert(t->op == '.');
     844              :     assert(left != NULL && left->cnfa.nstates > 0);
     845              :     assert(right != NULL && right->cnfa.nstates > 0);
     846              :     assert(right->sibling == NULL);
     847              :     assert(!(left->flags & SHORTER));
     848              : 
     849         8532 :     d = getsubdfa(v, left);
     850         8532 :     NOERR();
     851         8532 :     d2 = getsubdfa(v, right);
     852         8532 :     NOERR();
     853              :     MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
     854              : 
     855              :     /* pick a tentative midpoint */
     856         8532 :     mid = longest(v, d, begin, end, (int *) NULL);
     857         8532 :     NOERR();
     858         8532 :     if (mid == NULL)
     859            8 :         return REG_NOMATCH;
     860              :     MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
     861              : 
     862              :     /* iterate until satisfaction or failure */
     863              :     for (;;)
     864              :     {
     865              :         /* try this midpoint on for size */
     866        10116 :         if (longest(v, d2, mid, end, (int *) NULL) == end)
     867              :         {
     868         8467 :             er = cdissect(v, left, begin, mid);
     869         8467 :             if (er == REG_OKAY)
     870              :             {
     871         8393 :                 er = cdissect(v, right, mid, end);
     872         8393 :                 if (er == REG_OKAY)
     873              :                 {
     874              :                     /* satisfaction */
     875              :                     MDEBUG(("%d: successful\n", t->id));
     876         8035 :                     return REG_OKAY;
     877              :                 }
     878              :                 /* Reset left's matches (right should have done so itself) */
     879          358 :                 zaptreesubs(v, left);
     880              :             }
     881          432 :             if (er != REG_NOMATCH)
     882            0 :                 return er;
     883              :         }
     884         2081 :         NOERR();
     885              : 
     886              :         /* that midpoint didn't work, find a new one */
     887         2081 :         if (mid == begin)
     888              :         {
     889              :             /* all possibilities exhausted */
     890              :             MDEBUG(("%d: no midpoint\n", t->id));
     891          105 :             return REG_NOMATCH;
     892              :         }
     893         1976 :         mid = longest(v, d, begin, mid - 1, (int *) NULL);
     894         1976 :         NOERR();
     895         1976 :         if (mid == NULL)
     896              :         {
     897              :             /* failed to find a new one */
     898              :             MDEBUG(("%d: failed midpoint\n", t->id));
     899          384 :             return REG_NOMATCH;
     900              :         }
     901              :         MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
     902              :     }
     903              : 
     904              :     /* can't get here */
     905              :     return REG_ASSERT;
     906              : }
     907              : 
     908              : /*
     909              :  * crevcondissect - dissect match for concatenation node, shortest-first
     910              :  */
     911              : static int                      /* regexec return code */
     912          237 : crevcondissect(struct vars *v,
     913              :                struct subre *t,
     914              :                chr *begin,      /* beginning of relevant substring */
     915              :                chr *end)        /* end of same */
     916              : {
     917          237 :     struct subre *left = t->child;
     918          237 :     struct subre *right = left->sibling;
     919              :     struct dfa *d;
     920              :     struct dfa *d2;
     921              :     chr        *mid;
     922              :     int         er;
     923              : 
     924              :     assert(t->op == '.');
     925              :     assert(left != NULL && left->cnfa.nstates > 0);
     926              :     assert(right != NULL && right->cnfa.nstates > 0);
     927              :     assert(right->sibling == NULL);
     928              :     assert(left->flags & SHORTER);
     929              : 
     930          237 :     d = getsubdfa(v, left);
     931          237 :     NOERR();
     932          237 :     d2 = getsubdfa(v, right);
     933          237 :     NOERR();
     934              :     MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
     935              : 
     936              :     /* pick a tentative midpoint */
     937          237 :     mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
     938          237 :     NOERR();
     939          237 :     if (mid == NULL)
     940            0 :         return REG_NOMATCH;
     941              :     MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
     942              : 
     943              :     /* iterate until satisfaction or failure */
     944              :     for (;;)
     945              :     {
     946              :         /* try this midpoint on for size */
     947          853 :         if (longest(v, d2, mid, end, (int *) NULL) == end)
     948              :         {
     949          128 :             er = cdissect(v, left, begin, mid);
     950          128 :             if (er == REG_OKAY)
     951              :             {
     952          128 :                 er = cdissect(v, right, mid, end);
     953          128 :                 if (er == REG_OKAY)
     954              :                 {
     955              :                     /* satisfaction */
     956              :                     MDEBUG(("%d: successful\n", t->id));
     957          116 :                     return REG_OKAY;
     958              :                 }
     959              :                 /* Reset left's matches (right should have done so itself) */
     960           12 :                 zaptreesubs(v, left);
     961              :             }
     962           12 :             if (er != REG_NOMATCH)
     963            0 :                 return er;
     964              :         }
     965          737 :         NOERR();
     966              : 
     967              :         /* that midpoint didn't work, find a new one */
     968          737 :         if (mid == end)
     969              :         {
     970              :             /* all possibilities exhausted */
     971              :             MDEBUG(("%d: no midpoint\n", t->id));
     972          121 :             return REG_NOMATCH;
     973              :         }
     974          616 :         mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
     975          616 :         NOERR();
     976          616 :         if (mid == NULL)
     977              :         {
     978              :             /* failed to find a new one */
     979              :             MDEBUG(("%d: failed midpoint\n", t->id));
     980            0 :             return REG_NOMATCH;
     981              :         }
     982              :         MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
     983              :     }
     984              : 
     985              :     /* can't get here */
     986              :     return REG_ASSERT;
     987              : }
     988              : 
     989              : /*
     990              :  * cbrdissect - dissect match for backref node
     991              :  *
     992              :  * The backref match might already have been verified by dfa_backref(),
     993              :  * but we don't know that for sure so must check it here.
     994              :  */
     995              : static int                      /* regexec return code */
     996          303 : cbrdissect(struct vars *v,
     997              :            struct subre *t,
     998              :            chr *begin,          /* beginning of relevant substring */
     999              :            chr *end)            /* end of same */
    1000              : {
    1001          303 :     int         n = t->backno;
    1002              :     size_t      numreps;
    1003              :     size_t      tlen;
    1004              :     size_t      brlen;
    1005              :     chr        *brstring;
    1006              :     chr        *p;
    1007          303 :     int         min = t->min;
    1008          303 :     int         max = t->max;
    1009              : 
    1010              :     assert(t != NULL);
    1011              :     assert(t->op == 'b');
    1012              :     assert(n >= 0);
    1013              :     assert((size_t) n < v->nmatch);
    1014              : 
    1015              :     MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
    1016              :             LOFF(begin), LOFF(end)));
    1017              : 
    1018              :     /* get the backreferenced string */
    1019          303 :     if (v->pmatch[n].rm_so == -1)
    1020           83 :         return REG_NOMATCH;
    1021          220 :     brstring = v->start + v->pmatch[n].rm_so;
    1022          220 :     brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
    1023              : 
    1024              :     /* special cases for zero-length strings */
    1025          220 :     if (brlen == 0)
    1026              :     {
    1027              :         /*
    1028              :          * matches only if target is zero length, but any number of
    1029              :          * repetitions can be considered to be present
    1030              :          */
    1031           11 :         if (begin == end && min <= max)
    1032              :         {
    1033              :             MDEBUG(("%d: backref matched trivially\n", t->id));
    1034           11 :             return REG_OKAY;
    1035              :         }
    1036            0 :         return REG_NOMATCH;
    1037              :     }
    1038          209 :     if (begin == end)
    1039              :     {
    1040              :         /* matches only if zero repetitions are okay */
    1041            8 :         if (min == 0)
    1042              :         {
    1043              :             MDEBUG(("%d: backref matched trivially\n", t->id));
    1044            7 :             return REG_OKAY;
    1045              :         }
    1046            1 :         return REG_NOMATCH;
    1047              :     }
    1048              : 
    1049              :     /*
    1050              :      * check target length to see if it could possibly be an allowed number of
    1051              :      * repetitions of brstring
    1052              :      */
    1053              :     assert(end > begin);
    1054          201 :     tlen = end - begin;
    1055          201 :     if (tlen % brlen != 0)
    1056            5 :         return REG_NOMATCH;
    1057          196 :     numreps = tlen / brlen;
    1058          196 :     if (numreps < min || (numreps > max && max != DUPINF))
    1059            3 :         return REG_NOMATCH;
    1060              : 
    1061              :     /* okay, compare the actual string contents */
    1062          193 :     p = begin;
    1063          338 :     while (numreps-- > 0)
    1064              :     {
    1065          219 :         if ((*v->g->compare) (brstring, p, brlen) != 0)
    1066           74 :             return REG_NOMATCH;
    1067          145 :         p += brlen;
    1068              :     }
    1069              : 
    1070              :     MDEBUG(("%d: backref matched\n", t->id));
    1071          119 :     return REG_OKAY;
    1072              : }
    1073              : 
    1074              : /*
    1075              :  * caltdissect - dissect match for alternation node
    1076              :  */
    1077              : static int                      /* regexec return code */
    1078          113 : caltdissect(struct vars *v,
    1079              :             struct subre *t,
    1080              :             chr *begin,         /* beginning of relevant substring */
    1081              :             chr *end)           /* end of same */
    1082              : {
    1083              :     struct dfa *d;
    1084              :     int         er;
    1085              : 
    1086              :     assert(t->op == '|');
    1087              : 
    1088          113 :     t = t->child;
    1089              :     /* there should be at least 2 alternatives */
    1090              :     assert(t != NULL && t->sibling != NULL);
    1091              : 
    1092          200 :     while (t != NULL)
    1093              :     {
    1094              :         assert(t->cnfa.nstates > 0);
    1095              : 
    1096              :         MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
    1097              : 
    1098          160 :         d = getsubdfa(v, t);
    1099          160 :         NOERR();
    1100          160 :         if (longest(v, d, begin, end, (int *) NULL) == end)
    1101              :         {
    1102              :             MDEBUG(("%d: caltdissect matched\n", t->id));
    1103          125 :             er = cdissect(v, t, begin, end);
    1104          125 :             if (er != REG_NOMATCH)
    1105           73 :                 return er;
    1106              :         }
    1107           87 :         NOERR();
    1108              : 
    1109           87 :         t = t->sibling;
    1110              :     }
    1111              : 
    1112           40 :     return REG_NOMATCH;
    1113              : }
    1114              : 
    1115              : /*
    1116              :  * citerdissect - dissect match for iteration node
    1117              :  */
    1118              : static int                      /* regexec return code */
    1119          159 : citerdissect(struct vars *v,
    1120              :              struct subre *t,
    1121              :              chr *begin,        /* beginning of relevant substring */
    1122              :              chr *end)          /* end of same */
    1123              : {
    1124              :     struct dfa *d;
    1125              :     chr       **endpts;
    1126              :     chr        *limit;
    1127              :     int         min_matches;
    1128              :     size_t      max_matches;
    1129              :     int         nverified;
    1130              :     int         k;
    1131              :     int         i;
    1132              :     int         er;
    1133              : 
    1134              :     assert(t->op == '*');
    1135              :     assert(t->child != NULL && t->child->cnfa.nstates > 0);
    1136              :     assert(!(t->child->flags & SHORTER));
    1137              :     assert(begin <= end);
    1138              : 
    1139              :     MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
    1140              : 
    1141              :     /*
    1142              :      * For the moment, assume the minimum number of matches is 1.  If zero
    1143              :      * matches are allowed, and the target string is empty, we are allowed to
    1144              :      * match regardless of the contents of the iter node --- but we would
    1145              :      * prefer to match once, so that capturing parens get set.  (An example of
    1146              :      * the concern here is a pattern like "()*\1", which historically this
    1147              :      * code has allowed to succeed.)  Therefore, we deal with the zero-matches
    1148              :      * case at the bottom, after failing to find any other way to match.
    1149              :      */
    1150          159 :     min_matches = t->min;
    1151          159 :     if (min_matches <= 0)
    1152          105 :         min_matches = 1;
    1153              : 
    1154              :     /*
    1155              :      * We need workspace to track the endpoints of each sub-match.  Normally
    1156              :      * we consider only nonzero-length sub-matches, so there can be at most
    1157              :      * end-begin of them.  However, if min is larger than that, we will also
    1158              :      * consider zero-length sub-matches in order to find enough matches.
    1159              :      *
    1160              :      * For convenience, endpts[0] contains the "begin" pointer and we store
    1161              :      * sub-match endpoints in endpts[1..max_matches].
    1162              :      */
    1163          159 :     max_matches = end - begin;
    1164          159 :     if (max_matches > t->max && t->max != DUPINF)
    1165            0 :         max_matches = t->max;
    1166          159 :     if (max_matches < min_matches)
    1167           97 :         max_matches = min_matches;
    1168          159 :     endpts = MALLOC_ARRAY(chr *, max_matches + 1);
    1169          159 :     if (endpts == NULL)
    1170            0 :         return REG_ESPACE;
    1171          159 :     endpts[0] = begin;
    1172              : 
    1173          159 :     d = getsubdfa(v, t->child);
    1174          159 :     if (ISERR())
    1175              :     {
    1176            0 :         FREE(endpts);
    1177            0 :         return v->err;
    1178              :     }
    1179              : 
    1180              :     /*
    1181              :      * Our strategy is to first find a set of sub-match endpoints that are
    1182              :      * valid according to the child node's DFA, and then recursively dissect
    1183              :      * each sub-match to confirm validity.  If any validity check fails,
    1184              :      * backtrack that sub-match and try again.  And, when we next try for a
    1185              :      * validity check, we need not recheck any successfully verified
    1186              :      * sub-matches that we didn't move the endpoints of.  nverified remembers
    1187              :      * how many sub-matches are currently known okay.
    1188              :      */
    1189              : 
    1190              :     /* initialize to consider first sub-match */
    1191          159 :     nverified = 0;
    1192          159 :     k = 1;
    1193          159 :     limit = end;
    1194              : 
    1195              :     /* iterate until satisfaction or failure */
    1196          725 :     while (k > 0)
    1197              :     {
    1198              :         /* try to find an endpoint for the k'th sub-match */
    1199          593 :         endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
    1200          593 :         if (ISERR())
    1201              :         {
    1202            0 :             FREE(endpts);
    1203            0 :             return v->err;
    1204              :         }
    1205          593 :         if (endpts[k] == NULL)
    1206              :         {
    1207              :             /* no match possible, so see if we can shorten previous one */
    1208          306 :             k--;
    1209          306 :             goto backtrack;
    1210              :         }
    1211              :         MDEBUG(("%d: working endpoint %d: %ld\n",
    1212              :                 t->id, k, LOFF(endpts[k])));
    1213              : 
    1214              :         /* k'th sub-match can no longer be considered verified */
    1215          287 :         if (nverified >= k)
    1216           12 :             nverified = k - 1;
    1217              : 
    1218          287 :         if (endpts[k] != end)
    1219              :         {
    1220              :             /* haven't reached end yet, try another iteration if allowed */
    1221          200 :             if (k >= max_matches)
    1222              :             {
    1223              :                 /* must try to shorten some previous match */
    1224            0 :                 k--;
    1225            0 :                 goto backtrack;
    1226              :             }
    1227              : 
    1228              :             /* reject zero-length match unless necessary to achieve min */
    1229          200 :             if (endpts[k] == endpts[k - 1] &&
    1230            0 :                 (k >= min_matches || min_matches - k < end - endpts[k]))
    1231            0 :                 goto backtrack;
    1232              : 
    1233          200 :             k++;
    1234          200 :             limit = end;
    1235          200 :             continue;
    1236              :         }
    1237              : 
    1238              :         /*
    1239              :          * We've identified a way to divide the string into k sub-matches that
    1240              :          * works so far as the child DFA can tell.  If k is an allowed number
    1241              :          * of matches, start the slow part: recurse to verify each sub-match.
    1242              :          * We always have k <= max_matches, needn't check that.
    1243              :          */
    1244           87 :         if (k < min_matches)
    1245            0 :             goto backtrack;
    1246              : 
    1247              :         MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
    1248              : 
    1249          140 :         for (i = nverified + 1; i <= k; i++)
    1250              :         {
    1251              :             /* zap any match data from a non-last iteration */
    1252          113 :             zaptreesubs(v, t->child);
    1253          113 :             er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
    1254          113 :             if (er == REG_OKAY)
    1255              :             {
    1256           53 :                 nverified = i;
    1257           53 :                 continue;
    1258              :             }
    1259           60 :             if (er == REG_NOMATCH)
    1260           60 :                 break;
    1261              :             /* oops, something failed */
    1262            0 :             FREE(endpts);
    1263            0 :             return er;
    1264              :         }
    1265              : 
    1266           87 :         if (i > k)
    1267              :         {
    1268              :             /* satisfaction */
    1269              :             MDEBUG(("%d: successful\n", t->id));
    1270           27 :             FREE(endpts);
    1271           27 :             return REG_OKAY;
    1272              :         }
    1273              : 
    1274              :         /* i'th match failed to verify, so backtrack it */
    1275           60 :         k = i;
    1276              : 
    1277          366 : backtrack:
    1278              : 
    1279              :         /*
    1280              :          * Must consider shorter versions of the k'th sub-match.  However,
    1281              :          * we'll only ask for a zero-length match if necessary.
    1282              :          */
    1283          366 :         while (k > 0)
    1284              :         {
    1285          234 :             chr        *prev_end = endpts[k - 1];
    1286              : 
    1287          234 :             if (endpts[k] > prev_end)
    1288              :             {
    1289          234 :                 limit = endpts[k] - 1;
    1290          234 :                 if (limit > prev_end ||
    1291            0 :                     (k < min_matches && min_matches - k >= end - prev_end))
    1292              :                 {
    1293              :                     /* break out of backtrack loop, continue the outer one */
    1294              :                     break;
    1295              :                 }
    1296              :             }
    1297              :             /* can't shorten k'th sub-match any more, consider previous one */
    1298            0 :             k--;
    1299              :         }
    1300              :     }
    1301              : 
    1302              :     /* all possibilities exhausted */
    1303          132 :     FREE(endpts);
    1304              : 
    1305              :     /*
    1306              :      * Now consider the possibility that we can match to a zero-length string
    1307              :      * by using zero repetitions.
    1308              :      */
    1309          132 :     if (t->min == 0 && begin == end)
    1310              :     {
    1311              :         MDEBUG(("%d: allowing zero matches\n", t->id));
    1312           90 :         return REG_OKAY;
    1313              :     }
    1314              : 
    1315              :     MDEBUG(("%d: failed\n", t->id));
    1316           42 :     return REG_NOMATCH;
    1317              : }
    1318              : 
    1319              : /*
    1320              :  * creviterdissect - dissect match for iteration node, shortest-first
    1321              :  */
    1322              : static int                      /* regexec return code */
    1323            8 : creviterdissect(struct vars *v,
    1324              :                 struct subre *t,
    1325              :                 chr *begin,     /* beginning of relevant substring */
    1326              :                 chr *end)       /* end of same */
    1327              : {
    1328              :     struct dfa *d;
    1329              :     chr       **endpts;
    1330              :     chr        *limit;
    1331              :     int         min_matches;
    1332              :     size_t      max_matches;
    1333              :     int         nverified;
    1334              :     int         k;
    1335              :     int         i;
    1336              :     int         er;
    1337              : 
    1338              :     assert(t->op == '*');
    1339              :     assert(t->child != NULL && t->child->cnfa.nstates > 0);
    1340              :     assert(t->child->flags & SHORTER);
    1341              :     assert(begin <= end);
    1342              : 
    1343              :     MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
    1344              : 
    1345              :     /*
    1346              :      * If zero matches are allowed, and target string is empty, just declare
    1347              :      * victory.  OTOH, if target string isn't empty, zero matches can't work
    1348              :      * so we pretend the min is 1.
    1349              :      */
    1350            8 :     min_matches = t->min;
    1351            8 :     if (min_matches <= 0)
    1352              :     {
    1353            8 :         if (begin == end)
    1354              :         {
    1355              :             MDEBUG(("%d: allowing zero matches\n", t->id));
    1356            1 :             return REG_OKAY;
    1357              :         }
    1358            7 :         min_matches = 1;
    1359              :     }
    1360              : 
    1361              :     /*
    1362              :      * We need workspace to track the endpoints of each sub-match.  Normally
    1363              :      * we consider only nonzero-length sub-matches, so there can be at most
    1364              :      * end-begin of them.  However, if min is larger than that, we will also
    1365              :      * consider zero-length sub-matches in order to find enough matches.
    1366              :      *
    1367              :      * For convenience, endpts[0] contains the "begin" pointer and we store
    1368              :      * sub-match endpoints in endpts[1..max_matches].
    1369              :      */
    1370            7 :     max_matches = end - begin;
    1371            7 :     if (max_matches > t->max && t->max != DUPINF)
    1372            7 :         max_matches = t->max;
    1373            7 :     if (max_matches < min_matches)
    1374            0 :         max_matches = min_matches;
    1375            7 :     endpts = MALLOC_ARRAY(chr *, max_matches + 1);
    1376            7 :     if (endpts == NULL)
    1377            0 :         return REG_ESPACE;
    1378            7 :     endpts[0] = begin;
    1379              : 
    1380            7 :     d = getsubdfa(v, t->child);
    1381            7 :     if (ISERR())
    1382              :     {
    1383            0 :         FREE(endpts);
    1384            0 :         return v->err;
    1385              :     }
    1386              : 
    1387              :     /*
    1388              :      * Our strategy is to first find a set of sub-match endpoints that are
    1389              :      * valid according to the child node's DFA, and then recursively dissect
    1390              :      * each sub-match to confirm validity.  If any validity check fails,
    1391              :      * backtrack that sub-match and try again.  And, when we next try for a
    1392              :      * validity check, we need not recheck any successfully verified
    1393              :      * sub-matches that we didn't move the endpoints of.  nverified remembers
    1394              :      * how many sub-matches are currently known okay.
    1395              :      */
    1396              : 
    1397              :     /* initialize to consider first sub-match */
    1398            7 :     nverified = 0;
    1399            7 :     k = 1;
    1400            7 :     limit = begin;
    1401              : 
    1402              :     /* iterate until satisfaction or failure */
    1403            9 :     while (k > 0)
    1404              :     {
    1405              :         /* disallow zero-length match unless necessary to achieve min */
    1406            7 :         if (limit == endpts[k - 1] &&
    1407            7 :             limit != end &&
    1408            0 :             (k >= min_matches || min_matches - k < end - limit))
    1409            7 :             limit++;
    1410              : 
    1411              :         /* if this is the last allowed sub-match, it must reach to the end */
    1412            7 :         if (k >= max_matches)
    1413            7 :             limit = end;
    1414              : 
    1415              :         /* try to find an endpoint for the k'th sub-match */
    1416            7 :         endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
    1417              :                              (chr **) NULL, (int *) NULL);
    1418            7 :         if (ISERR())
    1419              :         {
    1420            0 :             FREE(endpts);
    1421            0 :             return v->err;
    1422              :         }
    1423            7 :         if (endpts[k] == NULL)
    1424              :         {
    1425              :             /* no match possible, so see if we can lengthen previous one */
    1426            0 :             k--;
    1427            0 :             goto backtrack;
    1428              :         }
    1429              :         MDEBUG(("%d: working endpoint %d: %ld\n",
    1430              :                 t->id, k, LOFF(endpts[k])));
    1431              : 
    1432              :         /* k'th sub-match can no longer be considered verified */
    1433            7 :         if (nverified >= k)
    1434            0 :             nverified = k - 1;
    1435              : 
    1436            7 :         if (endpts[k] != end)
    1437              :         {
    1438              :             /* haven't reached end yet, try another iteration if allowed */
    1439            0 :             if (k >= max_matches)
    1440              :             {
    1441              :                 /* must try to lengthen some previous match */
    1442            0 :                 k--;
    1443            0 :                 goto backtrack;
    1444              :             }
    1445              : 
    1446            0 :             k++;
    1447            0 :             limit = endpts[k - 1];
    1448            0 :             continue;
    1449              :         }
    1450              : 
    1451              :         /*
    1452              :          * We've identified a way to divide the string into k sub-matches that
    1453              :          * works so far as the child DFA can tell.  If k is an allowed number
    1454              :          * of matches, start the slow part: recurse to verify each sub-match.
    1455              :          * We always have k <= max_matches, needn't check that.
    1456              :          */
    1457            7 :         if (k < min_matches)
    1458            0 :             goto backtrack;
    1459              : 
    1460              :         MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
    1461              : 
    1462           12 :         for (i = nverified + 1; i <= k; i++)
    1463              :         {
    1464              :             /* zap any match data from a non-last iteration */
    1465            7 :             zaptreesubs(v, t->child);
    1466            7 :             er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
    1467            7 :             if (er == REG_OKAY)
    1468              :             {
    1469            5 :                 nverified = i;
    1470            5 :                 continue;
    1471              :             }
    1472            2 :             if (er == REG_NOMATCH)
    1473            2 :                 break;
    1474              :             /* oops, something failed */
    1475            0 :             FREE(endpts);
    1476            0 :             return er;
    1477              :         }
    1478              : 
    1479            7 :         if (i > k)
    1480              :         {
    1481              :             /* satisfaction */
    1482              :             MDEBUG(("%d: successful\n", t->id));
    1483            5 :             FREE(endpts);
    1484            5 :             return REG_OKAY;
    1485              :         }
    1486              : 
    1487              :         /* i'th match failed to verify, so backtrack it */
    1488            2 :         k = i;
    1489              : 
    1490            2 : backtrack:
    1491              : 
    1492              :         /*
    1493              :          * Must consider longer versions of the k'th sub-match.
    1494              :          */
    1495            4 :         while (k > 0)
    1496              :         {
    1497            2 :             if (endpts[k] < end)
    1498              :             {
    1499            0 :                 limit = endpts[k] + 1;
    1500              :                 /* break out of backtrack loop, continue the outer one */
    1501            0 :                 break;
    1502              :             }
    1503              :             /* can't lengthen k'th sub-match any more, consider previous one */
    1504            2 :             k--;
    1505              :         }
    1506              :     }
    1507              : 
    1508              :     /* all possibilities exhausted */
    1509              :     MDEBUG(("%d: failed\n", t->id));
    1510            2 :     FREE(endpts);
    1511            2 :     return REG_NOMATCH;
    1512              : }
    1513              : 
    1514              : 
    1515              : 
    1516              : #include "rege_dfa.c"
        

Generated by: LCOV version 2.0-1