LCOV - code coverage report
Current view: top level - src/backend/regex - regexec.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 431 507 85.0 %
Date: 2021-12-09 04:09:06 Functions: 16 16 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * re_*exec and friends - match REs
       3             :  *
       4             :  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
       5             :  *
       6             :  * Development of this software was funded, in part, by Cray Research Inc.,
       7             :  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
       8             :  * Corporation, none of whom are responsible for the results.  The author
       9             :  * thanks all of them.
      10             :  *
      11             :  * Redistribution and use in source and binary forms -- with or without
      12             :  * modification -- are permitted for any purpose, provided that
      13             :  * redistributions in source form retain this entire copyright notice and
      14             :  * indicate the origin and nature of any modifications.
      15             :  *
      16             :  * I'd appreciate being given credit for this package in the documentation
      17             :  * of software which uses it, but that is not a requirement.
      18             :  *
      19             :  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
      20             :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      21             :  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
      22             :  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      23             :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      24             :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
      25             :  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      26             :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      27             :  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
      28             :  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      29             :  *
      30             :  * src/backend/regex/regexec.c
      31             :  *
      32             :  */
      33             : 
      34             : #include "regex/regguts.h"
      35             : 
      36             : 
      37             : 
      38             : /* lazy-DFA representation */
      39             : struct arcp
      40             : {                               /* "pointer" to an outarc */
      41             :     struct sset *ss;
      42             :     color       co;
      43             : };
      44             : 
      45             : struct sset
      46             : {                               /* state set */
      47             :     unsigned   *states;         /* pointer to bitvector */
      48             :     unsigned    hash;           /* hash of bitvector */
      49             : #define  HASH(bv, nw)    (((nw) == 1) ? *(bv) : hash(bv, nw))
      50             : #define  HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
      51             :         memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
      52             :     int         flags;
      53             : #define  STARTER     01         /* the initial state set */
      54             : #define  POSTSTATE   02         /* includes the goal state */
      55             : #define  LOCKED      04         /* locked in cache */
      56             : #define  NOPROGRESS  010        /* zero-progress state set */
      57             :     struct arcp ins;            /* chain of inarcs pointing here */
      58             :     chr        *lastseen;       /* last entered on arrival here */
      59             :     struct sset **outs;         /* outarc vector indexed by color */
      60             :     struct arcp *inchain;       /* chain-pointer vector for outarcs */
      61             : };
      62             : 
      63             : struct dfa
      64             : {
      65             :     int         nssets;         /* size of cache */
      66             :     int         nssused;        /* how many entries occupied yet */
      67             :     int         nstates;        /* number of states */
      68             :     int         ncolors;        /* length of outarc and inchain vectors */
      69             :     int         wordsper;       /* length of state-set bitvectors */
      70             :     struct sset *ssets;         /* state-set cache */
      71             :     unsigned   *statesarea;     /* bitvector storage */
      72             :     unsigned   *work;           /* pointer to work area within statesarea */
      73             :     struct sset **outsarea;     /* outarc-vector storage */
      74             :     struct arcp *incarea;       /* inchain storage */
      75             :     struct cnfa *cnfa;
      76             :     struct colormap *cm;
      77             :     chr        *lastpost;       /* location of last cache-flushed success */
      78             :     chr        *lastnopr;       /* location of last cache-flushed NOPROGRESS */
      79             :     struct sset *search;        /* replacement-search-pointer memory */
      80             :     int         backno;         /* if DFA for a backref, subno it refers to */
      81             :     short       backmin;        /* min repetitions for backref */
      82             :     short       backmax;        /* max repetitions for backref */
      83             :     bool        ismalloced;     /* should this struct dfa be freed? */
      84             :     bool        arraysmalloced; /* should its subsidiary arrays be freed? */
      85             : };
      86             : 
      87             : #define WORK    1               /* number of work bitvectors needed */
      88             : 
      89             : /* setup for non-malloc allocation for small cases */
      90             : #define FEWSTATES   20          /* must be less than UBITS */
      91             : #define FEWCOLORS   15
      92             : struct smalldfa
      93             : {
      94             :     struct dfa  dfa;            /* must be first */
      95             :     struct sset ssets[FEWSTATES * 2];
      96             :     unsigned    statesarea[FEWSTATES * 2 + WORK];
      97             :     struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
      98             :     struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
      99             : };
     100             : 
     101             : #define DOMALLOC    ((struct smalldfa *)NULL)   /* force malloc */
     102             : 
     103             : 
     104             : 
     105             : /* internal variables, bundled for easy passing around */
     106             : struct vars
     107             : {
     108             :     regex_t    *re;
     109             :     struct guts *g;
     110             :     int         eflags;         /* copies of arguments */
     111             :     size_t      nmatch;
     112             :     regmatch_t *pmatch;
     113             :     rm_detail_t *details;
     114             :     chr        *start;          /* start of string */
     115             :     chr        *search_start;   /* search start of string */
     116             :     chr        *stop;           /* just past end of string */
     117             :     int         err;            /* error code if any (0 none) */
     118             :     struct dfa **subdfas;       /* per-tree-subre DFAs */
     119             :     struct dfa **ladfas;        /* per-lacon-subre DFAs */
     120             :     struct sset **lblastcss;    /* per-lacon-subre lookbehind restart data */
     121             :     chr       **lblastcp;       /* per-lacon-subre lookbehind restart data */
     122             :     struct smalldfa dfa1;
     123             :     struct smalldfa dfa2;
     124             : };
     125             : 
     126             : #define VISERR(vv)  ((vv)->err != 0) /* have we seen an error yet? */
     127             : #define ISERR() VISERR(v)
     128             : #define VERR(vv,e)  ((vv)->err = ((vv)->err ? (vv)->err : (e)))
     129             : #define ERR(e)  VERR(v, e)      /* record an error */
     130             : #define NOERR() {if (ISERR()) return v->err;}    /* if error seen, return it */
     131             : #define OFF(p)  ((p) - v->start)
     132             : #define LOFF(p) ((long)OFF(p))
     133             : 
     134             : 
     135             : 
     136             : /*
     137             :  * forward declarations
     138             :  */
     139             : /* === regexec.c === */
     140             : static struct dfa *getsubdfa(struct vars *, struct subre *);
     141             : static struct dfa *getladfa(struct vars *, int);
     142             : static int  find(struct vars *, struct cnfa *, struct colormap *);
     143             : static int  cfind(struct vars *, struct cnfa *, struct colormap *);
     144             : static int  cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
     145             : static void zapallsubs(regmatch_t *, size_t);
     146             : static void zaptreesubs(struct vars *, struct subre *);
     147             : static void subset(struct vars *, struct subre *, chr *, chr *);
     148             : static int  cdissect(struct vars *, struct subre *, chr *, chr *);
     149             : static int  ccondissect(struct vars *, struct subre *, chr *, chr *);
     150             : static int  crevcondissect(struct vars *, struct subre *, chr *, chr *);
     151             : static int  cbrdissect(struct vars *, struct subre *, chr *, chr *);
     152             : static int  caltdissect(struct vars *, struct subre *, chr *, chr *);
     153             : static int  citerdissect(struct vars *, struct subre *, chr *, chr *);
     154             : static int  creviterdissect(struct vars *, struct subre *, chr *, chr *);
     155             : 
     156             : /* === rege_dfa.c === */
     157             : static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
     158             : static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
     159             : static int  matchuntil(struct vars *, struct dfa *, chr *, struct sset **, chr **);
     160             : static chr *dfa_backref(struct vars *, struct dfa *, chr *, chr *, chr *, bool);
     161             : static chr *lastcold(struct vars *, struct dfa *);
     162             : static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
     163             : static void freedfa(struct dfa *);
     164             : static unsigned hash(unsigned *, int);
     165             : static struct sset *initialize(struct vars *, struct dfa *, chr *);
     166             : static struct sset *miss(struct vars *, struct dfa *, struct sset *, color, chr *, chr *);
     167             : static int  lacon(struct vars *, struct cnfa *, chr *, color);
     168             : static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
     169             : static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
     170             : 
     171             : 
     172             : /*
     173             :  * pg_regexec - match regular expression
     174             :  */
     175             : int
     176     1509916 : pg_regexec(regex_t *re,
     177             :            const chr *string,
     178             :            size_t len,
     179             :            size_t search_start,
     180             :            rm_detail_t *details,
     181             :            size_t nmatch,
     182             :            regmatch_t pmatch[],
     183             :            int flags)
     184             : {
     185             :     struct vars var;
     186     1509916 :     register struct vars *v = &var;
     187             :     int         st;
     188             :     size_t      n;
     189             :     size_t      i;
     190             :     int         backref;
     191             : 
     192             : #define  LOCALMAT    20
     193             :     regmatch_t  mat[LOCALMAT];
     194             : 
     195             : #define  LOCALDFAS   40
     196             :     struct dfa *subdfas[LOCALDFAS];
     197             : 
     198             :     /* sanity checks */
     199     1509916 :     if (re == NULL || string == NULL || re->re_magic != REMAGIC)
     200           0 :         return REG_INVARG;
     201     1509916 :     if (re->re_csize != sizeof(chr))
     202           0 :         return REG_MIXED;
     203     1509916 :     if (search_start > len)
     204           4 :         return REG_NOMATCH;
     205             : 
     206             :     /* Initialize locale-dependent support */
     207     1509912 :     pg_set_regex_collation(re->re_collation);
     208             : 
     209             :     /* setup */
     210     1509912 :     v->re = re;
     211     1509912 :     v->g = (struct guts *) re->re_guts;
     212     1509912 :     if ((v->g->cflags & REG_EXPECT) && details == NULL)
     213           0 :         return REG_INVARG;
     214     1509912 :     if (v->g->info & REG_UIMPOSSIBLE)
     215          32 :         return REG_NOMATCH;
     216     1509880 :     backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
     217     1509880 :     v->eflags = flags;
     218     1509880 :     if (backref && nmatch <= v->g->nsub)
     219             :     {
     220             :         /* need larger work area */
     221         124 :         v->nmatch = v->g->nsub + 1;
     222         124 :         if (v->nmatch <= LOCALMAT)
     223         122 :             v->pmatch = mat;
     224             :         else
     225           2 :             v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t));
     226         124 :         if (v->pmatch == NULL)
     227           0 :             return REG_ESPACE;
     228         124 :         zapallsubs(v->pmatch, v->nmatch);
     229             :     }
     230             :     else
     231             :     {
     232             :         /* we can store results directly in caller's array */
     233     1509756 :         v->pmatch = pmatch;
     234             :         /* ensure any extra entries in caller's array are filled with -1 */
     235     1509756 :         if (nmatch > 0)
     236     1014634 :             zapallsubs(pmatch, nmatch);
     237             :         /* then forget about extra entries, to avoid useless work in find() */
     238     1509756 :         if (nmatch > v->g->nsub + 1)
     239         624 :             nmatch = v->g->nsub + 1;
     240     1509756 :         v->nmatch = nmatch;
     241             :     }
     242     1509880 :     v->details = details;
     243     1509880 :     v->start = (chr *) string;
     244     1509880 :     v->search_start = (chr *) string + search_start;
     245     1509880 :     v->stop = (chr *) string + len;
     246     1509880 :     v->err = 0;
     247     1509880 :     v->subdfas = NULL;
     248     1509880 :     v->ladfas = NULL;
     249     1509880 :     v->lblastcss = NULL;
     250     1509880 :     v->lblastcp = NULL;
     251             :     /* below this point, "goto cleanup" will behave sanely */
     252             : 
     253             :     assert(v->g->ntree >= 0);
     254     1509880 :     n = (size_t) v->g->ntree;
     255     1509880 :     if (n <= LOCALDFAS)
     256     1509876 :         v->subdfas = subdfas;
     257             :     else
     258             :     {
     259           4 :         v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
     260           4 :         if (v->subdfas == NULL)
     261             :         {
     262           0 :             st = REG_ESPACE;
     263           0 :             goto cleanup;
     264             :         }
     265             :     }
     266     4547750 :     for (i = 0; i < n; i++)
     267     3037870 :         v->subdfas[i] = NULL;
     268             : 
     269             :     assert(v->g->nlacons >= 0);
     270     1509880 :     n = (size_t) v->g->nlacons;
     271     1509880 :     if (n > 0)
     272             :     {
     273         848 :         v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
     274         848 :         if (v->ladfas == NULL)
     275             :         {
     276           0 :             st = REG_ESPACE;
     277           0 :             goto cleanup;
     278             :         }
     279        2592 :         for (i = 0; i < n; i++)
     280        1744 :             v->ladfas[i] = NULL;
     281         848 :         v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
     282         848 :         v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
     283         848 :         if (v->lblastcss == NULL || v->lblastcp == NULL)
     284             :         {
     285           0 :             st = REG_ESPACE;
     286           0 :             goto cleanup;
     287             :         }
     288        2592 :         for (i = 0; i < n; i++)
     289             :         {
     290        1744 :             v->lblastcss[i] = NULL;
     291        1744 :             v->lblastcp[i] = NULL;
     292             :         }
     293             :     }
     294             : 
     295             :     /* do it */
     296             :     assert(v->g->tree != NULL);
     297     1509880 :     if (backref)
     298         224 :         st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
     299             :     else
     300     1509656 :         st = find(v, &v->g->tree->cnfa, &v->g->cmap);
     301             : 
     302             :     /* on success, ensure caller's match vector is filled correctly */
     303     1509880 :     if (st == REG_OKAY && nmatch > 0)
     304             :     {
     305      808716 :         if (v->pmatch != pmatch)
     306             :         {
     307             :             /* copy portion of match vector over from (larger) work area */
     308             :             assert(nmatch <= v->nmatch);
     309          30 :             memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t));
     310             :         }
     311      808716 :         if (v->g->cflags & REG_NOSUB)
     312             :         {
     313             :             /* don't expose possibly-partial sub-match results to caller */
     314      804342 :             zapallsubs(pmatch, nmatch);
     315             :         }
     316             :     }
     317             : 
     318             :     /* clean up */
     319      705538 : cleanup:
     320     1509880 :     if (v->pmatch != pmatch && v->pmatch != mat)
     321           2 :         FREE(v->pmatch);
     322     1509880 :     if (v->subdfas != NULL)
     323             :     {
     324     1509880 :         n = (size_t) v->g->ntree;
     325     4547750 :         for (i = 0; i < n; i++)
     326             :         {
     327     3037870 :             if (v->subdfas[i] != NULL)
     328       16214 :                 freedfa(v->subdfas[i]);
     329             :         }
     330     1509880 :         if (v->subdfas != subdfas)
     331           4 :             FREE(v->subdfas);
     332             :     }
     333     1509880 :     if (v->ladfas != NULL)
     334             :     {
     335         848 :         n = (size_t) v->g->nlacons;
     336        2592 :         for (i = 0; i < n; i++)
     337             :         {
     338        1744 :             if (v->ladfas[i] != NULL)
     339          82 :                 freedfa(v->ladfas[i]);
     340             :         }
     341         848 :         FREE(v->ladfas);
     342             :     }
     343     1509880 :     if (v->lblastcss != NULL)
     344         848 :         FREE(v->lblastcss);
     345     1509880 :     if (v->lblastcp != NULL)
     346         848 :         FREE(v->lblastcp);
     347             : 
     348             : #ifdef REG_DEBUG
     349             :     if (v->eflags & (REG_FTRACE | REG_MTRACE))
     350             :         fflush(stdout);
     351             : #endif
     352             : 
     353     1509880 :     return st;
     354             : }
     355             : 
     356             : /*
     357             :  * getsubdfa - create or re-fetch the DFA for a tree subre node
     358             :  *
     359             :  * We only need to create the DFA once per overall regex execution.
     360             :  * The DFA will be freed by the cleanup step in pg_regexec().
     361             :  */
     362             : static struct dfa *
     363       17228 : getsubdfa(struct vars *v,
     364             :           struct subre *t)
     365             : {
     366       17228 :     struct dfa *d = v->subdfas[t->id];
     367             : 
     368       17228 :     if (d == NULL)
     369             :     {
     370       16214 :         d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
     371       16214 :         if (d == NULL)
     372           0 :             return NULL;
     373             :         /* set up additional info if this is a backref node */
     374       16214 :         if (t->op == 'b')
     375             :         {
     376         218 :             d->backno = t->backno;
     377         218 :             d->backmin = t->min;
     378         218 :             d->backmax = t->max;
     379             :         }
     380       16214 :         v->subdfas[t->id] = d;
     381             :     }
     382       17228 :     return d;
     383             : }
     384             : 
     385             : /*
     386             :  * getladfa - create or re-fetch the DFA for a LACON subre node
     387             :  *
     388             :  * Same as above, but for LACONs.
     389             :  */
     390             : static struct dfa *
     391         188 : getladfa(struct vars *v,
     392             :          int n)
     393             : {
     394             :     assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
     395             : 
     396         188 :     if (v->ladfas[n] == NULL)
     397             :     {
     398          82 :         struct subre *sub = &v->g->lacons[n];
     399             : 
     400          82 :         v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
     401             :         /* a LACON can't contain a backref, so nothing else to do */
     402             :     }
     403         188 :     return v->ladfas[n];
     404             : }
     405             : 
     406             : /*
     407             :  * find - find a match for the main NFA (no-complications case)
     408             :  */
     409             : static int
     410     1509656 : find(struct vars *v,
     411             :      struct cnfa *cnfa,
     412             :      struct colormap *cm)
     413             : {
     414             :     struct dfa *s;
     415             :     struct dfa *d;
     416             :     chr        *begin;
     417     1509656 :     chr        *end = NULL;
     418             :     chr        *cold;
     419             :     chr        *open;           /* open and close of range of possible starts */
     420             :     chr        *close;
     421             :     int         hitend;
     422     1509656 :     int         shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
     423             : 
     424             :     /* first, a shot with the search RE */
     425     1509656 :     s = newdfa(v, &v->g->search, cm, &v->dfa1);
     426     1509656 :     if (s == NULL)
     427           0 :         return v->err;
     428             :     MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
     429     1509656 :     cold = NULL;
     430     1509656 :     close = shortest(v, s, v->search_start, v->search_start, v->stop,
     431             :                      &cold, (int *) NULL);
     432     1509656 :     freedfa(s);
     433     1509656 :     NOERR();
     434     1509656 :     if (v->g->cflags & REG_EXPECT)
     435             :     {
     436             :         assert(v->details != NULL);
     437          54 :         if (cold != NULL)
     438          54 :             v->details->rm_extend.rm_so = OFF(cold);
     439             :         else
     440           0 :             v->details->rm_extend.rm_so = OFF(v->stop);
     441          54 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
     442             :     }
     443     1509656 :     if (close == NULL)          /* not found */
     444      472568 :         return REG_NOMATCH;
     445     1037088 :     if (v->nmatch == 0)          /* found, don't need exact location */
     446      228470 :         return REG_OKAY;
     447             : 
     448             :     /* find starting point and match */
     449             :     assert(cold != NULL);
     450      808618 :     open = cold;
     451      808618 :     cold = NULL;
     452             :     MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
     453      808618 :     d = newdfa(v, cnfa, cm, &v->dfa1);
     454      808618 :     if (d == NULL)
     455           0 :         return v->err;
     456      808672 :     for (begin = open; begin <= close; begin++)
     457             :     {
     458             :         MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
     459      808672 :         if (shorter)
     460          94 :             end = shortest(v, d, begin, begin, v->stop,
     461             :                            (chr **) NULL, &hitend);
     462             :         else
     463      808578 :             end = longest(v, d, begin, v->stop, &hitend);
     464      808672 :         if (ISERR())
     465             :         {
     466           0 :             freedfa(d);
     467           0 :             return v->err;
     468             :         }
     469      808672 :         if (hitend && cold == NULL)
     470        4062 :             cold = begin;
     471      808672 :         if (end != NULL)
     472      808618 :             break;              /* NOTE BREAK OUT */
     473             :     }
     474             :     assert(end != NULL);        /* search RE succeeded so loop should */
     475      808618 :     freedfa(d);
     476             : 
     477             :     /* and pin down details */
     478             :     assert(v->nmatch > 0);
     479      808618 :     v->pmatch[0].rm_so = OFF(begin);
     480      808618 :     v->pmatch[0].rm_eo = OFF(end);
     481      808618 :     if (v->g->cflags & REG_EXPECT)
     482             :     {
     483          20 :         if (cold != NULL)
     484          10 :             v->details->rm_extend.rm_so = OFF(cold);
     485             :         else
     486          10 :             v->details->rm_extend.rm_so = OFF(v->stop);
     487          20 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
     488             :     }
     489      808618 :     if (v->nmatch == 1)          /* no need for submatches */
     490      805742 :         return REG_OKAY;
     491             : 
     492             :     /* find submatches */
     493        2876 :     return cdissect(v, v->g->tree, begin, end);
     494             : }
     495             : 
     496             : /*
     497             :  * cfind - find a match for the main NFA (with complications)
     498             :  */
     499             : static int
     500         224 : cfind(struct vars *v,
     501             :       struct cnfa *cnfa,
     502             :       struct colormap *cm)
     503             : {
     504             :     struct dfa *s;
     505             :     struct dfa *d;
     506             :     chr        *cold;
     507             :     int         ret;
     508             : 
     509         224 :     s = newdfa(v, &v->g->search, cm, &v->dfa1);
     510         224 :     if (s == NULL)
     511           0 :         return v->err;
     512         224 :     d = newdfa(v, cnfa, cm, &v->dfa2);
     513         224 :     if (d == NULL)
     514             :     {
     515           0 :         freedfa(s);
     516           0 :         return v->err;
     517             :     }
     518             : 
     519         224 :     ret = cfindloop(v, cnfa, cm, d, s, &cold);
     520             : 
     521         224 :     freedfa(d);
     522         224 :     freedfa(s);
     523         224 :     NOERR();
     524         224 :     if (v->g->cflags & REG_EXPECT)
     525             :     {
     526             :         assert(v->details != NULL);
     527           0 :         if (cold != NULL)
     528           0 :             v->details->rm_extend.rm_so = OFF(cold);
     529             :         else
     530           0 :             v->details->rm_extend.rm_so = OFF(v->stop);
     531           0 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
     532             :     }
     533         224 :     return ret;
     534             : }
     535             : 
     536             : /*
     537             :  * cfindloop - the heart of cfind
     538             :  */
     539             : static int
     540         224 : cfindloop(struct vars *v,
     541             :           struct cnfa *cnfa,
     542             :           struct colormap *cm,
     543             :           struct dfa *d,
     544             :           struct dfa *s,
     545             :           chr **coldp)          /* where to put coldstart pointer */
     546             : {
     547             :     chr        *begin;
     548             :     chr        *end;
     549             :     chr        *cold;
     550             :     chr        *open;           /* open and close of range of possible starts */
     551             :     chr        *close;
     552             :     chr        *estart;
     553             :     chr        *estop;
     554             :     int         er;
     555         224 :     int         shorter = v->g->tree->flags & SHORTER;
     556             :     int         hitend;
     557             : 
     558             :     assert(d != NULL && s != NULL);
     559         224 :     cold = NULL;
     560         224 :     close = v->search_start;
     561             :     do
     562             :     {
     563             :         /* Search with the search RE for match range at/beyond "close" */
     564             :         MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
     565         248 :         close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
     566         248 :         if (ISERR())
     567             :         {
     568           0 :             *coldp = cold;
     569           0 :             return v->err;
     570             :         }
     571         248 :         if (close == NULL)
     572          22 :             break;              /* no more possible match anywhere */
     573             :         assert(cold != NULL);
     574         226 :         open = cold;
     575         226 :         cold = NULL;
     576             :         /* Search for matches starting between "open" and "close" inclusive */
     577             :         MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
     578         782 :         for (begin = open; begin <= close; begin++)
     579             :         {
     580             :             MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
     581         690 :             estart = begin;
     582         690 :             estop = v->stop;
     583             :             for (;;)
     584             :             {
     585             :                 /* Here we use the top node's detailed RE */
     586         976 :                 if (shorter)
     587         130 :                     end = shortest(v, d, begin, estart,
     588             :                                    estop, (chr **) NULL, &hitend);
     589             :                 else
     590         846 :                     end = longest(v, d, begin, estop,
     591             :                                   &hitend);
     592         976 :                 if (ISERR())
     593             :                 {
     594           0 :                     *coldp = cold;
     595           0 :                     return v->err;
     596             :                 }
     597         976 :                 if (hitend && cold == NULL)
     598         164 :                     cold = begin;
     599         976 :                 if (end == NULL)
     600         532 :                     break;      /* no match with this begin point, try next */
     601             :                 MDEBUG(("tentative end %ld\n", LOFF(end)));
     602             :                 /* Dissect the potential match to see if it really matches */
     603         444 :                 er = cdissect(v, v->g->tree, begin, end);
     604         444 :                 if (er == REG_OKAY)
     605             :                 {
     606         134 :                     if (v->nmatch > 0)
     607             :                     {
     608         134 :                         v->pmatch[0].rm_so = OFF(begin);
     609         134 :                         v->pmatch[0].rm_eo = OFF(end);
     610             :                     }
     611         134 :                     *coldp = cold;
     612         134 :                     return REG_OKAY;
     613             :                 }
     614         310 :                 if (er != REG_NOMATCH)
     615             :                 {
     616           0 :                     ERR(er);
     617           0 :                     *coldp = cold;
     618           0 :                     return er;
     619             :                 }
     620             :                 /* Try next longer/shorter match with same begin point */
     621         310 :                 if (shorter)
     622             :                 {
     623         108 :                     if (end == estop)
     624          16 :                         break;  /* no more, so try next begin point */
     625          92 :                     estart = end + 1;
     626             :                 }
     627             :                 else
     628             :                 {
     629         202 :                     if (end == begin)
     630           8 :                         break;  /* no more, so try next begin point */
     631         194 :                     estop = end - 1;
     632             :                 }
     633             :             }                   /* end loop over endpoint positions */
     634             :         }                       /* end loop over beginning positions */
     635             : 
     636             :         /*
     637             :          * If we get here, there is no possible match starting at or before
     638             :          * "close", so consider matches beyond that.  We'll do a fresh search
     639             :          * with the search RE to find a new promising match range.
     640             :          */
     641          92 :         close++;
     642          92 :     } while (close < v->stop);
     643             : 
     644          90 :     *coldp = cold;
     645          90 :     return REG_NOMATCH;
     646             : }
     647             : 
     648             : /*
     649             :  * zapallsubs - initialize all subexpression matches to "no match"
     650             :  *
     651             :  * Note that p[0], the overall-match location, is not touched.
     652             :  */
     653             : static void
     654     1819100 : zapallsubs(regmatch_t *p,
     655             :            size_t n)
     656             : {
     657             :     size_t      i;
     658             : 
     659     1826458 :     for (i = n - 1; i > 0; i--)
     660             :     {
     661        7358 :         p[i].rm_so = -1;
     662        7358 :         p[i].rm_eo = -1;
     663             :     }
     664     1819100 : }
     665             : 
     666             : /*
     667             :  * zaptreesubs - initialize subexpressions within subtree to "no match"
     668             :  */
     669             : static void
     670         802 : zaptreesubs(struct vars *v,
     671             :             struct subre *t)
     672             : {
     673         802 :     int         n = t->capno;
     674             :     struct subre *t2;
     675             : 
     676         802 :     if (n > 0)
     677             :     {
     678         384 :         if ((size_t) n < v->nmatch)
     679             :         {
     680         384 :             v->pmatch[n].rm_so = -1;
     681         384 :             v->pmatch[n].rm_eo = -1;
     682             :         }
     683             :     }
     684             : 
     685        1076 :     for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
     686         274 :         zaptreesubs(v, t2);
     687         802 : }
     688             : 
     689             : /*
     690             :  * subset - set subexpression match data for a successful subre
     691             :  */
     692             : static void
     693        5408 : subset(struct vars *v,
     694             :        struct subre *sub,
     695             :        chr *begin,
     696             :        chr *end)
     697             : {
     698        5408 :     int         n = sub->capno;
     699             : 
     700             :     assert(n > 0);
     701        5408 :     if ((size_t) n >= v->nmatch)
     702          12 :         return;
     703             : 
     704             :     MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
     705        5396 :     v->pmatch[n].rm_so = OFF(begin);
     706        5396 :     v->pmatch[n].rm_eo = OFF(end);
     707             : }
     708             : 
     709             : /*
     710             :  * cdissect - check backrefs and determine subexpression matches
     711             :  *
     712             :  * cdissect recursively processes a subre tree to check matching of backrefs
     713             :  * and/or identify submatch boundaries for capture nodes.  The proposed match
     714             :  * runs from "begin" to "end" (not including "end"), and we are basically
     715             :  * "dissecting" it to see where the submatches are.
     716             :  *
     717             :  * Before calling any level of cdissect, the caller must have run the node's
     718             :  * DFA and found that the proposed substring satisfies the DFA.  (We make
     719             :  * the caller do that because in concatenation and iteration nodes, it's
     720             :  * much faster to check all the substrings against the child DFAs before we
     721             :  * recurse.)
     722             :  *
     723             :  * A side-effect of a successful match is to save match locations for
     724             :  * capturing subexpressions in v->pmatch[].  This is a little bit tricky,
     725             :  * so we make the following rules:
     726             :  * 1. Before initial entry to cdissect, all match data must have been
     727             :  *    cleared (this is seen to by zapallsubs).
     728             :  * 2. Before any recursive entry to cdissect, the match data for that
     729             :  *    subexpression tree must be guaranteed clear (see zaptreesubs).
     730             :  * 3. When returning REG_OKAY, each level of cdissect will have saved
     731             :  *    any relevant match locations.
     732             :  * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
     733             :  *    that its subexpression match locations are again clear.
     734             :  * 5. No guarantees are made for error cases (i.e., other result codes).
     735             :  * 6. When a level of cdissect abandons a successful sub-match, it will
     736             :  *    clear that subtree's match locations with zaptreesubs before trying
     737             :  *    any new DFA match or cdissect call for that subtree or any subtree
     738             :  *    to its right (that is, any subtree that could have a backref into the
     739             :  *    abandoned match).
     740             :  * This may seem overly complicated, but it's difficult to simplify it
     741             :  * because of the provision that match locations must be reset before
     742             :  * any fresh DFA match (a rule that is needed to make dfa_backref safe).
     743             :  * That means it won't work to just reset relevant match locations at the
     744             :  * start of each cdissect level.
     745             :  */
     746             : static int                      /* regexec return code */
     747       20164 : cdissect(struct vars *v,
     748             :          struct subre *t,
     749             :          chr *begin,            /* beginning of relevant substring */
     750             :          chr *end)              /* end of same */
     751             : {
     752             :     int         er;
     753             : 
     754             :     assert(t != NULL);
     755             :     MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
     756             : 
     757             :     /* handy place to check for operation cancel */
     758       20164 :     if (CANCEL_REQUESTED(v->re))
     759           0 :         return REG_CANCEL;
     760             :     /* ... and stack overrun */
     761       20164 :     if (STACK_TOO_DEEP(v->re))
     762           0 :         return REG_ETOOBIG;
     763             : 
     764       20164 :     switch (t->op)
     765             :     {
     766       11102 :         case '=':               /* terminal node */
     767             :             assert(t->child == NULL);
     768       11102 :             er = REG_OKAY;      /* no action, parent did the work */
     769       11102 :             break;
     770         322 :         case 'b':               /* back reference */
     771             :             assert(t->child == NULL);
     772         322 :             er = cbrdissect(v, t, begin, end);
     773         322 :             break;
     774        8502 :         case '.':               /* concatenation */
     775             :             assert(t->child != NULL);
     776        8502 :             if (t->child->flags & SHORTER)    /* reverse scan */
     777         226 :                 er = crevcondissect(v, t, begin, end);
     778             :             else
     779        8276 :                 er = ccondissect(v, t, begin, end);
     780        8502 :             break;
     781          52 :         case '|':               /* alternation */
     782             :             assert(t->child != NULL);
     783          52 :             er = caltdissect(v, t, begin, end);
     784          52 :             break;
     785         128 :         case '*':               /* iteration */
     786             :             assert(t->child != NULL);
     787         128 :             if (t->child->flags & SHORTER)    /* reverse scan */
     788          12 :                 er = creviterdissect(v, t, begin, end);
     789             :             else
     790         116 :                 er = citerdissect(v, t, begin, end);
     791         128 :             break;
     792          58 :         case '(':               /* no-op capture node */
     793             :             assert(t->child != NULL);
     794          58 :             er = cdissect(v, t->child, begin, end);
     795          58 :             break;
     796           0 :         default:
     797           0 :             er = REG_ASSERT;
     798           0 :             break;
     799             :     }
     800             : 
     801             :     /*
     802             :      * We should never have a match failure unless backrefs lurk below;
     803             :      * otherwise, either caller failed to check the DFA, or there's some
     804             :      * inconsistency between the DFA and the node's innards.
     805             :      */
     806             :     assert(er != REG_NOMATCH || (t->flags & BACKR));
     807             : 
     808             :     /*
     809             :      * If this node is marked as capturing, save successful match's location.
     810             :      */
     811       20164 :     if (t->capno > 0 && er == REG_OKAY)
     812        5408 :         subset(v, t, begin, end);
     813             : 
     814       20164 :     return er;
     815             : }
     816             : 
     817             : /*
     818             :  * ccondissect - dissect match for concatenation node
     819             :  */
     820             : static int                      /* regexec return code */
     821        8276 : ccondissect(struct vars *v,
     822             :             struct subre *t,
     823             :             chr *begin,         /* beginning of relevant substring */
     824             :             chr *end)           /* end of same */
     825             : {
     826        8276 :     struct subre *left = t->child;
     827        8276 :     struct subre *right = left->sibling;
     828             :     struct dfa *d;
     829             :     struct dfa *d2;
     830             :     chr        *mid;
     831             :     int         er;
     832             : 
     833             :     assert(t->op == '.');
     834             :     assert(left != NULL && left->cnfa.nstates > 0);
     835             :     assert(right != NULL && right->cnfa.nstates > 0);
     836             :     assert(right->sibling == NULL);
     837             :     assert(!(left->flags & SHORTER));
     838             : 
     839        8276 :     d = getsubdfa(v, left);
     840        8276 :     NOERR();
     841        8276 :     d2 = getsubdfa(v, right);
     842        8276 :     NOERR();
     843             :     MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
     844             : 
     845             :     /* pick a tentative midpoint */
     846        8276 :     mid = longest(v, d, begin, end, (int *) NULL);
     847        8276 :     NOERR();
     848        8276 :     if (mid == NULL)
     849          16 :         return REG_NOMATCH;
     850             :     MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
     851             : 
     852             :     /* iterate until satisfaction or failure */
     853             :     for (;;)
     854             :     {
     855             :         /* try this midpoint on for size */
     856       10546 :         if (longest(v, d2, mid, end, (int *) NULL) == end)
     857             :         {
     858        8194 :             er = cdissect(v, left, begin, mid);
     859        8194 :             if (er == REG_OKAY)
     860             :             {
     861        8118 :                 er = cdissect(v, right, mid, end);
     862        8118 :                 if (er == REG_OKAY)
     863             :                 {
     864             :                     /* satisfaction */
     865             :                     MDEBUG(("%d: successful\n", t->id));
     866        7748 :                     return REG_OKAY;
     867             :                 }
     868             :                 /* Reset left's matches (right should have done so itself) */
     869         370 :                 zaptreesubs(v, left);
     870             :             }
     871         446 :             if (er != REG_NOMATCH)
     872           0 :                 return er;
     873             :         }
     874        2798 :         NOERR();
     875             : 
     876             :         /* that midpoint didn't work, find a new one */
     877        2798 :         if (mid == begin)
     878             :         {
     879             :             /* all possibilities exhausted */
     880             :             MDEBUG(("%d: no midpoint\n", t->id));
     881         120 :             return REG_NOMATCH;
     882             :         }
     883        2678 :         mid = longest(v, d, begin, mid - 1, (int *) NULL);
     884        2678 :         NOERR();
     885        2678 :         if (mid == NULL)
     886             :         {
     887             :             /* failed to find a new one */
     888             :             MDEBUG(("%d: failed midpoint\n", t->id));
     889         392 :             return REG_NOMATCH;
     890             :         }
     891             :         MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
     892             :     }
     893             : 
     894             :     /* can't get here */
     895             :     return REG_ASSERT;
     896             : }
     897             : 
     898             : /*
     899             :  * crevcondissect - dissect match for concatenation node, shortest-first
     900             :  */
     901             : static int                      /* regexec return code */
     902         226 : crevcondissect(struct vars *v,
     903             :                struct subre *t,
     904             :                chr *begin,      /* beginning of relevant substring */
     905             :                chr *end)        /* end of same */
     906             : {
     907         226 :     struct subre *left = t->child;
     908         226 :     struct subre *right = left->sibling;
     909             :     struct dfa *d;
     910             :     struct dfa *d2;
     911             :     chr        *mid;
     912             :     int         er;
     913             : 
     914             :     assert(t->op == '.');
     915             :     assert(left != NULL && left->cnfa.nstates > 0);
     916             :     assert(right != NULL && right->cnfa.nstates > 0);
     917             :     assert(right->sibling == NULL);
     918             :     assert(left->flags & SHORTER);
     919             : 
     920         226 :     d = getsubdfa(v, left);
     921         226 :     NOERR();
     922         226 :     d2 = getsubdfa(v, right);
     923         226 :     NOERR();
     924             :     MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
     925             : 
     926             :     /* pick a tentative midpoint */
     927         226 :     mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
     928         226 :     NOERR();
     929         226 :     if (mid == NULL)
     930           0 :         return REG_NOMATCH;
     931             :     MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
     932             : 
     933             :     /* iterate until satisfaction or failure */
     934             :     for (;;)
     935             :     {
     936             :         /* try this midpoint on for size */
     937         858 :         if (longest(v, d2, mid, end, (int *) NULL) == end)
     938             :         {
     939         138 :             er = cdissect(v, left, begin, mid);
     940         138 :             if (er == REG_OKAY)
     941             :             {
     942         138 :                 er = cdissect(v, right, mid, end);
     943         138 :                 if (er == REG_OKAY)
     944             :                 {
     945             :                     /* satisfaction */
     946             :                     MDEBUG(("%d: successful\n", t->id));
     947         114 :                     return REG_OKAY;
     948             :                 }
     949             :                 /* Reset left's matches (right should have done so itself) */
     950          24 :                 zaptreesubs(v, left);
     951             :             }
     952          24 :             if (er != REG_NOMATCH)
     953           0 :                 return er;
     954             :         }
     955         744 :         NOERR();
     956             : 
     957             :         /* that midpoint didn't work, find a new one */
     958         744 :         if (mid == end)
     959             :         {
     960             :             /* all possibilities exhausted */
     961             :             MDEBUG(("%d: no midpoint\n", t->id));
     962         112 :             return REG_NOMATCH;
     963             :         }
     964         632 :         mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
     965         632 :         NOERR();
     966         632 :         if (mid == NULL)
     967             :         {
     968             :             /* failed to find a new one */
     969             :             MDEBUG(("%d: failed midpoint\n", t->id));
     970           0 :             return REG_NOMATCH;
     971             :         }
     972             :         MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
     973             :     }
     974             : 
     975             :     /* can't get here */
     976             :     return REG_ASSERT;
     977             : }
     978             : 
     979             : /*
     980             :  * cbrdissect - dissect match for backref node
     981             :  *
     982             :  * The backref match might already have been verified by dfa_backref(),
     983             :  * but we don't know that for sure so must check it here.
     984             :  */
     985             : static int                      /* regexec return code */
     986         322 : cbrdissect(struct vars *v,
     987             :            struct subre *t,
     988             :            chr *begin,          /* beginning of relevant substring */
     989             :            chr *end)            /* end of same */
     990             : {
     991         322 :     int         n = t->backno;
     992             :     size_t      numreps;
     993             :     size_t      tlen;
     994             :     size_t      brlen;
     995             :     chr        *brstring;
     996             :     chr        *p;
     997         322 :     int         min = t->min;
     998         322 :     int         max = t->max;
     999             : 
    1000             :     assert(t != NULL);
    1001             :     assert(t->op == 'b');
    1002             :     assert(n >= 0);
    1003             :     assert((size_t) n < v->nmatch);
    1004             : 
    1005             :     MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
    1006             :             LOFF(begin), LOFF(end)));
    1007             : 
    1008             :     /* get the backreferenced string */
    1009         322 :     if (v->pmatch[n].rm_so == -1)
    1010          82 :         return REG_NOMATCH;
    1011         240 :     brstring = v->start + v->pmatch[n].rm_so;
    1012         240 :     brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
    1013             : 
    1014             :     /* special cases for zero-length strings */
    1015         240 :     if (brlen == 0)
    1016             :     {
    1017             :         /*
    1018             :          * matches only if target is zero length, but any number of
    1019             :          * repetitions can be considered to be present
    1020             :          */
    1021          10 :         if (begin == end && min <= max)
    1022             :         {
    1023             :             MDEBUG(("%d: backref matched trivially\n", t->id));
    1024          10 :             return REG_OKAY;
    1025             :         }
    1026           0 :         return REG_NOMATCH;
    1027             :     }
    1028         230 :     if (begin == end)
    1029             :     {
    1030             :         /* matches only if zero repetitions are okay */
    1031          10 :         if (min == 0)
    1032             :         {
    1033             :             MDEBUG(("%d: backref matched trivially\n", t->id));
    1034           8 :             return REG_OKAY;
    1035             :         }
    1036           2 :         return REG_NOMATCH;
    1037             :     }
    1038             : 
    1039             :     /*
    1040             :      * check target length to see if it could possibly be an allowed number of
    1041             :      * repetitions of brstring
    1042             :      */
    1043             :     assert(end > begin);
    1044         220 :     tlen = end - begin;
    1045         220 :     if (tlen % brlen != 0)
    1046          10 :         return REG_NOMATCH;
    1047         210 :     numreps = tlen / brlen;
    1048         210 :     if (numreps < min || (numreps > max && max != DUPINF))
    1049           6 :         return REG_NOMATCH;
    1050             : 
    1051             :     /* okay, compare the actual string contents */
    1052         204 :     p = begin;
    1053         366 :     while (numreps-- > 0)
    1054             :     {
    1055         232 :         if ((*v->g->compare) (brstring, p, brlen) != 0)
    1056          70 :             return REG_NOMATCH;
    1057         162 :         p += brlen;
    1058             :     }
    1059             : 
    1060             :     MDEBUG(("%d: backref matched\n", t->id));
    1061         134 :     return REG_OKAY;
    1062             : }
    1063             : 
    1064             : /*
    1065             :  * caltdissect - dissect match for alternation node
    1066             :  */
    1067             : static int                      /* regexec return code */
    1068          52 : caltdissect(struct vars *v,
    1069             :             struct subre *t,
    1070             :             chr *begin,         /* beginning of relevant substring */
    1071             :             chr *end)           /* end of same */
    1072             : {
    1073             :     struct dfa *d;
    1074             :     int         er;
    1075             : 
    1076             :     assert(t->op == '|');
    1077             : 
    1078          52 :     t = t->child;
    1079             :     /* there should be at least 2 alternatives */
    1080             :     assert(t != NULL && t->sibling != NULL);
    1081             : 
    1082         136 :     while (t != NULL)
    1083             :     {
    1084             :         assert(t->cnfa.nstates > 0);
    1085             : 
    1086             :         MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
    1087             : 
    1088          98 :         d = getsubdfa(v, t);
    1089          98 :         NOERR();
    1090          98 :         if (longest(v, d, begin, end, (int *) NULL) == end)
    1091             :         {
    1092             :             MDEBUG(("%d: caltdissect matched\n", t->id));
    1093          64 :             er = cdissect(v, t, begin, end);
    1094          64 :             if (er != REG_NOMATCH)
    1095          14 :                 return er;
    1096             :         }
    1097          84 :         NOERR();
    1098             : 
    1099          84 :         t = t->sibling;
    1100             :     }
    1101             : 
    1102          38 :     return REG_NOMATCH;
    1103             : }
    1104             : 
    1105             : /*
    1106             :  * citerdissect - dissect match for iteration node
    1107             :  */
    1108             : static int                      /* regexec return code */
    1109         116 : citerdissect(struct vars *v,
    1110             :              struct subre *t,
    1111             :              chr *begin,        /* beginning of relevant substring */
    1112             :              chr *end)          /* end of same */
    1113             : {
    1114             :     struct dfa *d;
    1115             :     chr       **endpts;
    1116             :     chr        *limit;
    1117             :     int         min_matches;
    1118             :     size_t      max_matches;
    1119             :     int         nverified;
    1120             :     int         k;
    1121             :     int         i;
    1122             :     int         er;
    1123             : 
    1124             :     assert(t->op == '*');
    1125             :     assert(t->child != NULL && t->child->cnfa.nstates > 0);
    1126             :     assert(!(t->child->flags & SHORTER));
    1127             :     assert(begin <= end);
    1128             : 
    1129             :     MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
    1130             : 
    1131             :     /*
    1132             :      * For the moment, assume the minimum number of matches is 1.  If zero
    1133             :      * matches are allowed, and the target string is empty, we are allowed to
    1134             :      * match regardless of the contents of the iter node --- but we would
    1135             :      * prefer to match once, so that capturing parens get set.  (An example of
    1136             :      * the concern here is a pattern like "()*\1", which historically this
    1137             :      * code has allowed to succeed.)  Therefore, we deal with the zero-matches
    1138             :      * case at the bottom, after failing to find any other way to match.
    1139             :      */
    1140         116 :     min_matches = t->min;
    1141         116 :     if (min_matches <= 0)
    1142          62 :         min_matches = 1;
    1143             : 
    1144             :     /*
    1145             :      * We need workspace to track the endpoints of each sub-match.  Normally
    1146             :      * we consider only nonzero-length sub-matches, so there can be at most
    1147             :      * end-begin of them.  However, if min is larger than that, we will also
    1148             :      * consider zero-length sub-matches in order to find enough matches.
    1149             :      *
    1150             :      * For convenience, endpts[0] contains the "begin" pointer and we store
    1151             :      * sub-match endpoints in endpts[1..max_matches].
    1152             :      */
    1153         116 :     max_matches = end - begin;
    1154         116 :     if (max_matches > t->max && t->max != DUPINF)
    1155           0 :         max_matches = t->max;
    1156         116 :     if (max_matches < min_matches)
    1157          46 :         max_matches = min_matches;
    1158         116 :     endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
    1159         116 :     if (endpts == NULL)
    1160           0 :         return REG_ESPACE;
    1161         116 :     endpts[0] = begin;
    1162             : 
    1163         116 :     d = getsubdfa(v, t->child);
    1164         116 :     if (ISERR())
    1165             :     {
    1166           0 :         FREE(endpts);
    1167           0 :         return v->err;
    1168             :     }
    1169             : 
    1170             :     /*
    1171             :      * Our strategy is to first find a set of sub-match endpoints that are
    1172             :      * valid according to the child node's DFA, and then recursively dissect
    1173             :      * each sub-match to confirm validity.  If any validity check fails,
    1174             :      * backtrack that sub-match and try again.  And, when we next try for a
    1175             :      * validity check, we need not recheck any successfully verified
    1176             :      * sub-matches that we didn't move the endpoints of.  nverified remembers
    1177             :      * how many sub-matches are currently known okay.
    1178             :      */
    1179             : 
    1180             :     /* initialize to consider first sub-match */
    1181         116 :     nverified = 0;
    1182         116 :     k = 1;
    1183         116 :     limit = end;
    1184             : 
    1185             :     /* iterate until satisfaction or failure */
    1186         632 :     while (k > 0)
    1187             :     {
    1188             :         /* try to find an endpoint for the k'th sub-match */
    1189         552 :         endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
    1190         552 :         if (ISERR())
    1191             :         {
    1192           0 :             FREE(endpts);
    1193           0 :             return v->err;
    1194             :         }
    1195         552 :         if (endpts[k] == NULL)
    1196             :         {
    1197             :             /* no match possible, so see if we can shorten previous one */
    1198         254 :             k--;
    1199         254 :             goto backtrack;
    1200             :         }
    1201             :         MDEBUG(("%d: working endpoint %d: %ld\n",
    1202             :                 t->id, k, LOFF(endpts[k])));
    1203             : 
    1204             :         /* k'th sub-match can no longer be considered verified */
    1205         298 :         if (nverified >= k)
    1206          12 :             nverified = k - 1;
    1207             : 
    1208         298 :         if (endpts[k] != end)
    1209             :         {
    1210             :             /* haven't reached end yet, try another iteration if allowed */
    1211         202 :             if (k >= max_matches)
    1212             :             {
    1213             :                 /* must try to shorten some previous match */
    1214           0 :                 k--;
    1215           0 :                 goto backtrack;
    1216             :             }
    1217             : 
    1218             :             /* reject zero-length match unless necessary to achieve min */
    1219         202 :             if (endpts[k] == endpts[k - 1] &&
    1220           0 :                 (k >= min_matches || min_matches - k < end - endpts[k]))
    1221           0 :                 goto backtrack;
    1222             : 
    1223         202 :             k++;
    1224         202 :             limit = end;
    1225         202 :             continue;
    1226             :         }
    1227             : 
    1228             :         /*
    1229             :          * We've identified a way to divide the string into k sub-matches that
    1230             :          * works so far as the child DFA can tell.  If k is an allowed number
    1231             :          * of matches, start the slow part: recurse to verify each sub-match.
    1232             :          * We always have k <= max_matches, needn't check that.
    1233             :          */
    1234          96 :         if (k < min_matches)
    1235           0 :             goto backtrack;
    1236             : 
    1237             :         MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
    1238             : 
    1239         160 :         for (i = nverified + 1; i <= k; i++)
    1240             :         {
    1241             :             /* zap any match data from a non-last iteration */
    1242         124 :             zaptreesubs(v, t->child);
    1243         124 :             er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
    1244         124 :             if (er == REG_OKAY)
    1245             :             {
    1246          64 :                 nverified = i;
    1247          64 :                 continue;
    1248             :             }
    1249          60 :             if (er == REG_NOMATCH)
    1250          60 :                 break;
    1251             :             /* oops, something failed */
    1252           0 :             FREE(endpts);
    1253           0 :             return er;
    1254             :         }
    1255             : 
    1256          96 :         if (i > k)
    1257             :         {
    1258             :             /* satisfaction */
    1259             :             MDEBUG(("%d: successful\n", t->id));
    1260          36 :             FREE(endpts);
    1261          36 :             return REG_OKAY;
    1262             :         }
    1263             : 
    1264             :         /* i'th match failed to verify, so backtrack it */
    1265          60 :         k = i;
    1266             : 
    1267         314 : backtrack:
    1268             : 
    1269             :         /*
    1270             :          * Must consider shorter versions of the k'th sub-match.  However,
    1271             :          * we'll only ask for a zero-length match if necessary.
    1272             :          */
    1273         314 :         while (k > 0)
    1274             :         {
    1275         234 :             chr        *prev_end = endpts[k - 1];
    1276             : 
    1277         234 :             if (endpts[k] > prev_end)
    1278             :             {
    1279         234 :                 limit = endpts[k] - 1;
    1280         234 :                 if (limit > prev_end ||
    1281           0 :                     (k < min_matches && min_matches - k >= end - prev_end))
    1282             :                 {
    1283             :                     /* break out of backtrack loop, continue the outer one */
    1284             :                     break;
    1285             :                 }
    1286             :             }
    1287             :             /* can't shorten k'th sub-match any more, consider previous one */
    1288           0 :             k--;
    1289             :         }
    1290             :     }
    1291             : 
    1292             :     /* all possibilities exhausted */
    1293          80 :     FREE(endpts);
    1294             : 
    1295             :     /*
    1296             :      * Now consider the possibility that we can match to a zero-length string
    1297             :      * by using zero repetitions.
    1298             :      */
    1299          80 :     if (t->min == 0 && begin == end)
    1300             :     {
    1301             :         MDEBUG(("%d: allowing zero matches\n", t->id));
    1302          38 :         return REG_OKAY;
    1303             :     }
    1304             : 
    1305             :     MDEBUG(("%d: failed\n", t->id));
    1306          42 :     return REG_NOMATCH;
    1307             : }
    1308             : 
    1309             : /*
    1310             :  * creviterdissect - dissect match for iteration node, shortest-first
    1311             :  */
    1312             : static int                      /* regexec return code */
    1313          12 : creviterdissect(struct vars *v,
    1314             :                 struct subre *t,
    1315             :                 chr *begin,     /* beginning of relevant substring */
    1316             :                 chr *end)       /* end of same */
    1317             : {
    1318             :     struct dfa *d;
    1319             :     chr       **endpts;
    1320             :     chr        *limit;
    1321             :     int         min_matches;
    1322             :     size_t      max_matches;
    1323             :     int         nverified;
    1324             :     int         k;
    1325             :     int         i;
    1326             :     int         er;
    1327             : 
    1328             :     assert(t->op == '*');
    1329             :     assert(t->child != NULL && t->child->cnfa.nstates > 0);
    1330             :     assert(t->child->flags & SHORTER);
    1331             :     assert(begin <= end);
    1332             : 
    1333             :     MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
    1334             : 
    1335             :     /*
    1336             :      * If zero matches are allowed, and target string is empty, just declare
    1337             :      * victory.  OTOH, if target string isn't empty, zero matches can't work
    1338             :      * so we pretend the min is 1.
    1339             :      */
    1340          12 :     min_matches = t->min;
    1341          12 :     if (min_matches <= 0)
    1342             :     {
    1343          12 :         if (begin == end)
    1344             :         {
    1345             :             MDEBUG(("%d: allowing zero matches\n", t->id));
    1346           2 :             return REG_OKAY;
    1347             :         }
    1348          10 :         min_matches = 1;
    1349             :     }
    1350             : 
    1351             :     /*
    1352             :      * We need workspace to track the endpoints of each sub-match.  Normally
    1353             :      * we consider only nonzero-length sub-matches, so there can be at most
    1354             :      * end-begin of them.  However, if min is larger than that, we will also
    1355             :      * consider zero-length sub-matches in order to find enough matches.
    1356             :      *
    1357             :      * For convenience, endpts[0] contains the "begin" pointer and we store
    1358             :      * sub-match endpoints in endpts[1..max_matches].
    1359             :      */
    1360          10 :     max_matches = end - begin;
    1361          10 :     if (max_matches > t->max && t->max != DUPINF)
    1362          10 :         max_matches = t->max;
    1363          10 :     if (max_matches < min_matches)
    1364           0 :         max_matches = min_matches;
    1365          10 :     endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
    1366          10 :     if (endpts == NULL)
    1367           0 :         return REG_ESPACE;
    1368          10 :     endpts[0] = begin;
    1369             : 
    1370          10 :     d = getsubdfa(v, t->child);
    1371          10 :     if (ISERR())
    1372             :     {
    1373           0 :         FREE(endpts);
    1374           0 :         return v->err;
    1375             :     }
    1376             : 
    1377             :     /*
    1378             :      * Our strategy is to first find a set of sub-match endpoints that are
    1379             :      * valid according to the child node's DFA, and then recursively dissect
    1380             :      * each sub-match to confirm validity.  If any validity check fails,
    1381             :      * backtrack that sub-match and try again.  And, when we next try for a
    1382             :      * validity check, we need not recheck any successfully verified
    1383             :      * sub-matches that we didn't move the endpoints of.  nverified remembers
    1384             :      * how many sub-matches are currently known okay.
    1385             :      */
    1386             : 
    1387             :     /* initialize to consider first sub-match */
    1388          10 :     nverified = 0;
    1389          10 :     k = 1;
    1390          10 :     limit = begin;
    1391             : 
    1392             :     /* iterate until satisfaction or failure */
    1393          14 :     while (k > 0)
    1394             :     {
    1395             :         /* disallow zero-length match unless necessary to achieve min */
    1396          10 :         if (limit == endpts[k - 1] &&
    1397          10 :             limit != end &&
    1398           0 :             (k >= min_matches || min_matches - k < end - limit))
    1399          10 :             limit++;
    1400             : 
    1401             :         /* if this is the last allowed sub-match, it must reach to the end */
    1402          10 :         if (k >= max_matches)
    1403          10 :             limit = end;
    1404             : 
    1405             :         /* try to find an endpoint for the k'th sub-match */
    1406          10 :         endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
    1407             :                              (chr **) NULL, (int *) NULL);
    1408          10 :         if (ISERR())
    1409             :         {
    1410           0 :             FREE(endpts);
    1411           0 :             return v->err;
    1412             :         }
    1413          10 :         if (endpts[k] == NULL)
    1414             :         {
    1415             :             /* no match possible, so see if we can lengthen previous one */
    1416           0 :             k--;
    1417           0 :             goto backtrack;
    1418             :         }
    1419             :         MDEBUG(("%d: working endpoint %d: %ld\n",
    1420             :                 t->id, k, LOFF(endpts[k])));
    1421             : 
    1422             :         /* k'th sub-match can no longer be considered verified */
    1423          10 :         if (nverified >= k)
    1424           0 :             nverified = k - 1;
    1425             : 
    1426          10 :         if (endpts[k] != end)
    1427             :         {
    1428             :             /* haven't reached end yet, try another iteration if allowed */
    1429           0 :             if (k >= max_matches)
    1430             :             {
    1431             :                 /* must try to lengthen some previous match */
    1432           0 :                 k--;
    1433           0 :                 goto backtrack;
    1434             :             }
    1435             : 
    1436           0 :             k++;
    1437           0 :             limit = endpts[k - 1];
    1438           0 :             continue;
    1439             :         }
    1440             : 
    1441             :         /*
    1442             :          * We've identified a way to divide the string into k sub-matches that
    1443             :          * works so far as the child DFA can tell.  If k is an allowed number
    1444             :          * of matches, start the slow part: recurse to verify each sub-match.
    1445             :          * We always have k <= max_matches, needn't check that.
    1446             :          */
    1447          10 :         if (k < min_matches)
    1448           0 :             goto backtrack;
    1449             : 
    1450             :         MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
    1451             : 
    1452          16 :         for (i = nverified + 1; i <= k; i++)
    1453             :         {
    1454             :             /* zap any match data from a non-last iteration */
    1455          10 :             zaptreesubs(v, t->child);
    1456          10 :             er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
    1457          10 :             if (er == REG_OKAY)
    1458             :             {
    1459           6 :                 nverified = i;
    1460           6 :                 continue;
    1461             :             }
    1462           4 :             if (er == REG_NOMATCH)
    1463           4 :                 break;
    1464             :             /* oops, something failed */
    1465           0 :             FREE(endpts);
    1466           0 :             return er;
    1467             :         }
    1468             : 
    1469          10 :         if (i > k)
    1470             :         {
    1471             :             /* satisfaction */
    1472             :             MDEBUG(("%d: successful\n", t->id));
    1473           6 :             FREE(endpts);
    1474           6 :             return REG_OKAY;
    1475             :         }
    1476             : 
    1477             :         /* i'th match failed to verify, so backtrack it */
    1478           4 :         k = i;
    1479             : 
    1480           4 : backtrack:
    1481             : 
    1482             :         /*
    1483             :          * Must consider longer versions of the k'th sub-match.
    1484             :          */
    1485           8 :         while (k > 0)
    1486             :         {
    1487           4 :             if (endpts[k] < end)
    1488             :             {
    1489           0 :                 limit = endpts[k] + 1;
    1490             :                 /* break out of backtrack loop, continue the outer one */
    1491           0 :                 break;
    1492             :             }
    1493             :             /* can't lengthen k'th sub-match any more, consider previous one */
    1494           4 :             k--;
    1495             :         }
    1496             :     }
    1497             : 
    1498             :     /* all possibilities exhausted */
    1499             :     MDEBUG(("%d: failed\n", t->id));
    1500           4 :     FREE(endpts);
    1501           4 :     return REG_NOMATCH;
    1502             : }
    1503             : 
    1504             : 
    1505             : 
    1506             : #include "rege_dfa.c"

Generated by: LCOV version 1.14