LCOV - code coverage report
Current view: top level - src/backend/regex - rege_dfa.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 302 367 82.3 %
Date: 2020-05-31 22:07:05 Functions: 12 12 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * DFA routines
       3             :  * This file is #included by regexec.c.
       4             :  *
       5             :  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
       6             :  *
       7             :  * Development of this software was funded, in part, by Cray Research Inc.,
       8             :  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
       9             :  * Corporation, none of whom are responsible for the results.  The author
      10             :  * thanks all of them.
      11             :  *
      12             :  * Redistribution and use in source and binary forms -- with or without
      13             :  * modification -- are permitted for any purpose, provided that
      14             :  * redistributions in source form retain this entire copyright notice and
      15             :  * indicate the origin and nature of any modifications.
      16             :  *
      17             :  * I'd appreciate being given credit for this package in the documentation
      18             :  * of software which uses it, but that is not a requirement.
      19             :  *
      20             :  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
      21             :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      22             :  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
      23             :  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      24             :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      25             :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
      26             :  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      27             :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      28             :  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
      29             :  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      30             :  *
      31             :  * src/backend/regex/rege_dfa.c
      32             :  *
      33             :  */
      34             : 
      35             : /*
      36             :  * longest - longest-preferred matching engine
      37             :  *
      38             :  * On success, returns match endpoint address.  Returns NULL on no match.
      39             :  * Internal errors also return NULL, with v->err set.
      40             :  */
      41             : static chr *
      42      820032 : longest(struct vars *v,
      43             :         struct dfa *d,
      44             :         chr *start,             /* where the match should start */
      45             :         chr *stop,              /* match must end at or before here */
      46             :         int *hitstopp)          /* record whether hit v->stop, if non-NULL */
      47             : {
      48             :     chr        *cp;
      49      820032 :     chr        *realstop = (stop == v->stop) ? stop : stop + 1;
      50             :     color       co;
      51             :     struct sset *css;
      52             :     struct sset *ss;
      53             :     chr        *post;
      54             :     int         i;
      55      820032 :     struct colormap *cm = d->cm;
      56             : 
      57             :     /* prevent "uninitialized variable" warnings */
      58      820032 :     if (hitstopp != NULL)
      59      805080 :         *hitstopp = 0;
      60             : 
      61             :     /* initialize */
      62      820032 :     css = initialize(v, d, start);
      63      820032 :     if (css == NULL)
      64           0 :         return NULL;
      65      820032 :     cp = start;
      66             : 
      67             :     /* startup */
      68             :     FDEBUG(("+++ startup +++\n"));
      69      820032 :     if (cp == v->start)
      70             :     {
      71        1800 :         co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
      72             :         FDEBUG(("color %ld\n", (long) co));
      73             :     }
      74             :     else
      75             :     {
      76      818232 :         co = GETCOLOR(cm, *(cp - 1));
      77             :         FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
      78             :     }
      79      820032 :     css = miss(v, d, css, co, cp, start);
      80      820032 :     if (css == NULL)
      81         188 :         return NULL;
      82      819844 :     css->lastseen = cp;
      83             : 
      84             :     /*
      85             :      * This is the main text-scanning loop.  It seems worth having two copies
      86             :      * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
      87             :      * builds, when you're not actively tracing.
      88             :      */
      89             : #ifdef REG_DEBUG
      90             :     if (v->eflags & REG_FTRACE)
      91             :     {
      92             :         while (cp < realstop)
      93             :         {
      94             :             FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
      95             :             co = GETCOLOR(cm, *cp);
      96             :             FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
      97             :             ss = css->outs[co];
      98             :             if (ss == NULL)
      99             :             {
     100             :                 ss = miss(v, d, css, co, cp + 1, start);
     101             :                 if (ss == NULL)
     102             :                     break;      /* NOTE BREAK OUT */
     103             :             }
     104             :             cp++;
     105             :             ss->lastseen = cp;
     106             :             css = ss;
     107             :         }
     108             :     }
     109             :     else
     110             : #endif
     111             :     {
     112   293967024 :         while (cp < realstop)
     113             :         {
     114   293954514 :             co = GETCOLOR(cm, *cp);
     115   293954514 :             ss = css->outs[co];
     116   293954514 :             if (ss == NULL)
     117             :             {
     118     2511402 :                 ss = miss(v, d, css, co, cp + 1, start);
     119     2511402 :                 if (ss == NULL)
     120      807334 :                     break;      /* NOTE BREAK OUT */
     121             :             }
     122   293147180 :             cp++;
     123   293147180 :             ss->lastseen = cp;
     124   293147180 :             css = ss;
     125             :         }
     126             :     }
     127             : 
     128      819844 :     if (ISERR())
     129           0 :         return NULL;
     130             : 
     131             :     /* shutdown */
     132             :     FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
     133      819844 :     if (cp == v->stop && stop == v->stop)
     134             :     {
     135        4316 :         if (hitstopp != NULL)
     136        1490 :             *hitstopp = 1;
     137        4316 :         co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
     138             :         FDEBUG(("color %ld\n", (long) co));
     139        4316 :         ss = miss(v, d, css, co, cp, start);
     140        4316 :         if (ISERR())
     141           0 :             return NULL;
     142             :         /* special case:  match ended at eol? */
     143        4316 :         if (ss != NULL && (ss->flags & POSTSTATE))
     144        1806 :             return cp;
     145        2510 :         else if (ss != NULL)
     146           0 :             ss->lastseen = cp;   /* to be tidy */
     147             :     }
     148             : 
     149             :     /* find last match, if any */
     150      818038 :     post = d->lastpost;
     151     4183744 :     for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
     152     3365706 :         if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
     153        6688 :             (post == NULL || post < ss->lastseen))
     154      820172 :             post = ss->lastseen;
     155      818038 :     if (post != NULL)           /* found one */
     156      815354 :         return post - 1;
     157             : 
     158        2684 :     return NULL;
     159             : }
     160             : 
     161             : /*
     162             :  * shortest - shortest-preferred matching engine
     163             :  *
     164             :  * On success, returns match endpoint address.  Returns NULL on no match.
     165             :  * Internal errors also return NULL, with v->err set.
     166             :  */
     167             : static chr *
     168     1422424 : shortest(struct vars *v,
     169             :          struct dfa *d,
     170             :          chr *start,            /* where the match should start */
     171             :          chr *min,              /* match must end at or after here */
     172             :          chr *max,              /* match must end at or before here */
     173             :          chr **coldp,           /* store coldstart pointer here, if non-NULL */
     174             :          int *hitstopp)         /* record whether hit v->stop, if non-NULL */
     175             : {
     176             :     chr        *cp;
     177     1422424 :     chr        *realmin = (min == v->stop) ? min : min + 1;
     178     1422424 :     chr        *realmax = (max == v->stop) ? max : max + 1;
     179             :     color       co;
     180             :     struct sset *css;
     181             :     struct sset *ss;
     182     1422424 :     struct colormap *cm = d->cm;
     183             : 
     184             :     /* prevent "uninitialized variable" warnings */
     185     1422424 :     if (coldp != NULL)
     186     1421194 :         *coldp = NULL;
     187     1422424 :     if (hitstopp != NULL)
     188         182 :         *hitstopp = 0;
     189             : 
     190             :     /* initialize */
     191     1422424 :     css = initialize(v, d, start);
     192     1422424 :     if (css == NULL)
     193           0 :         return NULL;
     194     1422424 :     cp = start;
     195             : 
     196             :     /* startup */
     197             :     FDEBUG(("--- startup ---\n"));
     198     1422424 :     if (cp == v->start)
     199             :     {
     200      618424 :         co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
     201             :         FDEBUG(("color %ld\n", (long) co));
     202             :     }
     203             :     else
     204             :     {
     205      804000 :         co = GETCOLOR(cm, *(cp - 1));
     206             :         FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
     207             :     }
     208     1422424 :     css = miss(v, d, css, co, cp, start);
     209     1422424 :     if (css == NULL)
     210           0 :         return NULL;
     211     1422424 :     css->lastseen = cp;
     212     1422424 :     ss = css;
     213             : 
     214             :     /*
     215             :      * This is the main text-scanning loop.  It seems worth having two copies
     216             :      * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
     217             :      * builds, when you're not actively tracing.
     218             :      */
     219             : #ifdef REG_DEBUG
     220             :     if (v->eflags & REG_FTRACE)
     221             :     {
     222             :         while (cp < realmax)
     223             :         {
     224             :             FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
     225             :             co = GETCOLOR(cm, *cp);
     226             :             FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
     227             :             ss = css->outs[co];
     228             :             if (ss == NULL)
     229             :             {
     230             :                 ss = miss(v, d, css, co, cp + 1, start);
     231             :                 if (ss == NULL)
     232             :                     break;      /* NOTE BREAK OUT */
     233             :             }
     234             :             cp++;
     235             :             ss->lastseen = cp;
     236             :             css = ss;
     237             :             if ((ss->flags & POSTSTATE) && cp >= realmin)
     238             :                 break;          /* NOTE BREAK OUT */
     239             :         }
     240             :     }
     241             :     else
     242             : #endif
     243             :     {
     244    31864750 :         while (cp < realmax)
     245             :         {
     246    31515316 :             co = GETCOLOR(cm, *cp);
     247    31515316 :             ss = css->outs[co];
     248    31515316 :             if (ss == NULL)
     249             :             {
     250     5021732 :                 ss = miss(v, d, css, co, cp + 1, start);
     251     5021732 :                 if (ss == NULL)
     252       51402 :                     break;      /* NOTE BREAK OUT */
     253             :             }
     254    31463914 :             cp++;
     255    31463914 :             ss->lastseen = cp;
     256    31463914 :             css = ss;
     257    31463914 :             if ((ss->flags & POSTSTATE) && cp >= realmin)
     258     1021588 :                 break;          /* NOTE BREAK OUT */
     259             :         }
     260             :     }
     261             : 
     262     1422424 :     if (ss == NULL)
     263       51402 :         return NULL;
     264             : 
     265     1371022 :     if (coldp != NULL)          /* report last no-progress state set, if any */
     266     1369820 :         *coldp = lastcold(v, d);
     267             : 
     268     1371022 :     if ((ss->flags & POSTSTATE) && cp > min)
     269             :     {
     270             :         assert(cp >= realmin);
     271     1021544 :         cp--;
     272             :     }
     273      349478 :     else if (cp == v->stop && max == v->stop)
     274             :     {
     275      349478 :         co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
     276             :         FDEBUG(("color %ld\n", (long) co));
     277      349478 :         ss = miss(v, d, css, co, cp, start);
     278             :         /* match might have ended at eol */
     279      349478 :         if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL)
     280           8 :             *hitstopp = 1;
     281             :     }
     282             : 
     283     1371022 :     if (ss == NULL || !(ss->flags & POSTSTATE))
     284      344102 :         return NULL;
     285             : 
     286     1026920 :     return cp;
     287             : }
     288             : 
     289             : /*
     290             :  * matchuntil - incremental matching engine
     291             :  *
     292             :  * This is meant for use with a search-style NFA (that is, the pattern is
     293             :  * known to act as though it had a leading .*).  We determine whether a
     294             :  * match exists starting at v->start and ending at probe.  Multiple calls
     295             :  * require only O(N) time not O(N^2) so long as the probe values are
     296             :  * nondecreasing.  *lastcss and *lastcp must be initialized to NULL before
     297             :  * starting a series of calls.
     298             :  *
     299             :  * Returns 1 if a match exists, 0 if not.
     300             :  * Internal errors also return 0, with v->err set.
     301             :  */
     302             : static int
     303          56 : matchuntil(struct vars *v,
     304             :            struct dfa *d,
     305             :            chr *probe,          /* we want to know if a match ends here */
     306             :            struct sset **lastcss,   /* state storage across calls */
     307             :            chr **lastcp)        /* state storage across calls */
     308             : {
     309          56 :     chr        *cp = *lastcp;
     310             :     color       co;
     311          56 :     struct sset *css = *lastcss;
     312             :     struct sset *ss;
     313          56 :     struct colormap *cm = d->cm;
     314             : 
     315             :     /* initialize and startup, or restart, if necessary */
     316          56 :     if (cp == NULL || cp > probe)
     317             :     {
     318          16 :         cp = v->start;
     319          16 :         css = initialize(v, d, cp);
     320          16 :         if (css == NULL)
     321           0 :             return 0;
     322             : 
     323             :         FDEBUG((">>> startup >>>\n"));
     324          16 :         co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
     325             :         FDEBUG(("color %ld\n", (long) co));
     326             : 
     327          16 :         css = miss(v, d, css, co, cp, v->start);
     328          16 :         if (css == NULL)
     329           0 :             return 0;
     330          16 :         css->lastseen = cp;
     331             :     }
     332          40 :     else if (css == NULL)
     333             :     {
     334             :         /* we previously found that no match is possible beyond *lastcp */
     335           0 :         return 0;
     336             :     }
     337          56 :     ss = css;
     338             : 
     339             :     /*
     340             :      * This is the main text-scanning loop.  It seems worth having two copies
     341             :      * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
     342             :      * builds, when you're not actively tracing.
     343             :      */
     344             : #ifdef REG_DEBUG
     345             :     if (v->eflags & REG_FTRACE)
     346             :     {
     347             :         while (cp < probe)
     348             :         {
     349             :             FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
     350             :             co = GETCOLOR(cm, *cp);
     351             :             FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
     352             :             ss = css->outs[co];
     353             :             if (ss == NULL)
     354             :             {
     355             :                 ss = miss(v, d, css, co, cp + 1, v->start);
     356             :                 if (ss == NULL)
     357             :                     break;      /* NOTE BREAK OUT */
     358             :             }
     359             :             cp++;
     360             :             ss->lastseen = cp;
     361             :             css = ss;
     362             :         }
     363             :     }
     364             :     else
     365             : #endif
     366             :     {
     367         120 :         while (cp < probe)
     368             :         {
     369          64 :             co = GETCOLOR(cm, *cp);
     370          64 :             ss = css->outs[co];
     371          64 :             if (ss == NULL)
     372             :             {
     373           8 :                 ss = miss(v, d, css, co, cp + 1, v->start);
     374           8 :                 if (ss == NULL)
     375           0 :                     break;      /* NOTE BREAK OUT */
     376             :             }
     377          64 :             cp++;
     378          64 :             ss->lastseen = cp;
     379          64 :             css = ss;
     380             :         }
     381             :     }
     382             : 
     383          56 :     *lastcss = ss;
     384          56 :     *lastcp = cp;
     385             : 
     386          56 :     if (ss == NULL)
     387           0 :         return 0;               /* impossible match, or internal error */
     388             : 
     389             :     /* We need to process one more chr, or the EOS symbol, to check match */
     390          56 :     if (cp < v->stop)
     391             :     {
     392             :         FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
     393          56 :         co = GETCOLOR(cm, *cp);
     394             :         FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
     395          56 :         ss = css->outs[co];
     396          56 :         if (ss == NULL)
     397          36 :             ss = miss(v, d, css, co, cp + 1, v->start);
     398             :     }
     399             :     else
     400             :     {
     401             :         assert(cp == v->stop);
     402           0 :         co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
     403             :         FDEBUG(("color %ld\n", (long) co));
     404           0 :         ss = miss(v, d, css, co, cp, v->start);
     405             :     }
     406             : 
     407          56 :     if (ss == NULL || !(ss->flags & POSTSTATE))
     408          40 :         return 0;
     409             : 
     410          16 :     return 1;
     411             : }
     412             : 
     413             : /*
     414             :  * lastcold - determine last point at which no progress had been made
     415             :  */
     416             : static chr *                    /* endpoint, or NULL */
     417     1369820 : lastcold(struct vars *v,
     418             :          struct dfa *d)
     419             : {
     420             :     struct sset *ss;
     421             :     chr        *nopr;
     422             :     int         i;
     423             : 
     424     1369820 :     nopr = d->lastnopr;
     425     1369820 :     if (nopr == NULL)
     426     1369820 :         nopr = v->start;
     427     7377018 :     for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
     428     6007198 :         if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
     429     2104166 :             nopr = ss->lastseen;
     430     1369820 :     return nopr;
     431             : }
     432             : 
     433             : /*
     434             :  * newdfa - set up a fresh DFA
     435             :  */
     436             : static struct dfa *
     437     2234582 : newdfa(struct vars *v,
     438             :        struct cnfa *cnfa,
     439             :        struct colormap *cm,
     440             :        struct smalldfa *sml)    /* preallocated space, may be NULL */
     441             : {
     442             :     struct dfa *d;
     443     2234582 :     size_t      nss = cnfa->nstates * 2;
     444     2234582 :     int         wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
     445     2234582 :     struct smalldfa *smallwas = sml;
     446             : 
     447             :     assert(cnfa != NULL && cnfa->nstates != 0);
     448             : 
     449     2234582 :     if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
     450             :     {
     451             :         assert(wordsper == 1);
     452     2158810 :         if (sml == NULL)
     453             :         {
     454        7762 :             sml = (struct smalldfa *) MALLOC(sizeof(struct smalldfa));
     455        7762 :             if (sml == NULL)
     456             :             {
     457           0 :                 ERR(REG_ESPACE);
     458           0 :                 return NULL;
     459             :             }
     460             :         }
     461     2158810 :         d = &sml->dfa;
     462     2158810 :         d->ssets = sml->ssets;
     463     2158810 :         d->statesarea = sml->statesarea;
     464     2158810 :         d->work = &d->statesarea[nss];
     465     2158810 :         d->outsarea = sml->outsarea;
     466     2158810 :         d->incarea = sml->incarea;
     467     2158810 :         d->cptsmalloced = 0;
     468     2158810 :         d->mallocarea = (smallwas == NULL) ? (char *) sml : NULL;
     469             :     }
     470             :     else
     471             :     {
     472       75772 :         d = (struct dfa *) MALLOC(sizeof(struct dfa));
     473       75772 :         if (d == NULL)
     474             :         {
     475           0 :             ERR(REG_ESPACE);
     476           0 :             return NULL;
     477             :         }
     478       75772 :         d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
     479       75772 :         d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
     480             :                                             sizeof(unsigned));
     481       75772 :         d->work = &d->statesarea[nss * wordsper];
     482       75772 :         d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
     483             :                                               sizeof(struct sset *));
     484       75772 :         d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
     485             :                                             sizeof(struct arcp));
     486       75772 :         d->cptsmalloced = 1;
     487       75772 :         d->mallocarea = (char *) d;
     488       75772 :         if (d->ssets == NULL || d->statesarea == NULL ||
     489       75772 :             d->outsarea == NULL || d->incarea == NULL)
     490             :         {
     491           0 :             freedfa(d);
     492           0 :             ERR(REG_ESPACE);
     493           0 :             return NULL;
     494             :         }
     495             :     }
     496             : 
     497     2234582 :     d->nssets = (v->eflags & REG_SMALL) ? 7 : nss;
     498     2234582 :     d->nssused = 0;
     499     2234582 :     d->nstates = cnfa->nstates;
     500     2234582 :     d->ncolors = cnfa->ncolors;
     501     2234582 :     d->wordsper = wordsper;
     502     2234582 :     d->cnfa = cnfa;
     503     2234582 :     d->cm = cm;
     504     2234582 :     d->lastpost = NULL;
     505     2234582 :     d->lastnopr = NULL;
     506     2234582 :     d->search = d->ssets;
     507             : 
     508             :     /* initialization of sset fields is done as needed */
     509             : 
     510     2234582 :     return d;
     511             : }
     512             : 
     513             : /*
     514             :  * freedfa - free a DFA
     515             :  */
     516             : static void
     517     2234582 : freedfa(struct dfa *d)
     518             : {
     519     2234582 :     if (d->cptsmalloced)
     520             :     {
     521       75772 :         if (d->ssets != NULL)
     522       75772 :             FREE(d->ssets);
     523       75772 :         if (d->statesarea != NULL)
     524       75772 :             FREE(d->statesarea);
     525       75772 :         if (d->outsarea != NULL)
     526       75772 :             FREE(d->outsarea);
     527       75772 :         if (d->incarea != NULL)
     528       75772 :             FREE(d->incarea);
     529             :     }
     530             : 
     531     2234582 :     if (d->mallocarea != NULL)
     532       83534 :         FREE(d->mallocarea);
     533     2234582 : }
     534             : 
     535             : /*
     536             :  * hash - construct a hash code for a bitvector
     537             :  *
     538             :  * There are probably better ways, but they're more expensive.
     539             :  */
     540             : static unsigned
     541       24050 : hash(unsigned *uv,
     542             :      int n)
     543             : {
     544             :     int         i;
     545             :     unsigned    h;
     546             : 
     547       24050 :     h = 0;
     548      127452 :     for (i = 0; i < n; i++)
     549      103402 :         h ^= uv[i];
     550       24050 :     return h;
     551             : }
     552             : 
     553             : /*
     554             :  * initialize - hand-craft a cache entry for startup, otherwise get ready
     555             :  */
     556             : static struct sset *
     557     2242472 : initialize(struct vars *v,
     558             :            struct dfa *d,
     559             :            chr *start)
     560             : {
     561             :     struct sset *ss;
     562             :     int         i;
     563             : 
     564             :     /* is previous one still there? */
     565     2242472 :     if (d->nssused > 0 && (d->ssets[0].flags & STARTER))
     566        7894 :         ss = &d->ssets[0];
     567             :     else
     568             :     {                           /* no, must (re)build it */
     569     2234578 :         ss = getvacant(v, d, start, start);
     570     2234578 :         if (ss == NULL)
     571           0 :             return NULL;
     572     4469956 :         for (i = 0; i < d->wordsper; i++)
     573     2235378 :             ss->states[i] = 0;
     574     2234578 :         BSET(ss->states, d->cnfa->pre);
     575     2234578 :         ss->hash = HASH(ss->states, d->wordsper);
     576             :         assert(d->cnfa->pre != d->cnfa->post);
     577     2234578 :         ss->flags = STARTER | LOCKED | NOPROGRESS;
     578             :         /* lastseen dealt with below */
     579             :     }
     580             : 
     581     4565280 :     for (i = 0; i < d->nssused; i++)
     582     2322808 :         d->ssets[i].lastseen = NULL;
     583     2242472 :     ss->lastseen = start;        /* maybe untrue, but harmless */
     584     2242472 :     d->lastpost = NULL;
     585     2242472 :     d->lastnopr = NULL;
     586     2242472 :     return ss;
     587             : }
     588             : 
     589             : /*
     590             :  * miss - handle a stateset cache miss
     591             :  *
     592             :  * css is the current stateset, co is the color of the current input character,
     593             :  * cp points to the character after that (which is where we may need to test
     594             :  * LACONs).  start does not affect matching behavior but is needed for pickss'
     595             :  * heuristics about which stateset cache entry to replace.
     596             :  *
     597             :  * Ordinarily, returns the address of the next stateset (the one that is
     598             :  * valid after consuming the input character).  Returns NULL if no valid
     599             :  * NFA states remain, ie we have a certain match failure.
     600             :  * Internal errors also return NULL, with v->err set.
     601             :  */
     602             : static struct sset *
     603    10129444 : miss(struct vars *v,
     604             :      struct dfa *d,
     605             :      struct sset *css,
     606             :      color co,
     607             :      chr *cp,                   /* next chr */
     608             :      chr *start)                /* where the attempt got started */
     609             : {
     610    10129444 :     struct cnfa *cnfa = d->cnfa;
     611             :     int         i;
     612             :     unsigned    h;
     613             :     struct carc *ca;
     614             :     struct sset *p;
     615             :     int         ispost;
     616             :     int         noprogress;
     617             :     int         gotstate;
     618             :     int         dolacons;
     619             :     int         sawlacons;
     620             : 
     621             :     /* for convenience, we can be called even if it might not be a miss */
     622    10129444 :     if (css->outs[co] != NULL)
     623             :     {
     624             :         FDEBUG(("hit\n"));
     625        7416 :         return css->outs[co];
     626             :     }
     627             :     FDEBUG(("miss\n"));
     628             : 
     629             :     /*
     630             :      * Checking for operation cancel in the inner text search loop seems
     631             :      * unduly expensive.  As a compromise, check during cache misses.
     632             :      */
     633    10122028 :     if (CANCEL_REQUESTED(v->re))
     634             :     {
     635           0 :         ERR(REG_CANCEL);
     636           0 :         return NULL;
     637             :     }
     638             : 
     639             :     /*
     640             :      * What set of states would we end up in after consuming the co character?
     641             :      * We first consider PLAIN arcs that consume the character, and then look
     642             :      * to see what LACON arcs could be traversed after consuming it.
     643             :      */
     644    20326600 :     for (i = 0; i < d->wordsper; i++)
     645    10204572 :         d->work[i] = 0;          /* build new stateset bitmap in d->work */
     646    10122028 :     ispost = 0;
     647    10122028 :     noprogress = 1;
     648    10122028 :     gotstate = 0;
     649    70776786 :     for (i = 0; i < d->nstates; i++)
     650    60654758 :         if (ISBSET(css->states, i))
     651   122713254 :             for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
     652   104609632 :                 if (ca->co == co)
     653             :                 {
     654    19036492 :                     BSET(d->work, ca->to);
     655    19036492 :                     gotstate = 1;
     656    19036492 :                     if (ca->to == cnfa->post)
     657     1851604 :                         ispost = 1;
     658    19036492 :                     if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
     659     5922362 :                         noprogress = 0;
     660             :                     FDEBUG(("%d -> %d\n", i, ca->to));
     661             :                 }
     662    10122028 :     if (!gotstate)
     663     1205536 :         return NULL;            /* character cannot reach any new state */
     664     8916492 :     dolacons = (cnfa->flags & HASLACONS);
     665     8916492 :     sawlacons = 0;
     666             :     /* outer loop handles transitive closure of reachable-by-LACON states */
     667     8917508 :     while (dolacons)
     668             :     {
     669        1016 :         dolacons = 0;
     670        9612 :         for (i = 0; i < d->nstates; i++)
     671        8596 :             if (ISBSET(d->work, i))
     672        3520 :                 for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
     673             :                 {
     674        2292 :                     if (ca->co < cnfa->ncolors)
     675        2132 :                         continue;   /* not a LACON arc */
     676         160 :                     if (ISBSET(d->work, ca->to))
     677          52 :                         continue;   /* arc would be a no-op anyway */
     678         108 :                     sawlacons = 1;  /* this LACON affects our result */
     679         108 :                     if (!lacon(v, cnfa, cp, ca->co))
     680             :                     {
     681          64 :                         if (ISERR())
     682           0 :                             return NULL;
     683          64 :                         continue;   /* LACON arc cannot be traversed */
     684             :                     }
     685          44 :                     if (ISERR())
     686           0 :                         return NULL;
     687          44 :                     BSET(d->work, ca->to);
     688          44 :                     dolacons = 1;
     689          44 :                     if (ca->to == cnfa->post)
     690           0 :                         ispost = 1;
     691          44 :                     if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
     692          44 :                         noprogress = 0;
     693             :                     FDEBUG(("%d :> %d\n", i, ca->to));
     694             :                 }
     695             :     }
     696     8916492 :     h = HASH(d->work, d->wordsper);
     697             : 
     698             :     /* Is this stateset already in the cache? */
     699    31102624 :     for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
     700    23903356 :         if (HIT(h, d->work, p, d->wordsper))
     701             :         {
     702             :             FDEBUG(("cached c%d\n", (int) (p - d->ssets)));
     703             :             break;              /* NOTE BREAK OUT */
     704             :         }
     705     8916492 :     if (i == 0)
     706             :     {                           /* nope, need a new cache entry */
     707     7199268 :         p = getvacant(v, d, cp, start);
     708     7199268 :         if (p == NULL)
     709           0 :             return NULL;
     710             :         assert(p != css);
     711    14471200 :         for (i = 0; i < d->wordsper; i++)
     712     7271932 :             p->states[i] = d->work[i];
     713     7199268 :         p->hash = h;
     714     7199268 :         p->flags = (ispost) ? POSTSTATE : 0;
     715     7199268 :         if (noprogress)
     716     2234662 :             p->flags |= NOPROGRESS;
     717             :         /* lastseen to be dealt with by caller */
     718             :     }
     719             : 
     720             :     /*
     721             :      * Link new stateset to old, unless a LACON affected the result, in which
     722             :      * case we don't create the link.  That forces future transitions across
     723             :      * this same arc (same prior stateset and character color) to come through
     724             :      * miss() again, so that we can recheck the LACON(s), which might or might
     725             :      * not pass since context will be different.
     726             :      */
     727     8916492 :     if (!sawlacons)
     728             :     {
     729             :         FDEBUG(("c%d[%d]->c%d\n",
     730             :                 (int) (css - d->ssets), co, (int) (p - d->ssets)));
     731     8916412 :         css->outs[co] = p;
     732     8916412 :         css->inchain[co] = p->ins;
     733     8916412 :         p->ins.ss = css;
     734     8916412 :         p->ins.co = co;
     735             :     }
     736     8916492 :     return p;
     737             : }
     738             : 
     739             : /*
     740             :  * lacon - lookaround-constraint checker for miss()
     741             :  */
     742             : static int                      /* predicate:  constraint satisfied? */
     743         108 : lacon(struct vars *v,
     744             :       struct cnfa *pcnfa,       /* parent cnfa */
     745             :       chr *cp,
     746             :       color co)                 /* "color" of the lookaround constraint */
     747             : {
     748             :     int         n;
     749             :     struct subre *sub;
     750             :     struct dfa *d;
     751             :     chr        *end;
     752             :     int         satisfied;
     753             : 
     754             :     /* Since this is recursive, it could be driven to stack overflow */
     755         108 :     if (STACK_TOO_DEEP(v->re))
     756             :     {
     757           0 :         ERR(REG_ETOOBIG);
     758           0 :         return 0;
     759             :     }
     760             : 
     761         108 :     n = co - pcnfa->ncolors;
     762             :     assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
     763             :     FDEBUG(("=== testing lacon %d\n", n));
     764         108 :     sub = &v->g->lacons[n];
     765         108 :     d = getladfa(v, n);
     766         108 :     if (d == NULL)
     767           0 :         return 0;
     768         108 :     if (LATYPE_IS_AHEAD(sub->subno))
     769             :     {
     770             :         /* used to use longest() here, but shortest() could be much cheaper */
     771          52 :         end = shortest(v, d, cp, cp, v->stop,
     772             :                        (chr **) NULL, (int *) NULL);
     773          52 :         satisfied = LATYPE_IS_POS(sub->subno) ? (end != NULL) : (end == NULL);
     774             :     }
     775             :     else
     776             :     {
     777             :         /*
     778             :          * To avoid doing O(N^2) work when repeatedly testing a lookbehind
     779             :          * constraint in an N-character string, we use matchuntil() which can
     780             :          * cache the DFA state across calls.  We only need to restart if the
     781             :          * probe point decreases, which is not common.  The NFA we're using is
     782             :          * a search NFA, so it doesn't mind scanning over stuff before the
     783             :          * nominal match.
     784             :          */
     785          56 :         satisfied = matchuntil(v, d, cp, &v->lblastcss[n], &v->lblastcp[n]);
     786          56 :         if (!LATYPE_IS_POS(sub->subno))
     787           0 :             satisfied = !satisfied;
     788             :     }
     789             :     FDEBUG(("=== lacon %d satisfied %d\n", n, satisfied));
     790         108 :     return satisfied;
     791             : }
     792             : 
     793             : /*
     794             :  * getvacant - get a vacant state set
     795             :  *
     796             :  * This routine clears out the inarcs and outarcs, but does not otherwise
     797             :  * clear the innards of the state set -- that's up to the caller.
     798             :  */
     799             : static struct sset *
     800     9433846 : getvacant(struct vars *v,
     801             :           struct dfa *d,
     802             :           chr *cp,
     803             :           chr *start)
     804             : {
     805             :     int         i;
     806             :     struct sset *ss;
     807             :     struct sset *p;
     808             :     struct arcp ap;
     809             :     color       co;
     810             : 
     811     9433846 :     ss = pickss(v, d, cp, start);
     812     9433846 :     if (ss == NULL)
     813           0 :         return NULL;
     814             :     assert(!(ss->flags & LOCKED));
     815             : 
     816             :     /* clear out its inarcs, including self-referential ones */
     817     9433846 :     ap = ss->ins;
     818     9433846 :     while ((p = ap.ss) != NULL)
     819             :     {
     820           0 :         co = ap.co;
     821             :         FDEBUG(("zapping c%d's %ld outarc\n", (int) (p - d->ssets), (long) co));
     822           0 :         p->outs[co] = NULL;
     823           0 :         ap = p->inchain[co];
     824           0 :         p->inchain[co].ss = NULL;    /* paranoia */
     825             :     }
     826     9433846 :     ss->ins.ss = NULL;
     827             : 
     828             :     /* take it off the inarc chains of the ssets reached by its outarcs */
     829    78709634 :     for (i = 0; i < d->ncolors; i++)
     830             :     {
     831    69275788 :         p = ss->outs[i];
     832             :         assert(p != ss);        /* not self-referential */
     833    69275788 :         if (p == NULL)
     834    69275788 :             continue;           /* NOTE CONTINUE */
     835             :         FDEBUG(("del outarc %d from c%d's in chn\n", i, (int) (p - d->ssets)));
     836           0 :         if (p->ins.ss == ss && p->ins.co == i)
     837           0 :             p->ins = ss->inchain[i];
     838             :         else
     839             :         {
     840           0 :             struct arcp lastap = {NULL, 0};
     841             : 
     842             :             assert(p->ins.ss != NULL);
     843           0 :             for (ap = p->ins; ap.ss != NULL &&
     844           0 :                  !(ap.ss == ss && ap.co == i);
     845           0 :                  ap = ap.ss->inchain[ap.co])
     846           0 :                 lastap = ap;
     847             :             assert(ap.ss != NULL);
     848           0 :             lastap.ss->inchain[lastap.co] = ss->inchain[i];
     849             :         }
     850           0 :         ss->outs[i] = NULL;
     851           0 :         ss->inchain[i].ss = NULL;
     852             :     }
     853             : 
     854             :     /* if ss was a success state, may need to remember location */
     855     9433846 :     if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
     856           0 :         (d->lastpost == NULL || d->lastpost < ss->lastseen))
     857           0 :         d->lastpost = ss->lastseen;
     858             : 
     859             :     /* likewise for a no-progress state */
     860     9433846 :     if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
     861           0 :         (d->lastnopr == NULL || d->lastnopr < ss->lastseen))
     862           0 :         d->lastnopr = ss->lastseen;
     863             : 
     864     9433846 :     return ss;
     865             : }
     866             : 
     867             : /*
     868             :  * pickss - pick the next stateset to be used
     869             :  */
     870             : static struct sset *
     871     9433846 : pickss(struct vars *v,
     872             :        struct dfa *d,
     873             :        chr *cp,
     874             :        chr *start)
     875             : {
     876             :     int         i;
     877             :     struct sset *ss;
     878             :     struct sset *end;
     879             :     chr        *ancient;
     880             : 
     881             :     /* shortcut for cases where cache isn't full */
     882     9433846 :     if (d->nssused < d->nssets)
     883             :     {
     884     9433846 :         i = d->nssused;
     885     9433846 :         d->nssused++;
     886     9433846 :         ss = &d->ssets[i];
     887             :         FDEBUG(("new c%d\n", i));
     888             :         /* set up innards */
     889     9433846 :         ss->states = &d->statesarea[i * d->wordsper];
     890     9433846 :         ss->flags = 0;
     891     9433846 :         ss->ins.ss = NULL;
     892     9433846 :         ss->ins.co = WHITE;      /* give it some value */
     893     9433846 :         ss->outs = &d->outsarea[i * d->ncolors];
     894     9433846 :         ss->inchain = &d->incarea[i * d->ncolors];
     895    78709634 :         for (i = 0; i < d->ncolors; i++)
     896             :         {
     897    69275788 :             ss->outs[i] = NULL;
     898    69275788 :             ss->inchain[i].ss = NULL;
     899             :         }
     900     9433846 :         return ss;
     901             :     }
     902             : 
     903             :     /* look for oldest, or old enough anyway */
     904           0 :     if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */
     905           0 :         ancient = cp - d->nssets * 2 / 3;
     906             :     else
     907           0 :         ancient = start;
     908           0 :     for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
     909           0 :         if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
     910           0 :             !(ss->flags & LOCKED))
     911             :         {
     912           0 :             d->search = ss + 1;
     913             :             FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
     914           0 :             return ss;
     915             :         }
     916           0 :     for (ss = d->ssets, end = d->search; ss < end; ss++)
     917           0 :         if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
     918           0 :             !(ss->flags & LOCKED))
     919             :         {
     920           0 :             d->search = ss + 1;
     921             :             FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
     922           0 :             return ss;
     923             :         }
     924             : 
     925             :     /* nobody's old enough?!? -- something's really wrong */
     926             :     FDEBUG(("cannot find victim to replace!\n"));
     927           0 :     ERR(REG_ASSERT);
     928           0 :     return NULL;
     929             : }

Generated by: LCOV version 1.13