LCOV - code coverage report
Current view: top level - src/backend/regex - rege_dfa.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 302 367 82.3 %
Date: 2019-11-15 22:06:47 Functions: 12 12 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * DFA routines
       3             :  * This file is #included by regexec.c.
       4             :  *
       5             :  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
       6             :  *
       7             :  * Development of this software was funded, in part, by Cray Research Inc.,
       8             :  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
       9             :  * Corporation, none of whom are responsible for the results.  The author
      10             :  * thanks all of them.
      11             :  *
      12             :  * Redistribution and use in source and binary forms -- with or without
      13             :  * modification -- are permitted for any purpose, provided that
      14             :  * redistributions in source form retain this entire copyright notice and
      15             :  * indicate the origin and nature of any modifications.
      16             :  *
      17             :  * I'd appreciate being given credit for this package in the documentation
      18             :  * of software which uses it, but that is not a requirement.
      19             :  *
      20             :  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
      21             :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      22             :  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
      23             :  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      24             :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      25             :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
      26             :  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      27             :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      28             :  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
      29             :  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      30             :  *
      31             :  * src/backend/regex/rege_dfa.c
      32             :  *
      33             :  */
      34             : 
      35             : /*
      36             :  * longest - longest-preferred matching engine
      37             :  *
      38             :  * On success, returns match endpoint address.  Returns NULL on no match.
      39             :  * Internal errors also return NULL, with v->err set.
      40             :  */
      41             : static chr *
      42      814338 : longest(struct vars *v,
      43             :         struct dfa *d,
      44             :         chr *start,             /* where the match should start */
      45             :         chr *stop,              /* match must end at or before here */
      46             :         int *hitstopp)          /* record whether hit v->stop, if non-NULL */
      47             : {
      48             :     chr        *cp;
      49      814338 :     chr        *realstop = (stop == v->stop) ? stop : stop + 1;
      50             :     color       co;
      51             :     struct sset *css;
      52             :     struct sset *ss;
      53             :     chr        *post;
      54             :     int         i;
      55      814338 :     struct colormap *cm = d->cm;
      56             : 
      57             :     /* prevent "uninitialized variable" warnings */
      58      814338 :     if (hitstopp != NULL)
      59      802938 :         *hitstopp = 0;
      60             : 
      61             :     /* initialize */
      62      814338 :     css = initialize(v, d, start);
      63      814338 :     if (css == NULL)
      64           0 :         return NULL;
      65      814338 :     cp = start;
      66             : 
      67             :     /* startup */
      68             :     FDEBUG(("+++ startup +++\n"));
      69      814338 :     if (cp == v->start)
      70             :     {
      71        1764 :         co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
      72             :         FDEBUG(("color %ld\n", (long) co));
      73             :     }
      74             :     else
      75             :     {
      76      812574 :         co = GETCOLOR(cm, *(cp - 1));
      77             :         FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
      78             :     }
      79      814338 :     css = miss(v, d, css, co, cp, start);
      80      814338 :     if (css == NULL)
      81         188 :         return NULL;
      82      814150 :     css->lastseen = cp;
      83             : 
      84             :     /*
      85             :      * This is the main text-scanning loop.  It seems worth having two copies
      86             :      * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
      87             :      * builds, when you're not actively tracing.
      88             :      */
      89             : #ifdef REG_DEBUG
      90             :     if (v->eflags & REG_FTRACE)
      91             :     {
      92             :         while (cp < realstop)
      93             :         {
      94             :             FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
      95             :             co = GETCOLOR(cm, *cp);
      96             :             FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
      97             :             ss = css->outs[co];
      98             :             if (ss == NULL)
      99             :             {
     100             :                 ss = miss(v, d, css, co, cp + 1, start);
     101             :                 if (ss == NULL)
     102             :                     break;      /* NOTE BREAK OUT */
     103             :             }
     104             :             cp++;
     105             :             ss->lastseen = cp;
     106             :             css = ss;
     107             :         }
     108             :     }
     109             :     else
     110             : #endif
     111             :     {
     112   294671334 :         while (cp < realstop)
     113             :         {
     114   293847832 :             co = GETCOLOR(cm, *cp);
     115   293847832 :             ss = css->outs[co];
     116   293847832 :             if (ss == NULL)
     117             :             {
     118     2466960 :                 ss = miss(v, d, css, co, cp + 1, start);
     119     2466960 :                 if (ss == NULL)
     120      804798 :                     break;      /* NOTE BREAK OUT */
     121             :             }
     122   293043034 :             cp++;
     123   293043034 :             ss->lastseen = cp;
     124   293043034 :             css = ss;
     125             :         }
     126             :     }
     127             : 
     128      814150 :     if (ISERR())
     129           0 :         return NULL;
     130             : 
     131             :     /* shutdown */
     132             :     FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
     133      814150 :     if (cp == v->stop && stop == v->stop)
     134             :     {
     135        3822 :         if (hitstopp != NULL)
     136         996 :             *hitstopp = 1;
     137        3822 :         co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
     138             :         FDEBUG(("color %ld\n", (long) co));
     139        3822 :         ss = miss(v, d, css, co, cp, start);
     140        3822 :         if (ISERR())
     141           0 :             return NULL;
     142             :         /* special case:  match ended at eol? */
     143        3822 :         if (ss != NULL && (ss->flags & POSTSTATE))
     144        1778 :             return cp;
     145        2044 :         else if (ss != NULL)
     146           0 :             ss->lastseen = cp;   /* to be tidy */
     147             :     }
     148             : 
     149             :     /* find last match, if any */
     150      812372 :     post = d->lastpost;
     151     4139714 :     for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
     152     3327342 :         if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
     153        3988 :             (post == NULL || post < ss->lastseen))
     154      811806 :             post = ss->lastseen;
     155      812372 :     if (post != NULL)           /* found one */
     156      809688 :         return post - 1;
     157             : 
     158        2684 :     return NULL;
     159             : }
     160             : 
     161             : /*
     162             :  * shortest - shortest-preferred matching engine
     163             :  *
     164             :  * On success, returns match endpoint address.  Returns NULL on no match.
     165             :  * Internal errors also return NULL, with v->err set.
     166             :  */
     167             : static chr *
     168     1411560 : shortest(struct vars *v,
     169             :          struct dfa *d,
     170             :          chr *start,            /* where the match should start */
     171             :          chr *min,              /* match must end at or after here */
     172             :          chr *max,              /* match must end at or before here */
     173             :          chr **coldp,           /* store coldstart pointer here, if non-NULL */
     174             :          int *hitstopp)         /* record whether hit v->stop, if non-NULL */
     175             : {
     176             :     chr        *cp;
     177     1411560 :     chr        *realmin = (min == v->stop) ? min : min + 1;
     178     1411560 :     chr        *realmax = (max == v->stop) ? max : max + 1;
     179             :     color       co;
     180             :     struct sset *css;
     181             :     struct sset *ss;
     182     1411560 :     struct colormap *cm = d->cm;
     183             : 
     184             :     /* prevent "uninitialized variable" warnings */
     185     1411560 :     if (coldp != NULL)
     186     1410330 :         *coldp = NULL;
     187     1411560 :     if (hitstopp != NULL)
     188         182 :         *hitstopp = 0;
     189             : 
     190             :     /* initialize */
     191     1411560 :     css = initialize(v, d, start);
     192     1411560 :     if (css == NULL)
     193           0 :         return NULL;
     194     1411560 :     cp = start;
     195             : 
     196             :     /* startup */
     197             :     FDEBUG(("--- startup ---\n"));
     198     1411560 :     if (cp == v->start)
     199             :     {
     200      609222 :         co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
     201             :         FDEBUG(("color %ld\n", (long) co));
     202             :     }
     203             :     else
     204             :     {
     205      802338 :         co = GETCOLOR(cm, *(cp - 1));
     206             :         FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
     207             :     }
     208     1411560 :     css = miss(v, d, css, co, cp, start);
     209     1411560 :     if (css == NULL)
     210           0 :         return NULL;
     211     1411560 :     css->lastseen = cp;
     212     1411560 :     ss = css;
     213             : 
     214             :     /*
     215             :      * This is the main text-scanning loop.  It seems worth having two copies
     216             :      * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
     217             :      * builds, when you're not actively tracing.
     218             :      */
     219             : #ifdef REG_DEBUG
     220             :     if (v->eflags & REG_FTRACE)
     221             :     {
     222             :         while (cp < realmax)
     223             :         {
     224             :             FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
     225             :             co = GETCOLOR(cm, *cp);
     226             :             FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
     227             :             ss = css->outs[co];
     228             :             if (ss == NULL)
     229             :             {
     230             :                 ss = miss(v, d, css, co, cp + 1, start);
     231             :                 if (ss == NULL)
     232             :                     break;      /* NOTE BREAK OUT */
     233             :             }
     234             :             cp++;
     235             :             ss->lastseen = cp;
     236             :             css = ss;
     237             :             if ((ss->flags & POSTSTATE) && cp >= realmin)
     238             :                 break;          /* NOTE BREAK OUT */
     239             :         }
     240             :     }
     241             :     else
     242             : #endif
     243             :     {
     244    32891792 :         while (cp < realmax)
     245             :         {
     246    31133582 :             co = GETCOLOR(cm, *cp);
     247    31133582 :             ss = css->outs[co];
     248    31133582 :             if (ss == NULL)
     249             :             {
     250     4973336 :                 ss = miss(v, d, css, co, cp + 1, start);
     251     4973336 :                 if (ss == NULL)
     252       47566 :                     break;      /* NOTE BREAK OUT */
     253             :             }
     254    31086016 :             cp++;
     255    31086016 :             ss->lastseen = cp;
     256    31086016 :             css = ss;
     257    31086016 :             if ((ss->flags & POSTSTATE) && cp >= realmin)
     258     1017344 :                 break;          /* NOTE BREAK OUT */
     259             :         }
     260             :     }
     261             : 
     262     1411560 :     if (ss == NULL)
     263       47566 :         return NULL;
     264             : 
     265     1363994 :     if (coldp != NULL)          /* report last no-progress state set, if any */
     266     1362792 :         *coldp = lastcold(v, d);
     267             : 
     268     1363994 :     if ((ss->flags & POSTSTATE) && cp > min)
     269             :     {
     270             :         assert(cp >= realmin);
     271     1017300 :         cp--;
     272             :     }
     273      346694 :     else if (cp == v->stop && max == v->stop)
     274             :     {
     275      346694 :         co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
     276             :         FDEBUG(("color %ld\n", (long) co));
     277      346694 :         ss = miss(v, d, css, co, cp, start);
     278             :         /* match might have ended at eol */
     279      346694 :         if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL)
     280           8 :             *hitstopp = 1;
     281             :     }
     282             : 
     283     1363994 :     if (ss == NULL || !(ss->flags & POSTSTATE))
     284      341676 :         return NULL;
     285             : 
     286     1022318 :     return cp;
     287             : }
     288             : 
     289             : /*
     290             :  * matchuntil - incremental matching engine
     291             :  *
     292             :  * This is meant for use with a search-style NFA (that is, the pattern is
     293             :  * known to act as though it had a leading .*).  We determine whether a
     294             :  * match exists starting at v->start and ending at probe.  Multiple calls
     295             :  * require only O(N) time not O(N^2) so long as the probe values are
     296             :  * nondecreasing.  *lastcss and *lastcp must be initialized to NULL before
     297             :  * starting a series of calls.
     298             :  *
     299             :  * Returns 1 if a match exists, 0 if not.
     300             :  * Internal errors also return 0, with v->err set.
     301             :  */
     302             : static int
     303          56 : matchuntil(struct vars *v,
     304             :            struct dfa *d,
     305             :            chr *probe,          /* we want to know if a match ends here */
     306             :            struct sset **lastcss,   /* state storage across calls */
     307             :            chr **lastcp)        /* state storage across calls */
     308             : {
     309          56 :     chr        *cp = *lastcp;
     310             :     color       co;
     311          56 :     struct sset *css = *lastcss;
     312             :     struct sset *ss;
     313          56 :     struct colormap *cm = d->cm;
     314             : 
     315             :     /* initialize and startup, or restart, if necessary */
     316          56 :     if (cp == NULL || cp > probe)
     317             :     {
     318          16 :         cp = v->start;
     319          16 :         css = initialize(v, d, cp);
     320          16 :         if (css == NULL)
     321           0 :             return 0;
     322             : 
     323             :         FDEBUG((">>> startup >>>\n"));
     324          16 :         co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
     325             :         FDEBUG(("color %ld\n", (long) co));
     326             : 
     327          16 :         css = miss(v, d, css, co, cp, v->start);
     328          16 :         if (css == NULL)
     329           0 :             return 0;
     330          16 :         css->lastseen = cp;
     331             :     }
     332          40 :     else if (css == NULL)
     333             :     {
     334             :         /* we previously found that no match is possible beyond *lastcp */
     335           0 :         return 0;
     336             :     }
     337          56 :     ss = css;
     338             : 
     339             :     /*
     340             :      * This is the main text-scanning loop.  It seems worth having two copies
     341             :      * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
     342             :      * builds, when you're not actively tracing.
     343             :      */
     344             : #ifdef REG_DEBUG
     345             :     if (v->eflags & REG_FTRACE)
     346             :     {
     347             :         while (cp < probe)
     348             :         {
     349             :             FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
     350             :             co = GETCOLOR(cm, *cp);
     351             :             FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
     352             :             ss = css->outs[co];
     353             :             if (ss == NULL)
     354             :             {
     355             :                 ss = miss(v, d, css, co, cp + 1, v->start);
     356             :                 if (ss == NULL)
     357             :                     break;      /* NOTE BREAK OUT */
     358             :             }
     359             :             cp++;
     360             :             ss->lastseen = cp;
     361             :             css = ss;
     362             :         }
     363             :     }
     364             :     else
     365             : #endif
     366             :     {
     367         176 :         while (cp < probe)
     368             :         {
     369          64 :             co = GETCOLOR(cm, *cp);
     370          64 :             ss = css->outs[co];
     371          64 :             if (ss == NULL)
     372             :             {
     373           8 :                 ss = miss(v, d, css, co, cp + 1, v->start);
     374           8 :                 if (ss == NULL)
     375           0 :                     break;      /* NOTE BREAK OUT */
     376             :             }
     377          64 :             cp++;
     378          64 :             ss->lastseen = cp;
     379          64 :             css = ss;
     380             :         }
     381             :     }
     382             : 
     383          56 :     *lastcss = ss;
     384          56 :     *lastcp = cp;
     385             : 
     386          56 :     if (ss == NULL)
     387           0 :         return 0;               /* impossible match, or internal error */
     388             : 
     389             :     /* We need to process one more chr, or the EOS symbol, to check match */
     390          56 :     if (cp < v->stop)
     391             :     {
     392             :         FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
     393          56 :         co = GETCOLOR(cm, *cp);
     394             :         FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
     395          56 :         ss = css->outs[co];
     396          56 :         if (ss == NULL)
     397          36 :             ss = miss(v, d, css, co, cp + 1, v->start);
     398             :     }
     399             :     else
     400             :     {
     401             :         assert(cp == v->stop);
     402           0 :         co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
     403             :         FDEBUG(("color %ld\n", (long) co));
     404           0 :         ss = miss(v, d, css, co, cp, v->start);
     405             :     }
     406             : 
     407          56 :     if (ss == NULL || !(ss->flags & POSTSTATE))
     408          40 :         return 0;
     409             : 
     410          16 :     return 1;
     411             : }
     412             : 
     413             : /*
     414             :  * lastcold - determine last point at which no progress had been made
     415             :  */
     416             : static chr *                    /* endpoint, or NULL */
     417     1362792 : lastcold(struct vars *v,
     418             :          struct dfa *d)
     419             : {
     420             :     struct sset *ss;
     421             :     chr        *nopr;
     422             :     int         i;
     423             : 
     424     1362792 :     nopr = d->lastnopr;
     425     1362792 :     if (nopr == NULL)
     426     1362792 :         nopr = v->start;
     427     7331298 :     for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
     428     5968506 :         if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
     429     2098258 :             nopr = ss->lastseen;
     430     1362792 :     return nopr;
     431             : }
     432             : 
     433             : /*
     434             :  * newdfa - set up a fresh DFA
     435             :  */
     436             : static struct dfa *
     437     2218024 : newdfa(struct vars *v,
     438             :        struct cnfa *cnfa,
     439             :        struct colormap *cm,
     440             :        struct smalldfa *sml)    /* preallocated space, may be NULL */
     441             : {
     442             :     struct dfa *d;
     443     2218024 :     size_t      nss = cnfa->nstates * 2;
     444     2218024 :     int         wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
     445     2218024 :     struct smalldfa *smallwas = sml;
     446             : 
     447             :     assert(cnfa != NULL && cnfa->nstates != 0);
     448             : 
     449     2218024 :     if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
     450             :     {
     451             :         assert(wordsper == 1);
     452     2147412 :         if (sml == NULL)
     453             :         {
     454        4210 :             sml = (struct smalldfa *) MALLOC(sizeof(struct smalldfa));
     455        4210 :             if (sml == NULL)
     456             :             {
     457           0 :                 ERR(REG_ESPACE);
     458           0 :                 return NULL;
     459             :             }
     460             :         }
     461     2147412 :         d = &sml->dfa;
     462     2147412 :         d->ssets = sml->ssets;
     463     2147412 :         d->statesarea = sml->statesarea;
     464     2147412 :         d->work = &d->statesarea[nss];
     465     2147412 :         d->outsarea = sml->outsarea;
     466     2147412 :         d->incarea = sml->incarea;
     467     2147412 :         d->cptsmalloced = 0;
     468     2147412 :         d->mallocarea = (smallwas == NULL) ? (char *) sml : NULL;
     469             :     }
     470             :     else
     471             :     {
     472       70612 :         d = (struct dfa *) MALLOC(sizeof(struct dfa));
     473       70612 :         if (d == NULL)
     474             :         {
     475           0 :             ERR(REG_ESPACE);
     476           0 :             return NULL;
     477             :         }
     478       70612 :         d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
     479       70612 :         d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
     480             :                                             sizeof(unsigned));
     481       70612 :         d->work = &d->statesarea[nss * wordsper];
     482       70612 :         d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
     483             :                                               sizeof(struct sset *));
     484       70612 :         d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
     485             :                                             sizeof(struct arcp));
     486       70612 :         d->cptsmalloced = 1;
     487       70612 :         d->mallocarea = (char *) d;
     488      141224 :         if (d->ssets == NULL || d->statesarea == NULL ||
     489      141224 :             d->outsarea == NULL || d->incarea == NULL)
     490             :         {
     491           0 :             freedfa(d);
     492           0 :             ERR(REG_ESPACE);
     493           0 :             return NULL;
     494             :         }
     495             :     }
     496             : 
     497     2218024 :     d->nssets = (v->eflags & REG_SMALL) ? 7 : nss;
     498     2218024 :     d->nssused = 0;
     499     2218024 :     d->nstates = cnfa->nstates;
     500     2218024 :     d->ncolors = cnfa->ncolors;
     501     2218024 :     d->wordsper = wordsper;
     502     2218024 :     d->cnfa = cnfa;
     503     2218024 :     d->cm = cm;
     504     2218024 :     d->lastpost = NULL;
     505     2218024 :     d->lastnopr = NULL;
     506     2218024 :     d->search = d->ssets;
     507             : 
     508             :     /* initialization of sset fields is done as needed */
     509             : 
     510     2218024 :     return d;
     511             : }
     512             : 
     513             : /*
     514             :  * freedfa - free a DFA
     515             :  */
     516             : static void
     517     2218024 : freedfa(struct dfa *d)
     518             : {
     519     2218024 :     if (d->cptsmalloced)
     520             :     {
     521       70612 :         if (d->ssets != NULL)
     522       70612 :             FREE(d->ssets);
     523       70612 :         if (d->statesarea != NULL)
     524       70612 :             FREE(d->statesarea);
     525       70612 :         if (d->outsarea != NULL)
     526       70612 :             FREE(d->outsarea);
     527       70612 :         if (d->incarea != NULL)
     528       70612 :             FREE(d->incarea);
     529             :     }
     530             : 
     531     2218024 :     if (d->mallocarea != NULL)
     532       74822 :         FREE(d->mallocarea);
     533     2218024 : }
     534             : 
     535             : /*
     536             :  * hash - construct a hash code for a bitvector
     537             :  *
     538             :  * There are probably better ways, but they're more expensive.
     539             :  */
     540             : static unsigned
     541       25746 : hash(unsigned *uv,
     542             :      int n)
     543             : {
     544             :     int         i;
     545             :     unsigned    h;
     546             : 
     547       25746 :     h = 0;
     548      134456 :     for (i = 0; i < n; i++)
     549      108710 :         h ^= uv[i];
     550       25746 :     return h;
     551             : }
     552             : 
     553             : /*
     554             :  * initialize - hand-craft a cache entry for startup, otherwise get ready
     555             :  */
     556             : static struct sset *
     557     2225914 : initialize(struct vars *v,
     558             :            struct dfa *d,
     559             :            chr *start)
     560             : {
     561             :     struct sset *ss;
     562             :     int         i;
     563             : 
     564             :     /* is previous one still there? */
     565     2225914 :     if (d->nssused > 0 && (d->ssets[0].flags & STARTER))
     566        7894 :         ss = &d->ssets[0];
     567             :     else
     568             :     {                           /* no, must (re)build it */
     569     2218020 :         ss = getvacant(v, d, start, start);
     570     2218020 :         if (ss == NULL)
     571           0 :             return NULL;
     572     4436900 :         for (i = 0; i < d->wordsper; i++)
     573     2218880 :             ss->states[i] = 0;
     574     2218020 :         BSET(ss->states, d->cnfa->pre);
     575     2218020 :         ss->hash = HASH(ss->states, d->wordsper);
     576             :         assert(d->cnfa->pre != d->cnfa->post);
     577     2218020 :         ss->flags = STARTER | LOCKED | NOPROGRESS;
     578             :         /* lastseen dealt with below */
     579             :     }
     580             : 
     581     4532164 :     for (i = 0; i < d->nssused; i++)
     582     2306250 :         d->ssets[i].lastseen = NULL;
     583     2225914 :     ss->lastseen = start;        /* maybe untrue, but harmless */
     584     2225914 :     d->lastpost = NULL;
     585     2225914 :     d->lastnopr = NULL;
     586     2225914 :     return ss;
     587             : }
     588             : 
     589             : /*
     590             :  * miss - handle a stateset cache miss
     591             :  *
     592             :  * css is the current stateset, co is the color of the current input character,
     593             :  * cp points to the character after that (which is where we may need to test
     594             :  * LACONs).  start does not affect matching behavior but is needed for pickss'
     595             :  * heuristics about which stateset cache entry to replace.
     596             :  *
     597             :  * Ordinarily, returns the address of the next stateset (the one that is
     598             :  * valid after consuming the input character).  Returns NULL if no valid
     599             :  * NFA states remain, ie we have a certain match failure.
     600             :  * Internal errors also return NULL, with v->err set.
     601             :  */
     602             : static struct sset *
     603    10016770 : miss(struct vars *v,
     604             :      struct dfa *d,
     605             :      struct sset *css,
     606             :      color co,
     607             :      chr *cp,                   /* next chr */
     608             :      chr *start)                /* where the attempt got started */
     609             : {
     610    10016770 :     struct cnfa *cnfa = d->cnfa;
     611             :     int         i;
     612             :     unsigned    h;
     613             :     struct carc *ca;
     614             :     struct sset *p;
     615             :     int         ispost;
     616             :     int         noprogress;
     617             :     int         gotstate;
     618             :     int         dolacons;
     619             :     int         sawlacons;
     620             : 
     621             :     /* for convenience, we can be called even if it might not be a miss */
     622    10016770 :     if (css->outs[co] != NULL)
     623             :     {
     624             :         FDEBUG(("hit\n"));
     625        7416 :         return css->outs[co];
     626             :     }
     627             :     FDEBUG(("miss\n"));
     628             : 
     629             :     /*
     630             :      * Checking for operation cancel in the inner text search loop seems
     631             :      * unduly expensive.  As a compromise, check during cache misses.
     632             :      */
     633    10009354 :     if (CANCEL_REQUESTED(v->re))
     634             :     {
     635           0 :         ERR(REG_CANCEL);
     636           0 :         return NULL;
     637             :     }
     638             : 
     639             :     /*
     640             :      * What set of states would we end up in after consuming the co character?
     641             :      * We first consider PLAIN arcs that consume the character, and then look
     642             :      * to see what LACON arcs could be traversed after consuming it.
     643             :      */
     644    20104868 :     for (i = 0; i < d->wordsper; i++)
     645    10095514 :         d->work[i] = 0;          /* build new stateset bitmap in d->work */
     646    10009354 :     ispost = 0;
     647    10009354 :     noprogress = 1;
     648    10009354 :     gotstate = 0;
     649    69652608 :     for (i = 0; i < d->nstates; i++)
     650    59643254 :         if (ISBSET(css->states, i))
     651   122138662 :             for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
     652   104202334 :                 if (ca->co == co)
     653             :                 {
     654    18864104 :                     BSET(d->work, ca->to);
     655    18864104 :                     gotstate = 1;
     656    18864104 :                     if (ca->to == cnfa->post)
     657     1836212 :                         ispost = 1;
     658    18864104 :                     if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
     659     5839402 :                         noprogress = 0;
     660             :                     FDEBUG(("%d -> %d\n", i, ca->to));
     661             :                 }
     662    10009354 :     if (!gotstate)
     663     1196272 :         return NULL;            /* character cannot reach any new state */
     664     8813082 :     dolacons = (cnfa->flags & HASLACONS);
     665     8813082 :     sawlacons = 0;
     666             :     /* outer loop handles transitive closure of reachable-by-LACON states */
     667    17627176 :     while (dolacons)
     668             :     {
     669        1012 :         dolacons = 0;
     670        9572 :         for (i = 0; i < d->nstates; i++)
     671        8560 :             if (ISBSET(d->work, i))
     672        3512 :                 for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
     673             :                 {
     674        2288 :                     if (ca->co < cnfa->ncolors)
     675        2128 :                         continue;   /* not a LACON arc */
     676         160 :                     if (ISBSET(d->work, ca->to))
     677          52 :                         continue;   /* arc would be a no-op anyway */
     678         108 :                     sawlacons = 1;  /* this LACON affects our result */
     679         108 :                     if (!lacon(v, cnfa, cp, ca->co))
     680             :                     {
     681          64 :                         if (ISERR())
     682           0 :                             return NULL;
     683          64 :                         continue;   /* LACON arc cannot be traversed */
     684             :                     }
     685          44 :                     if (ISERR())
     686           0 :                         return NULL;
     687          44 :                     BSET(d->work, ca->to);
     688          44 :                     dolacons = 1;
     689          44 :                     if (ca->to == cnfa->post)
     690           0 :                         ispost = 1;
     691          44 :                     if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
     692          44 :                         noprogress = 0;
     693             :                     FDEBUG(("%d :> %d\n", i, ca->to));
     694             :                 }
     695             :     }
     696     8813082 :     h = HASH(d->work, d->wordsper);
     697             : 
     698             :     /* Is this stateset already in the cache? */
     699    30635012 :     for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
     700    23506114 :         if (HIT(h, d->work, p, d->wordsper))
     701             :         {
     702             :             FDEBUG(("cached c%d\n", (int) (p - d->ssets)));
     703             :             break;              /* NOTE BREAK OUT */
     704             :         }
     705     8813082 :     if (i == 0)
     706             :     {                           /* nope, need a new cache entry */
     707     7128898 :         p = getvacant(v, d, cp, start);
     708     7128898 :         if (p == NULL)
     709           0 :             return NULL;
     710             :         assert(p != css);
     711    14332764 :         for (i = 0; i < d->wordsper; i++)
     712     7203866 :             p->states[i] = d->work[i];
     713     7128898 :         p->hash = h;
     714     7128898 :         p->flags = (ispost) ? POSTSTATE : 0;
     715     7128898 :         if (noprogress)
     716     2218112 :             p->flags |= NOPROGRESS;
     717             :         /* lastseen to be dealt with by caller */
     718             :     }
     719             : 
     720             :     /*
     721             :      * Link new stateset to old, unless a LACON affected the result, in which
     722             :      * case we don't create the link.  That forces future transitions across
     723             :      * this same arc (same prior stateset and character color) to come through
     724             :      * miss() again, so that we can recheck the LACON(s), which might or might
     725             :      * not pass since context will be different.
     726             :      */
     727     8813082 :     if (!sawlacons)
     728             :     {
     729             :         FDEBUG(("c%d[%d]->c%d\n",
     730             :                 (int) (css - d->ssets), co, (int) (p - d->ssets)));
     731     8813002 :         css->outs[co] = p;
     732     8813002 :         css->inchain[co] = p->ins;
     733     8813002 :         p->ins.ss = css;
     734     8813002 :         p->ins.co = co;
     735             :     }
     736     8813082 :     return p;
     737             : }
     738             : 
     739             : /*
     740             :  * lacon - lookaround-constraint checker for miss()
     741             :  */
     742             : static int                      /* predicate:  constraint satisfied? */
     743         108 : lacon(struct vars *v,
     744             :       struct cnfa *pcnfa,       /* parent cnfa */
     745             :       chr *cp,
     746             :       color co)                 /* "color" of the lookaround constraint */
     747             : {
     748             :     int         n;
     749             :     struct subre *sub;
     750             :     struct dfa *d;
     751             :     chr        *end;
     752             :     int         satisfied;
     753             : 
     754             :     /* Since this is recursive, it could be driven to stack overflow */
     755         108 :     if (STACK_TOO_DEEP(v->re))
     756             :     {
     757           0 :         ERR(REG_ETOOBIG);
     758           0 :         return 0;
     759             :     }
     760             : 
     761         108 :     n = co - pcnfa->ncolors;
     762             :     assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
     763             :     FDEBUG(("=== testing lacon %d\n", n));
     764         108 :     sub = &v->g->lacons[n];
     765         108 :     d = getladfa(v, n);
     766         108 :     if (d == NULL)
     767           0 :         return 0;
     768         108 :     if (LATYPE_IS_AHEAD(sub->subno))
     769             :     {
     770             :         /* used to use longest() here, but shortest() could be much cheaper */
     771          52 :         end = shortest(v, d, cp, cp, v->stop,
     772             :                        (chr **) NULL, (int *) NULL);
     773          52 :         satisfied = LATYPE_IS_POS(sub->subno) ? (end != NULL) : (end == NULL);
     774             :     }
     775             :     else
     776             :     {
     777             :         /*
     778             :          * To avoid doing O(N^2) work when repeatedly testing a lookbehind
     779             :          * constraint in an N-character string, we use matchuntil() which can
     780             :          * cache the DFA state across calls.  We only need to restart if the
     781             :          * probe point decreases, which is not common.  The NFA we're using is
     782             :          * a search NFA, so it doesn't mind scanning over stuff before the
     783             :          * nominal match.
     784             :          */
     785          56 :         satisfied = matchuntil(v, d, cp, &v->lblastcss[n], &v->lblastcp[n]);
     786          56 :         if (!LATYPE_IS_POS(sub->subno))
     787           0 :             satisfied = !satisfied;
     788             :     }
     789             :     FDEBUG(("=== lacon %d satisfied %d\n", n, satisfied));
     790         108 :     return satisfied;
     791             : }
     792             : 
     793             : /*
     794             :  * getvacant - get a vacant state set
     795             :  *
     796             :  * This routine clears out the inarcs and outarcs, but does not otherwise
     797             :  * clear the innards of the state set -- that's up to the caller.
     798             :  */
     799             : static struct sset *
     800     9346918 : getvacant(struct vars *v,
     801             :           struct dfa *d,
     802             :           chr *cp,
     803             :           chr *start)
     804             : {
     805             :     int         i;
     806             :     struct sset *ss;
     807             :     struct sset *p;
     808             :     struct arcp ap;
     809             :     color       co;
     810             : 
     811     9346918 :     ss = pickss(v, d, cp, start);
     812     9346918 :     if (ss == NULL)
     813           0 :         return NULL;
     814             :     assert(!(ss->flags & LOCKED));
     815             : 
     816             :     /* clear out its inarcs, including self-referential ones */
     817     9346918 :     ap = ss->ins;
     818    18693836 :     while ((p = ap.ss) != NULL)
     819             :     {
     820           0 :         co = ap.co;
     821             :         FDEBUG(("zapping c%d's %ld outarc\n", (int) (p - d->ssets), (long) co));
     822           0 :         p->outs[co] = NULL;
     823           0 :         ap = p->inchain[co];
     824           0 :         p->inchain[co].ss = NULL;    /* paranoia */
     825             :     }
     826     9346918 :     ss->ins.ss = NULL;
     827             : 
     828             :     /* take it off the inarc chains of the ssets reached by its outarcs */
     829    77708496 :     for (i = 0; i < d->ncolors; i++)
     830             :     {
     831    68361578 :         p = ss->outs[i];
     832             :         assert(p != ss);        /* not self-referential */
     833    68361578 :         if (p == NULL)
     834    68361578 :             continue;           /* NOTE CONTINUE */
     835             :         FDEBUG(("del outarc %d from c%d's in chn\n", i, (int) (p - d->ssets)));
     836           0 :         if (p->ins.ss == ss && p->ins.co == i)
     837           0 :             p->ins = ss->inchain[i];
     838             :         else
     839             :         {
     840           0 :             struct arcp lastap = {NULL, 0};
     841             : 
     842             :             assert(p->ins.ss != NULL);
     843           0 :             for (ap = p->ins; ap.ss != NULL &&
     844           0 :                  !(ap.ss == ss && ap.co == i);
     845           0 :                  ap = ap.ss->inchain[ap.co])
     846           0 :                 lastap = ap;
     847             :             assert(ap.ss != NULL);
     848           0 :             lastap.ss->inchain[lastap.co] = ss->inchain[i];
     849             :         }
     850           0 :         ss->outs[i] = NULL;
     851           0 :         ss->inchain[i].ss = NULL;
     852             :     }
     853             : 
     854             :     /* if ss was a success state, may need to remember location */
     855     9346918 :     if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
     856           0 :         (d->lastpost == NULL || d->lastpost < ss->lastseen))
     857           0 :         d->lastpost = ss->lastseen;
     858             : 
     859             :     /* likewise for a no-progress state */
     860     9346918 :     if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
     861           0 :         (d->lastnopr == NULL || d->lastnopr < ss->lastseen))
     862           0 :         d->lastnopr = ss->lastseen;
     863             : 
     864     9346918 :     return ss;
     865             : }
     866             : 
     867             : /*
     868             :  * pickss - pick the next stateset to be used
     869             :  */
     870             : static struct sset *
     871     9346918 : pickss(struct vars *v,
     872             :        struct dfa *d,
     873             :        chr *cp,
     874             :        chr *start)
     875             : {
     876             :     int         i;
     877             :     struct sset *ss;
     878             :     struct sset *end;
     879             :     chr        *ancient;
     880             : 
     881             :     /* shortcut for cases where cache isn't full */
     882     9346918 :     if (d->nssused < d->nssets)
     883             :     {
     884     9346918 :         i = d->nssused;
     885     9346918 :         d->nssused++;
     886     9346918 :         ss = &d->ssets[i];
     887             :         FDEBUG(("new c%d\n", i));
     888             :         /* set up innards */
     889     9346918 :         ss->states = &d->statesarea[i * d->wordsper];
     890     9346918 :         ss->flags = 0;
     891     9346918 :         ss->ins.ss = NULL;
     892     9346918 :         ss->ins.co = WHITE;      /* give it some value */
     893     9346918 :         ss->outs = &d->outsarea[i * d->ncolors];
     894     9346918 :         ss->inchain = &d->incarea[i * d->ncolors];
     895    77708496 :         for (i = 0; i < d->ncolors; i++)
     896             :         {
     897    68361578 :             ss->outs[i] = NULL;
     898    68361578 :             ss->inchain[i].ss = NULL;
     899             :         }
     900     9346918 :         return ss;
     901             :     }
     902             : 
     903             :     /* look for oldest, or old enough anyway */
     904           0 :     if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */
     905           0 :         ancient = cp - d->nssets * 2 / 3;
     906             :     else
     907           0 :         ancient = start;
     908           0 :     for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
     909           0 :         if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
     910           0 :             !(ss->flags & LOCKED))
     911             :         {
     912           0 :             d->search = ss + 1;
     913             :             FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
     914           0 :             return ss;
     915             :         }
     916           0 :     for (ss = d->ssets, end = d->search; ss < end; ss++)
     917           0 :         if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
     918           0 :             !(ss->flags & LOCKED))
     919             :         {
     920           0 :             d->search = ss + 1;
     921             :             FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
     922           0 :             return ss;
     923             :         }
     924             : 
     925             :     /* nobody's old enough?!? -- something's really wrong */
     926             :     FDEBUG(("cannot find victim to replace!\n"));
     927           0 :     ERR(REG_ASSERT);
     928           0 :     return NULL;
     929             : }

Generated by: LCOV version 1.13