LCOV - code coverage report
Current view: top level - src/backend/regex - regc_nfa.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 1119 1225 91.3 %
Date: 2024-03-29 12:13:04 Functions: 60 61 98.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * NFA utilities.
       3             :  * This file is #included by regcomp.c.
       4             :  *
       5             :  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
       6             :  *
       7             :  * Development of this software was funded, in part, by Cray Research Inc.,
       8             :  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
       9             :  * Corporation, none of whom are responsible for the results.  The author
      10             :  * thanks all of them.
      11             :  *
      12             :  * Redistribution and use in source and binary forms -- with or without
      13             :  * modification -- are permitted for any purpose, provided that
      14             :  * redistributions in source form retain this entire copyright notice and
      15             :  * indicate the origin and nature of any modifications.
      16             :  *
      17             :  * I'd appreciate being given credit for this package in the documentation
      18             :  * of software which uses it, but that is not a requirement.
      19             :  *
      20             :  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
      21             :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      22             :  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
      23             :  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      24             :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      25             :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
      26             :  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      27             :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      28             :  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
      29             :  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      30             :  *
      31             :  * src/backend/regex/regc_nfa.c
      32             :  *
      33             :  *
      34             :  * One or two things that technically ought to be in here
      35             :  * are actually in color.c, thanks to some incestuous relationships in
      36             :  * the color chains.
      37             :  */
      38             : 
      39             : #define NISERR()    VISERR(nfa->v)
      40             : #define NERR(e)     VERR(nfa->v, (e))
      41             : 
      42             : 
      43             : /*
      44             :  * newnfa - set up an NFA
      45             :  */
      46             : static struct nfa *             /* the NFA, or NULL */
      47       17948 : newnfa(struct vars *v,
      48             :        struct colormap *cm,
      49             :        struct nfa *parent)      /* NULL if primary NFA */
      50             : {
      51             :     struct nfa *nfa;
      52             : 
      53       17948 :     nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
      54       17948 :     if (nfa == NULL)
      55             :     {
      56           0 :         ERR(REG_ESPACE);
      57           0 :         return NULL;
      58             :     }
      59             : 
      60             :     /* Make the NFA minimally valid, so freenfa() will behave sanely */
      61       17948 :     nfa->states = NULL;
      62       17948 :     nfa->slast = NULL;
      63       17948 :     nfa->freestates = NULL;
      64       17948 :     nfa->freearcs = NULL;
      65       17948 :     nfa->lastsb = NULL;
      66       17948 :     nfa->lastab = NULL;
      67       17948 :     nfa->lastsbused = 0;
      68       17948 :     nfa->lastabused = 0;
      69       17948 :     nfa->nstates = 0;
      70       17948 :     nfa->cm = cm;
      71       17948 :     nfa->v = v;
      72       17948 :     nfa->bos[0] = nfa->bos[1] = COLORLESS;
      73       17948 :     nfa->eos[0] = nfa->eos[1] = COLORLESS;
      74       17948 :     nfa->flags = 0;
      75       17948 :     nfa->minmatchall = nfa->maxmatchall = -1;
      76       17948 :     nfa->parent = parent;        /* Precedes newfstate so parent is valid. */
      77             : 
      78             :     /* Create required infrastructure */
      79       17948 :     nfa->post = newfstate(nfa, '@'); /* number 0 */
      80       17948 :     nfa->pre = newfstate(nfa, '>'); /* number 1 */
      81       17948 :     nfa->init = newstate(nfa);   /* may become invalid later */
      82       17948 :     nfa->final = newstate(nfa);
      83       17948 :     if (ISERR())
      84             :     {
      85           0 :         freenfa(nfa);
      86           0 :         return NULL;
      87             :     }
      88       17948 :     rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
      89       17948 :     newarc(nfa, '^', 1, nfa->pre, nfa->init);
      90       17948 :     newarc(nfa, '^', 0, nfa->pre, nfa->init);
      91       17948 :     rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
      92       17948 :     newarc(nfa, '$', 1, nfa->final, nfa->post);
      93       17948 :     newarc(nfa, '$', 0, nfa->final, nfa->post);
      94             : 
      95       17948 :     if (ISERR())
      96             :     {
      97           0 :         freenfa(nfa);
      98           0 :         return NULL;
      99             :     }
     100       17948 :     return nfa;
     101             : }
     102             : 
     103             : /*
     104             :  * freenfa - free an entire NFA
     105             :  */
     106             : static void
     107       17948 : freenfa(struct nfa *nfa)
     108             : {
     109             :     struct statebatch *sb;
     110             :     struct statebatch *sbnext;
     111             :     struct arcbatch *ab;
     112             :     struct arcbatch *abnext;
     113             : 
     114       37568 :     for (sb = nfa->lastsb; sb != NULL; sb = sbnext)
     115             :     {
     116       19620 :         sbnext = sb->next;
     117       19620 :         nfa->v->spaceused -= STATEBATCHSIZE(sb->nstates);
     118       19620 :         FREE(sb);
     119             :     }
     120       17948 :     nfa->lastsb = NULL;
     121       52078 :     for (ab = nfa->lastab; ab != NULL; ab = abnext)
     122             :     {
     123       34130 :         abnext = ab->next;
     124       34130 :         nfa->v->spaceused -= ARCBATCHSIZE(ab->narcs);
     125       34130 :         FREE(ab);
     126             :     }
     127       17948 :     nfa->lastab = NULL;
     128             : 
     129       17948 :     nfa->nstates = -1;
     130       17948 :     FREE(nfa);
     131       17948 : }
     132             : 
     133             : /*
     134             :  * newstate - allocate an NFA state, with zero flag value
     135             :  */
     136             : static struct state *           /* NULL on error */
     137      490536 : newstate(struct nfa *nfa)
     138             : {
     139             :     struct state *s;
     140             : 
     141             :     /*
     142             :      * This is a handy place to check for operation cancel during regex
     143             :      * compilation, since no code path will go very long without making a new
     144             :      * state or arc.
     145             :      */
     146      490536 :     INTERRUPT(nfa->v->re);
     147             : 
     148             :     /* first, recycle anything that's on the freelist */
     149      490536 :     if (nfa->freestates != NULL)
     150             :     {
     151       32678 :         s = nfa->freestates;
     152       32678 :         nfa->freestates = s->next;
     153             :     }
     154             :     /* otherwise, is there anything left in the last statebatch? */
     155      457858 :     else if (nfa->lastsb != NULL && nfa->lastsbused < nfa->lastsb->nstates)
     156             :     {
     157      438238 :         s = &nfa->lastsb->s[nfa->lastsbused++];
     158             :     }
     159             :     /* otherwise, need to allocate a new statebatch */
     160             :     else
     161             :     {
     162             :         struct statebatch *newSb;
     163             :         size_t      nstates;
     164             : 
     165       19620 :         if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
     166             :         {
     167           0 :             NERR(REG_ETOOBIG);
     168           0 :             return NULL;
     169             :         }
     170       19620 :         nstates = (nfa->lastsb != NULL) ? nfa->lastsb->nstates * 2 : FIRSTSBSIZE;
     171       19620 :         if (nstates > MAXSBSIZE)
     172          50 :             nstates = MAXSBSIZE;
     173       19620 :         newSb = (struct statebatch *) MALLOC(STATEBATCHSIZE(nstates));
     174       19620 :         if (newSb == NULL)
     175             :         {
     176           0 :             NERR(REG_ESPACE);
     177           0 :             return NULL;
     178             :         }
     179       19620 :         nfa->v->spaceused += STATEBATCHSIZE(nstates);
     180       19620 :         newSb->nstates = nstates;
     181       19620 :         newSb->next = nfa->lastsb;
     182       19620 :         nfa->lastsb = newSb;
     183       19620 :         nfa->lastsbused = 1;
     184       19620 :         s = &newSb->s[0];
     185             :     }
     186             : 
     187             :     assert(nfa->nstates >= 0);
     188      490536 :     s->no = nfa->nstates++;
     189      490536 :     s->flag = 0;
     190      490536 :     if (nfa->states == NULL)
     191       17948 :         nfa->states = s;
     192      490536 :     s->nins = 0;
     193      490536 :     s->ins = NULL;
     194      490536 :     s->nouts = 0;
     195      490536 :     s->outs = NULL;
     196      490536 :     s->tmp = NULL;
     197      490536 :     s->next = NULL;
     198      490536 :     if (nfa->slast != NULL)
     199             :     {
     200             :         assert(nfa->slast->next == NULL);
     201      472588 :         nfa->slast->next = s;
     202             :     }
     203      490536 :     s->prev = nfa->slast;
     204      490536 :     nfa->slast = s;
     205      490536 :     return s;
     206             : }
     207             : 
     208             : /*
     209             :  * newfstate - allocate an NFA state with a specified flag value
     210             :  */
     211             : static struct state *           /* NULL on error */
     212       35896 : newfstate(struct nfa *nfa, int flag)
     213             : {
     214             :     struct state *s;
     215             : 
     216       35896 :     s = newstate(nfa);
     217       35896 :     if (s != NULL)
     218       35896 :         s->flag = (char) flag;
     219       35896 :     return s;
     220             : }
     221             : 
     222             : /*
     223             :  * dropstate - delete a state's inarcs and outarcs and free it
     224             :  */
     225             : static void
     226      206840 : dropstate(struct nfa *nfa,
     227             :           struct state *s)
     228             : {
     229             :     struct arc *a;
     230             : 
     231      243600 :     while ((a = s->ins) != NULL)
     232       36760 :         freearc(nfa, a);
     233      342280 :     while ((a = s->outs) != NULL)
     234      135440 :         freearc(nfa, a);
     235      206840 :     freestate(nfa, s);
     236      206840 : }
     237             : 
     238             : /*
     239             :  * freestate - free a state, which has no in-arcs or out-arcs
     240             :  */
     241             : static void
     242      216052 : freestate(struct nfa *nfa,
     243             :           struct state *s)
     244             : {
     245             :     assert(s != NULL);
     246             :     assert(s->nins == 0 && s->nouts == 0);
     247             : 
     248      216052 :     s->no = FREESTATE;
     249      216052 :     s->flag = 0;
     250      216052 :     if (s->next != NULL)
     251      204684 :         s->next->prev = s->prev;
     252             :     else
     253             :     {
     254             :         assert(s == nfa->slast);
     255       11368 :         nfa->slast = s->prev;
     256             :     }
     257      216052 :     if (s->prev != NULL)
     258      216052 :         s->prev->next = s->next;
     259             :     else
     260             :     {
     261             :         assert(s == nfa->states);
     262           0 :         nfa->states = s->next;
     263             :     }
     264      216052 :     s->prev = NULL;
     265      216052 :     s->next = nfa->freestates;    /* don't delete it, put it on the free list */
     266      216052 :     nfa->freestates = s;
     267      216052 : }
     268             : 
     269             : /*
     270             :  * newarc - set up a new arc within an NFA
     271             :  *
     272             :  * This function checks to make sure that no duplicate arcs are created.
     273             :  * In general we never want duplicates.
     274             :  *
     275             :  * However: in principle, a RAINBOW arc is redundant with any plain arc
     276             :  * (unless that arc is for a pseudocolor).  But we don't try to recognize
     277             :  * that redundancy, either here or in allied operations such as moveins().
     278             :  * The pseudocolor consideration makes that more costly than it seems worth.
     279             :  */
     280             : static void
     281     1849136 : newarc(struct nfa *nfa,
     282             :        int t,
     283             :        color co,
     284             :        struct state *from,
     285             :        struct state *to)
     286             : {
     287             :     struct arc *a;
     288             : 
     289             :     assert(from != NULL && to != NULL);
     290             : 
     291             :     /*
     292             :      * This is a handy place to check for operation cancel during regex
     293             :      * compilation, since no code path will go very long without making a new
     294             :      * state or arc.
     295             :      */
     296     1849136 :     INTERRUPT(nfa->v->re);
     297             : 
     298             :     /* check for duplicate arc, using whichever chain is shorter */
     299     1849136 :     if (from->nouts <= to->nins)
     300             :     {
     301     5941868 :         for (a = from->outs; a != NULL; a = a->outchain)
     302     5087324 :             if (a->to == to && a->co == co && a->type == t)
     303      169920 :                 return;
     304             :     }
     305             :     else
     306             :     {
     307    76761904 :         for (a = to->ins; a != NULL; a = a->inchain)
     308    76108296 :             if (a->from == from && a->co == co && a->type == t)
     309      171064 :                 return;
     310             :     }
     311             : 
     312             :     /* no dup, so create the arc */
     313     1508152 :     createarc(nfa, t, co, from, to);
     314             : }
     315             : 
     316             : /*
     317             :  * createarc - create a new arc within an NFA
     318             :  *
     319             :  * This function must *only* be used after verifying that there is no existing
     320             :  * identical arc (same type/color/from/to).
     321             :  */
     322             : static void
     323    17382594 : createarc(struct nfa *nfa,
     324             :           int t,
     325             :           color co,
     326             :           struct state *from,
     327             :           struct state *to)
     328             : {
     329             :     struct arc *a;
     330             : 
     331    17382594 :     a = allocarc(nfa);
     332    17382594 :     if (NISERR())
     333       11418 :         return;
     334             :     assert(a != NULL);
     335             : 
     336    17371176 :     a->type = t;
     337    17371176 :     a->co = co;
     338    17371176 :     a->to = to;
     339    17371176 :     a->from = from;
     340             : 
     341             :     /*
     342             :      * Put the new arc on the beginning, not the end, of the chains; it's
     343             :      * simpler here, and freearc() is the same cost either way.  See also the
     344             :      * logic in moveins() and its cohorts, as well as fixempties().
     345             :      */
     346    17371176 :     a->inchain = to->ins;
     347    17371176 :     a->inchainRev = NULL;
     348    17371176 :     if (to->ins)
     349    16789084 :         to->ins->inchainRev = a;
     350    17371176 :     to->ins = a;
     351    17371176 :     a->outchain = from->outs;
     352    17371176 :     a->outchainRev = NULL;
     353    17371176 :     if (from->outs)
     354    16865498 :         from->outs->outchainRev = a;
     355    17371176 :     from->outs = a;
     356             : 
     357    17371176 :     from->nouts++;
     358    17371176 :     to->nins++;
     359             : 
     360    17371176 :     if (COLORED(a) && nfa->parent == NULL)
     361     1577056 :         colorchain(nfa->cm, a);
     362             : }
     363             : 
     364             : /*
     365             :  * allocarc - allocate a new arc within an NFA
     366             :  */
     367             : static struct arc *             /* NULL for failure */
     368    17382594 : allocarc(struct nfa *nfa)
     369             : {
     370             :     struct arc *a;
     371             : 
     372             :     /* first, recycle anything that's on the freelist */
     373    17382594 :     if (nfa->freearcs != NULL)
     374             :     {
     375     1356950 :         a = nfa->freearcs;
     376     1356950 :         nfa->freearcs = a->freechain;
     377             :     }
     378             :     /* otherwise, is there anything left in the last arcbatch? */
     379    16025644 :     else if (nfa->lastab != NULL && nfa->lastabused < nfa->lastab->narcs)
     380             :     {
     381    15980096 :         a = &nfa->lastab->a[nfa->lastabused++];
     382             :     }
     383             :     /* otherwise, need to allocate a new arcbatch */
     384             :     else
     385             :     {
     386             :         struct arcbatch *newAb;
     387             :         size_t      narcs;
     388             : 
     389       45548 :         if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
     390             :         {
     391       11418 :             NERR(REG_ETOOBIG);
     392       11418 :             return NULL;
     393             :         }
     394       34130 :         narcs = (nfa->lastab != NULL) ? nfa->lastab->narcs * 2 : FIRSTABSIZE;
     395       34130 :         if (narcs > MAXABSIZE)
     396       15058 :             narcs = MAXABSIZE;
     397       34130 :         newAb = (struct arcbatch *) MALLOC(ARCBATCHSIZE(narcs));
     398       34130 :         if (newAb == NULL)
     399             :         {
     400           0 :             NERR(REG_ESPACE);
     401           0 :             return NULL;
     402             :         }
     403       34130 :         nfa->v->spaceused += ARCBATCHSIZE(narcs);
     404       34130 :         newAb->narcs = narcs;
     405       34130 :         newAb->next = nfa->lastab;
     406       34130 :         nfa->lastab = newAb;
     407       34130 :         nfa->lastabused = 1;
     408       34130 :         a = &newAb->a[0];
     409             :     }
     410             : 
     411    17371176 :     return a;
     412             : }
     413             : 
     414             : /*
     415             :  * freearc - free an arc
     416             :  */
     417             : static void
     418     1569736 : freearc(struct nfa *nfa,
     419             :         struct arc *victim)
     420             : {
     421     1569736 :     struct state *from = victim->from;
     422     1569736 :     struct state *to = victim->to;
     423             :     struct arc *predecessor;
     424             : 
     425             :     assert(victim->type != 0);
     426             : 
     427             :     /* take it off color chain if necessary */
     428     1569736 :     if (COLORED(victim) && nfa->parent == NULL)
     429      728076 :         uncolorchain(nfa->cm, victim);
     430             : 
     431             :     /* take it off source's out-chain */
     432             :     assert(from != NULL);
     433     1569736 :     predecessor = victim->outchainRev;
     434     1569736 :     if (predecessor == NULL)
     435             :     {
     436             :         assert(from->outs == victim);
     437      449342 :         from->outs = victim->outchain;
     438             :     }
     439             :     else
     440             :     {
     441             :         assert(predecessor->outchain == victim);
     442     1120394 :         predecessor->outchain = victim->outchain;
     443             :     }
     444     1569736 :     if (victim->outchain != NULL)
     445             :     {
     446             :         assert(victim->outchain->outchainRev == victim);
     447     1009158 :         victim->outchain->outchainRev = predecessor;
     448             :     }
     449     1569736 :     from->nouts--;
     450             : 
     451             :     /* take it off target's in-chain */
     452             :     assert(to != NULL);
     453     1569736 :     predecessor = victim->inchainRev;
     454     1569736 :     if (predecessor == NULL)
     455             :     {
     456             :         assert(to->ins == victim);
     457      752852 :         to->ins = victim->inchain;
     458             :     }
     459             :     else
     460             :     {
     461             :         assert(predecessor->inchain == victim);
     462      816884 :         predecessor->inchain = victim->inchain;
     463             :     }
     464     1569736 :     if (victim->inchain != NULL)
     465             :     {
     466             :         assert(victim->inchain->inchainRev == victim);
     467     1040780 :         victim->inchain->inchainRev = predecessor;
     468             :     }
     469     1569736 :     to->nins--;
     470             : 
     471             :     /* clean up and place on NFA's free list */
     472     1569736 :     victim->type = 0;
     473     1569736 :     victim->from = NULL;     /* precautions... */
     474     1569736 :     victim->to = NULL;
     475     1569736 :     victim->inchain = NULL;
     476     1569736 :     victim->inchainRev = NULL;
     477     1569736 :     victim->outchain = NULL;
     478     1569736 :     victim->outchainRev = NULL;
     479     1569736 :     victim->freechain = nfa->freearcs;
     480     1569736 :     nfa->freearcs = victim;
     481     1569736 : }
     482             : 
     483             : /*
     484             :  * changearcsource - flip an arc to have a different from state
     485             :  *
     486             :  * Caller must have verified that there is no pre-existing duplicate arc.
     487             :  */
     488             : static void
     489         588 : changearcsource(struct arc *a, struct state *newfrom)
     490             : {
     491         588 :     struct state *oldfrom = a->from;
     492             :     struct arc *predecessor;
     493             : 
     494             :     assert(oldfrom != newfrom);
     495             : 
     496             :     /* take it off old source's out-chain */
     497             :     assert(oldfrom != NULL);
     498         588 :     predecessor = a->outchainRev;
     499         588 :     if (predecessor == NULL)
     500             :     {
     501             :         assert(oldfrom->outs == a);
     502         588 :         oldfrom->outs = a->outchain;
     503             :     }
     504             :     else
     505             :     {
     506             :         assert(predecessor->outchain == a);
     507           0 :         predecessor->outchain = a->outchain;
     508             :     }
     509         588 :     if (a->outchain != NULL)
     510             :     {
     511             :         assert(a->outchain->outchainRev == a);
     512         574 :         a->outchain->outchainRev = predecessor;
     513             :     }
     514         588 :     oldfrom->nouts--;
     515             : 
     516         588 :     a->from = newfrom;
     517             : 
     518             :     /* prepend it to new source's out-chain */
     519         588 :     a->outchain = newfrom->outs;
     520         588 :     a->outchainRev = NULL;
     521         588 :     if (newfrom->outs)
     522         588 :         newfrom->outs->outchainRev = a;
     523         588 :     newfrom->outs = a;
     524         588 :     newfrom->nouts++;
     525         588 : }
     526             : 
     527             : /*
     528             :  * changearctarget - flip an arc to have a different to state
     529             :  *
     530             :  * Caller must have verified that there is no pre-existing duplicate arc.
     531             :  */
     532             : static void
     533         320 : changearctarget(struct arc *a, struct state *newto)
     534             : {
     535         320 :     struct state *oldto = a->to;
     536             :     struct arc *predecessor;
     537             : 
     538             :     assert(oldto != newto);
     539             : 
     540             :     /* take it off old target's in-chain */
     541             :     assert(oldto != NULL);
     542         320 :     predecessor = a->inchainRev;
     543         320 :     if (predecessor == NULL)
     544             :     {
     545             :         assert(oldto->ins == a);
     546         320 :         oldto->ins = a->inchain;
     547             :     }
     548             :     else
     549             :     {
     550             :         assert(predecessor->inchain == a);
     551           0 :         predecessor->inchain = a->inchain;
     552             :     }
     553         320 :     if (a->inchain != NULL)
     554             :     {
     555             :         assert(a->inchain->inchainRev == a);
     556         312 :         a->inchain->inchainRev = predecessor;
     557             :     }
     558         320 :     oldto->nins--;
     559             : 
     560         320 :     a->to = newto;
     561             : 
     562             :     /* prepend it to new target's in-chain */
     563         320 :     a->inchain = newto->ins;
     564         320 :     a->inchainRev = NULL;
     565         320 :     if (newto->ins)
     566         320 :         newto->ins->inchainRev = a;
     567         320 :     newto->ins = a;
     568         320 :     newto->nins++;
     569         320 : }
     570             : 
     571             : /*
     572             :  * hasnonemptyout - Does state have a non-EMPTY out arc?
     573             :  */
     574             : static int
     575      221036 : hasnonemptyout(struct state *s)
     576             : {
     577             :     struct arc *a;
     578             : 
     579      249222 :     for (a = s->outs; a != NULL; a = a->outchain)
     580             :     {
     581      245274 :         if (a->type != EMPTY)
     582      217088 :             return 1;
     583             :     }
     584        3948 :     return 0;
     585             : }
     586             : 
     587             : /*
     588             :  * findarc - find arc, if any, from given source with given type and color
     589             :  * If there is more than one such arc, the result is random.
     590             :  */
     591             : static struct arc *
     592         574 : findarc(struct state *s,
     593             :         int type,
     594             :         color co)
     595             : {
     596             :     struct arc *a;
     597             : 
     598        1694 :     for (a = s->outs; a != NULL; a = a->outchain)
     599        1122 :         if (a->type == type && a->co == co)
     600           2 :             return a;
     601         572 :     return NULL;
     602             : }
     603             : 
     604             : /*
     605             :  * cparc - allocate a new arc within an NFA, copying details from old one
     606             :  */
     607             : static void
     608     1475984 : cparc(struct nfa *nfa,
     609             :       struct arc *oa,
     610             :       struct state *from,
     611             :       struct state *to)
     612             : {
     613     1475984 :     newarc(nfa, oa->type, oa->co, from, to);
     614     1475984 : }
     615             : 
     616             : /*
     617             :  * sortins - sort the in arcs of a state by from/color/type
     618             :  */
     619             : static void
     620       30614 : sortins(struct nfa *nfa,
     621             :         struct state *s)
     622             : {
     623             :     struct arc **sortarray;
     624             :     struct arc *a;
     625       30614 :     int         n = s->nins;
     626             :     int         i;
     627             : 
     628       30614 :     if (n <= 1)
     629           4 :         return;                 /* nothing to do */
     630             :     /* make an array of arc pointers ... */
     631       30610 :     sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
     632       30610 :     if (sortarray == NULL)
     633             :     {
     634           0 :         NERR(REG_ESPACE);
     635           0 :         return;
     636             :     }
     637       30610 :     i = 0;
     638      135904 :     for (a = s->ins; a != NULL; a = a->inchain)
     639      105294 :         sortarray[i++] = a;
     640             :     assert(i == n);
     641             :     /* ... sort the array */
     642       30610 :     qsort(sortarray, n, sizeof(struct arc *), sortins_cmp);
     643             :     /* ... and rebuild arc list in order */
     644             :     /* it seems worth special-casing first and last items to simplify loop */
     645       30610 :     a = sortarray[0];
     646       30610 :     s->ins = a;
     647       30610 :     a->inchain = sortarray[1];
     648       30610 :     a->inchainRev = NULL;
     649       74684 :     for (i = 1; i < n - 1; i++)
     650             :     {
     651       44074 :         a = sortarray[i];
     652       44074 :         a->inchain = sortarray[i + 1];
     653       44074 :         a->inchainRev = sortarray[i - 1];
     654             :     }
     655       30610 :     a = sortarray[i];
     656       30610 :     a->inchain = NULL;
     657       30610 :     a->inchainRev = sortarray[i - 1];
     658       30610 :     FREE(sortarray);
     659             : }
     660             : 
     661             : static int
     662    75786018 : sortins_cmp(const void *a, const void *b)
     663             : {
     664    75786018 :     const struct arc *aa = *((const struct arc *const *) a);
     665    75786018 :     const struct arc *bb = *((const struct arc *const *) b);
     666             : 
     667             :     /* we check the fields in the order they are most likely to be different */
     668    75786018 :     if (aa->from->no < bb->from->no)
     669    60277290 :         return -1;
     670    15508728 :     if (aa->from->no > bb->from->no)
     671    15048244 :         return 1;
     672      460484 :     if (aa->co < bb->co)
     673      244854 :         return -1;
     674      215630 :     if (aa->co > bb->co)
     675      212438 :         return 1;
     676        3192 :     if (aa->type < bb->type)
     677         112 :         return -1;
     678        3080 :     if (aa->type > bb->type)
     679          58 :         return 1;
     680        3022 :     return 0;
     681             : }
     682             : 
     683             : /*
     684             :  * sortouts - sort the out arcs of a state by to/color/type
     685             :  */
     686             : static void
     687          32 : sortouts(struct nfa *nfa,
     688             :          struct state *s)
     689             : {
     690             :     struct arc **sortarray;
     691             :     struct arc *a;
     692          32 :     int         n = s->nouts;
     693             :     int         i;
     694             : 
     695          32 :     if (n <= 1)
     696           0 :         return;                 /* nothing to do */
     697             :     /* make an array of arc pointers ... */
     698          32 :     sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
     699          32 :     if (sortarray == NULL)
     700             :     {
     701           0 :         NERR(REG_ESPACE);
     702           0 :         return;
     703             :     }
     704          32 :     i = 0;
     705         968 :     for (a = s->outs; a != NULL; a = a->outchain)
     706         936 :         sortarray[i++] = a;
     707             :     assert(i == n);
     708             :     /* ... sort the array */
     709          32 :     qsort(sortarray, n, sizeof(struct arc *), sortouts_cmp);
     710             :     /* ... and rebuild arc list in order */
     711             :     /* it seems worth special-casing first and last items to simplify loop */
     712          32 :     a = sortarray[0];
     713          32 :     s->outs = a;
     714          32 :     a->outchain = sortarray[1];
     715          32 :     a->outchainRev = NULL;
     716         904 :     for (i = 1; i < n - 1; i++)
     717             :     {
     718         872 :         a = sortarray[i];
     719         872 :         a->outchain = sortarray[i + 1];
     720         872 :         a->outchainRev = sortarray[i - 1];
     721             :     }
     722          32 :     a = sortarray[i];
     723          32 :     a->outchain = NULL;
     724          32 :     a->outchainRev = sortarray[i - 1];
     725          32 :     FREE(sortarray);
     726             : }
     727             : 
     728             : static int
     729        5492 : sortouts_cmp(const void *a, const void *b)
     730             : {
     731        5492 :     const struct arc *aa = *((const struct arc *const *) a);
     732        5492 :     const struct arc *bb = *((const struct arc *const *) b);
     733             : 
     734             :     /* we check the fields in the order they are most likely to be different */
     735        5492 :     if (aa->to->no < bb->to->no)
     736         610 :         return -1;
     737        4882 :     if (aa->to->no > bb->to->no)
     738         160 :         return 1;
     739        4722 :     if (aa->co < bb->co)
     740        2562 :         return -1;
     741        2160 :     if (aa->co > bb->co)
     742        2116 :         return 1;
     743          44 :     if (aa->type < bb->type)
     744           2 :         return -1;
     745          42 :     if (aa->type > bb->type)
     746          14 :         return 1;
     747          28 :     return 0;
     748             : }
     749             : 
     750             : /*
     751             :  * Common decision logic about whether to use arc-by-arc operations or
     752             :  * sort/merge.  If there's just a few source arcs we cannot recoup the
     753             :  * cost of sorting the destination arc list, no matter how large it is.
     754             :  * Otherwise, limit the number of arc-by-arc comparisons to about 1000
     755             :  * (a somewhat arbitrary choice, but the breakeven point would probably
     756             :  * be machine dependent anyway).
     757             :  */
     758             : #define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs) \
     759             :     ((nsrcarcs) < 4 ? 0 : ((nsrcarcs) > 32 || (ndestarcs) > 32))
     760             : 
     761             : /*
     762             :  * moveins - move all in arcs of a state to another state
     763             :  *
     764             :  * You might think this could be done better by just updating the
     765             :  * existing arcs, and you would be right if it weren't for the need
     766             :  * for duplicate suppression, which makes it easier to just make new
     767             :  * ones to exploit the suppression built into newarc.
     768             :  *
     769             :  * However, if we have a whole lot of arcs to deal with, retail duplicate
     770             :  * checks become too slow.  In that case we proceed by sorting and merging
     771             :  * the arc lists, and then we can indeed just update the arcs in-place.
     772             :  *
     773             :  * On the other hand, it's also true that this is frequently called with
     774             :  * a brand-new newState that has no existing in-arcs.  In that case,
     775             :  * de-duplication is unnecessary, so we can just blindly move all the arcs.
     776             :  */
     777             : static void
     778      247448 : moveins(struct nfa *nfa,
     779             :         struct state *oldState,
     780             :         struct state *newState)
     781             : {
     782             :     assert(oldState != newState);
     783             : 
     784      247448 :     if (newState->nins == 0)
     785             :     {
     786             :         /* No need for de-duplication */
     787             :         struct arc *a;
     788             : 
     789      201588 :         while ((a = oldState->ins) != NULL)
     790             :         {
     791      103484 :             createarc(nfa, a->type, a->co, a->from, newState);
     792      103484 :             freearc(nfa, a);
     793             :         }
     794             :     }
     795      149344 :     else if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
     796      149252 :     {
     797             :         /* With not too many arcs, just do them one at a time */
     798             :         struct arc *a;
     799             : 
     800      368650 :         while ((a = oldState->ins) != NULL)
     801             :         {
     802      219398 :             cparc(nfa, a, a->from, newState);
     803      219398 :             freearc(nfa, a);
     804             :         }
     805             :     }
     806             :     else
     807             :     {
     808             :         /*
     809             :          * With many arcs, use a sort-merge approach.  Note changearctarget()
     810             :          * will put the arc onto the front of newState's chain, so it does not
     811             :          * break our walk through the sorted part of the chain.
     812             :          */
     813             :         struct arc *oa;
     814             :         struct arc *na;
     815             : 
     816             :         /*
     817             :          * Because we bypass newarc() in this code path, we'd better include a
     818             :          * cancel check.
     819             :          */
     820          92 :         INTERRUPT(nfa->v->re);
     821             : 
     822          92 :         sortins(nfa, oldState);
     823          92 :         sortins(nfa, newState);
     824          92 :         if (NISERR())
     825           0 :             return;             /* might have failed to sort */
     826          92 :         oa = oldState->ins;
     827          92 :         na = newState->ins;
     828        3310 :         while (oa != NULL && na != NULL)
     829             :         {
     830        3214 :             struct arc *a = oa;
     831             : 
     832        3214 :             switch (sortins_cmp(&oa, &na))
     833             :             {
     834         166 :                 case -1:
     835             :                     /* newState does not have anything matching oa */
     836         166 :                     oa = oa->inchain;
     837             : 
     838             :                     /*
     839             :                      * Rather than doing createarc+freearc, we can just unlink
     840             :                      * and relink the existing arc struct.
     841             :                      */
     842         166 :                     changearctarget(a, newState);
     843         166 :                     break;
     844         472 :                 case 0:
     845             :                     /* match, advance in both lists */
     846         472 :                     oa = oa->inchain;
     847         472 :                     na = na->inchain;
     848             :                     /* ... and drop duplicate arc from oldState */
     849         472 :                     freearc(nfa, a);
     850         472 :                     break;
     851        2576 :                 case +1:
     852             :                     /* advance only na; oa might have a match later */
     853        2576 :                     na = na->inchain;
     854        2576 :                     break;
     855        3306 :                 default:
     856             :                     assert(NOTREACHED);
     857             :             }
     858             :         }
     859         246 :         while (oa != NULL)
     860             :         {
     861             :             /* newState does not have anything matching oa */
     862         154 :             struct arc *a = oa;
     863             : 
     864         154 :             oa = oa->inchain;
     865         154 :             changearctarget(a, newState);
     866             :         }
     867             :     }
     868             : 
     869             :     assert(oldState->nins == 0);
     870             :     assert(oldState->ins == NULL);
     871             : }
     872             : 
     873             : /*
     874             :  * copyins - copy in arcs of a state to another state
     875             :  *
     876             :  * The comments for moveins() apply here as well.  However, in current
     877             :  * usage, this is *only* called with brand-new target states, so that
     878             :  * only the "no need for de-duplication" code path is ever reached.
     879             :  * We keep the rest #ifdef'd out in case it's needed in the future.
     880             :  */
     881             : static void
     882       18354 : copyins(struct nfa *nfa,
     883             :         struct state *oldState,
     884             :         struct state *newState)
     885             : {
     886             :     assert(oldState != newState);
     887             :     assert(newState->nins == 0); /* see comment above */
     888             : 
     889       18354 :     if (newState->nins == 0)
     890             :     {
     891             :         /* No need for de-duplication */
     892             :         struct arc *a;
     893             : 
     894      257852 :         for (a = oldState->ins; a != NULL; a = a->inchain)
     895      239498 :             createarc(nfa, a->type, a->co, a->from, newState);
     896             :     }
     897             : #ifdef NOT_USED                 /* see comment above */
     898             :     else if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
     899             :     {
     900             :         /* With not too many arcs, just do them one at a time */
     901             :         struct arc *a;
     902             : 
     903             :         for (a = oldState->ins; a != NULL; a = a->inchain)
     904             :             cparc(nfa, a, a->from, newState);
     905             :     }
     906             :     else
     907             :     {
     908             :         /*
     909             :          * With many arcs, use a sort-merge approach.  Note that createarc()
     910             :          * will put new arcs onto the front of newState's chain, so it does
     911             :          * not break our walk through the sorted part of the chain.
     912             :          */
     913             :         struct arc *oa;
     914             :         struct arc *na;
     915             : 
     916             :         /*
     917             :          * Because we bypass newarc() in this code path, we'd better include a
     918             :          * cancel check.
     919             :          */
     920             :         INTERRUPT(nfa->v->re);
     921             : 
     922             :         sortins(nfa, oldState);
     923             :         sortins(nfa, newState);
     924             :         if (NISERR())
     925             :             return;             /* might have failed to sort */
     926             :         oa = oldState->ins;
     927             :         na = newState->ins;
     928             :         while (oa != NULL && na != NULL)
     929             :         {
     930             :             struct arc *a = oa;
     931             : 
     932             :             switch (sortins_cmp(&oa, &na))
     933             :             {
     934             :                 case -1:
     935             :                     /* newState does not have anything matching oa */
     936             :                     oa = oa->inchain;
     937             :                     createarc(nfa, a->type, a->co, a->from, newState);
     938             :                     break;
     939             :                 case 0:
     940             :                     /* match, advance in both lists */
     941             :                     oa = oa->inchain;
     942             :                     na = na->inchain;
     943             :                     break;
     944             :                 case +1:
     945             :                     /* advance only na; oa might have a match later */
     946             :                     na = na->inchain;
     947             :                     break;
     948             :                 default:
     949             :                     assert(NOTREACHED);
     950             :             }
     951             :         }
     952             :         while (oa != NULL)
     953             :         {
     954             :             /* newState does not have anything matching oa */
     955             :             struct arc *a = oa;
     956             : 
     957             :             oa = oa->inchain;
     958             :             createarc(nfa, a->type, a->co, a->from, newState);
     959             :         }
     960             :     }
     961             : #endif                          /* NOT_USED */
     962       18354 : }
     963             : 
     964             : /*
     965             :  * mergeins - merge a list of inarcs into a state
     966             :  *
     967             :  * This is much like copyins, but the source arcs are listed in an array,
     968             :  * and are not guaranteed unique.  It's okay to clobber the array contents.
     969             :  */
     970             : static void
     971      252496 : mergeins(struct nfa *nfa,
     972             :          struct state *s,
     973             :          struct arc **arcarray,
     974             :          int arccount)
     975             : {
     976             :     struct arc *na;
     977             :     int         i;
     978             :     int         j;
     979             : 
     980      252496 :     if (arccount <= 0)
     981      222066 :         return;
     982             : 
     983             :     /*
     984             :      * Because we bypass newarc() in this code path, we'd better include a
     985             :      * cancel check.
     986             :      */
     987       30430 :     INTERRUPT(nfa->v->re);
     988             : 
     989             :     /* Sort existing inarcs as well as proposed new ones */
     990       30430 :     sortins(nfa, s);
     991       30430 :     if (NISERR())
     992           0 :         return;                 /* might have failed to sort */
     993             : 
     994       30430 :     qsort(arcarray, arccount, sizeof(struct arc *), sortins_cmp);
     995             : 
     996             :     /*
     997             :      * arcarray very likely includes dups, so we must eliminate them.  (This
     998             :      * could be folded into the next loop, but it's not worth the trouble.)
     999             :      */
    1000       30430 :     j = 0;
    1001    15030586 :     for (i = 1; i < arccount; i++)
    1002             :     {
    1003    15000156 :         switch (sortins_cmp(&arcarray[j], &arcarray[i]))
    1004             :         {
    1005    14998944 :             case -1:
    1006             :                 /* non-dup */
    1007    14998944 :                 arcarray[++j] = arcarray[i];
    1008    14998944 :                 break;
    1009        1212 :             case 0:
    1010             :                 /* dup */
    1011        1212 :                 break;
    1012    15000156 :             default:
    1013             :                 /* trouble */
    1014             :                 assert(NOTREACHED);
    1015             :         }
    1016             :     }
    1017       30430 :     arccount = j + 1;
    1018             : 
    1019             :     /*
    1020             :      * Now merge into s' inchain.  Note that createarc() will put new arcs
    1021             :      * onto the front of s's chain, so it does not break our walk through the
    1022             :      * sorted part of the chain.
    1023             :      */
    1024       30430 :     i = 0;
    1025       30430 :     na = s->ins;
    1026    15060202 :     while (i < arccount && na != NULL)
    1027             :     {
    1028    15029772 :         struct arc *a = arcarray[i];
    1029             : 
    1030    15029772 :         switch (sortins_cmp(&a, &na))
    1031             :         {
    1032    14986990 :             case -1:
    1033             :                 /* s does not have anything matching a */
    1034    14986990 :                 createarc(nfa, a->type, a->co, a->from, s);
    1035    14986990 :                 i++;
    1036    14986990 :                 break;
    1037          20 :             case 0:
    1038             :                 /* match, advance in both lists */
    1039          20 :                 i++;
    1040          20 :                 na = na->inchain;
    1041          20 :                 break;
    1042       42762 :             case +1:
    1043             :                 /* advance only na; array might have a match later */
    1044       42762 :                 na = na->inchain;
    1045       42762 :                 break;
    1046    15029772 :             default:
    1047             :                 assert(NOTREACHED);
    1048             :         }
    1049             :     }
    1050       72794 :     while (i < arccount)
    1051             :     {
    1052             :         /* s does not have anything matching a */
    1053       42364 :         struct arc *a = arcarray[i];
    1054             : 
    1055       42364 :         createarc(nfa, a->type, a->co, a->from, s);
    1056       42364 :         i++;
    1057             :     }
    1058             : }
    1059             : 
    1060             : /*
    1061             :  * moveouts - move all out arcs of a state to another state
    1062             :  *
    1063             :  * See comments for moveins()
    1064             :  */
    1065             : static void
    1066       67924 : moveouts(struct nfa *nfa,
    1067             :          struct state *oldState,
    1068             :          struct state *newState)
    1069             : {
    1070             :     assert(oldState != newState);
    1071             : 
    1072       67924 :     if (newState->nouts == 0)
    1073             :     {
    1074             :         /* No need for de-duplication */
    1075             :         struct arc *a;
    1076             : 
    1077       54686 :         while ((a = oldState->outs) != NULL)
    1078             :         {
    1079       29872 :             createarc(nfa, a->type, a->co, newState, a->to);
    1080       29872 :             freearc(nfa, a);
    1081             :         }
    1082             :     }
    1083       43110 :     else if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
    1084       43094 :     {
    1085             :         /* With not too many arcs, just do them one at a time */
    1086             :         struct arc *a;
    1087             : 
    1088      104564 :         while ((a = oldState->outs) != NULL)
    1089             :         {
    1090       61470 :             cparc(nfa, a, newState, a->to);
    1091       61470 :             freearc(nfa, a);
    1092             :         }
    1093             :     }
    1094             :     else
    1095             :     {
    1096             :         /*
    1097             :          * With many arcs, use a sort-merge approach.  Note changearcsource()
    1098             :          * will put the arc onto the front of newState's chain, so it does not
    1099             :          * break our walk through the sorted part of the chain.
    1100             :          */
    1101             :         struct arc *oa;
    1102             :         struct arc *na;
    1103             : 
    1104             :         /*
    1105             :          * Because we bypass newarc() in this code path, we'd better include a
    1106             :          * cancel check.
    1107             :          */
    1108          16 :         INTERRUPT(nfa->v->re);
    1109             : 
    1110          16 :         sortouts(nfa, oldState);
    1111          16 :         sortouts(nfa, newState);
    1112          16 :         if (NISERR())
    1113           0 :             return;             /* might have failed to sort */
    1114          16 :         oa = oldState->outs;
    1115          16 :         na = newState->outs;
    1116         578 :         while (oa != NULL && na != NULL)
    1117             :         {
    1118         560 :             struct arc *a = oa;
    1119             : 
    1120         560 :             switch (sortouts_cmp(&oa, &na))
    1121             :             {
    1122         516 :                 case -1:
    1123             :                     /* newState does not have anything matching oa */
    1124         516 :                     oa = oa->outchain;
    1125             : 
    1126             :                     /*
    1127             :                      * Rather than doing createarc+freearc, we can just unlink
    1128             :                      * and relink the existing arc struct.
    1129             :                      */
    1130         516 :                     changearcsource(a, newState);
    1131         516 :                     break;
    1132          28 :                 case 0:
    1133             :                     /* match, advance in both lists */
    1134          28 :                     oa = oa->outchain;
    1135          28 :                     na = na->outchain;
    1136             :                     /* ... and drop duplicate arc from oldState */
    1137          28 :                     freearc(nfa, a);
    1138          28 :                     break;
    1139          16 :                 case +1:
    1140             :                     /* advance only na; oa might have a match later */
    1141          16 :                     na = na->outchain;
    1142          16 :                     break;
    1143         576 :                 default:
    1144             :                     assert(NOTREACHED);
    1145             :             }
    1146             :         }
    1147          88 :         while (oa != NULL)
    1148             :         {
    1149             :             /* newState does not have anything matching oa */
    1150          72 :             struct arc *a = oa;
    1151             : 
    1152          72 :             oa = oa->outchain;
    1153          72 :             changearcsource(a, newState);
    1154             :         }
    1155             :     }
    1156             : 
    1157             :     assert(oldState->nouts == 0);
    1158             :     assert(oldState->outs == NULL);
    1159             : }
    1160             : 
    1161             : /*
    1162             :  * copyouts - copy out arcs of a state to another state
    1163             :  *
    1164             :  * See comments for copyins()
    1165             :  */
    1166             : static void
    1167       13750 : copyouts(struct nfa *nfa,
    1168             :          struct state *oldState,
    1169             :          struct state *newState)
    1170             : {
    1171             :     assert(oldState != newState);
    1172             :     assert(newState->nouts == 0);    /* see comment above */
    1173             : 
    1174       13750 :     if (newState->nouts == 0)
    1175             :     {
    1176             :         /* No need for de-duplication */
    1177             :         struct arc *a;
    1178             : 
    1179      485984 :         for (a = oldState->outs; a != NULL; a = a->outchain)
    1180      472234 :             createarc(nfa, a->type, a->co, newState, a->to);
    1181             :     }
    1182             : #ifdef NOT_USED                 /* see comment above */
    1183             :     else if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
    1184             :     {
    1185             :         /* With not too many arcs, just do them one at a time */
    1186             :         struct arc *a;
    1187             : 
    1188             :         for (a = oldState->outs; a != NULL; a = a->outchain)
    1189             :             cparc(nfa, a, newState, a->to);
    1190             :     }
    1191             :     else
    1192             :     {
    1193             :         /*
    1194             :          * With many arcs, use a sort-merge approach.  Note that createarc()
    1195             :          * will put new arcs onto the front of newState's chain, so it does
    1196             :          * not break our walk through the sorted part of the chain.
    1197             :          */
    1198             :         struct arc *oa;
    1199             :         struct arc *na;
    1200             : 
    1201             :         /*
    1202             :          * Because we bypass newarc() in this code path, we'd better include a
    1203             :          * cancel check.
    1204             :          */
    1205             :         INTERRUPT(nfa->v->re);
    1206             : 
    1207             :         sortouts(nfa, oldState);
    1208             :         sortouts(nfa, newState);
    1209             :         if (NISERR())
    1210             :             return;             /* might have failed to sort */
    1211             :         oa = oldState->outs;
    1212             :         na = newState->outs;
    1213             :         while (oa != NULL && na != NULL)
    1214             :         {
    1215             :             struct arc *a = oa;
    1216             : 
    1217             :             switch (sortouts_cmp(&oa, &na))
    1218             :             {
    1219             :                 case -1:
    1220             :                     /* newState does not have anything matching oa */
    1221             :                     oa = oa->outchain;
    1222             :                     createarc(nfa, a->type, a->co, newState, a->to);
    1223             :                     break;
    1224             :                 case 0:
    1225             :                     /* match, advance in both lists */
    1226             :                     oa = oa->outchain;
    1227             :                     na = na->outchain;
    1228             :                     break;
    1229             :                 case +1:
    1230             :                     /* advance only na; oa might have a match later */
    1231             :                     na = na->outchain;
    1232             :                     break;
    1233             :                 default:
    1234             :                     assert(NOTREACHED);
    1235             :             }
    1236             :         }
    1237             :         while (oa != NULL)
    1238             :         {
    1239             :             /* newState does not have anything matching oa */
    1240             :             struct arc *a = oa;
    1241             : 
    1242             :             oa = oa->outchain;
    1243             :             createarc(nfa, a->type, a->co, newState, a->to);
    1244             :         }
    1245             :     }
    1246             : #endif                          /* NOT_USED */
    1247       13750 : }
    1248             : 
    1249             : /*
    1250             :  * cloneouts - copy out arcs of a state to another state pair, modifying type
    1251             :  *
    1252             :  * This is only used to convert PLAIN arcs to AHEAD/BEHIND arcs, which share
    1253             :  * the same interpretation of "co".  It wouldn't be sensible with LACONs.
    1254             :  */
    1255             : static void
    1256         342 : cloneouts(struct nfa *nfa,
    1257             :           struct state *old,
    1258             :           struct state *from,
    1259             :           struct state *to,
    1260             :           int type)
    1261             : {
    1262             :     struct arc *a;
    1263             : 
    1264             :     assert(old != from);
    1265             :     assert(type == AHEAD || type == BEHIND);
    1266             : 
    1267        1190 :     for (a = old->outs; a != NULL; a = a->outchain)
    1268             :     {
    1269             :         assert(a->type == PLAIN);
    1270         848 :         newarc(nfa, type, a->co, from, to);
    1271             :     }
    1272         342 : }
    1273             : 
    1274             : /*
    1275             :  * delsub - delete a sub-NFA, updating subre pointers if necessary
    1276             :  *
    1277             :  * This uses a recursive traversal of the sub-NFA, marking already-seen
    1278             :  * states using their tmp pointer.
    1279             :  */
    1280             : static void
    1281        9560 : delsub(struct nfa *nfa,
    1282             :        struct state *lp,        /* the sub-NFA goes from here... */
    1283             :        struct state *rp)        /* ...to here, *not* inclusive */
    1284             : {
    1285             :     assert(lp != rp);
    1286             : 
    1287        9560 :     rp->tmp = rp;                /* mark end */
    1288             : 
    1289        9560 :     deltraverse(nfa, lp, lp);
    1290        9560 :     if (NISERR())
    1291           0 :         return;                 /* asserts might not hold after failure */
    1292             :     assert(lp->nouts == 0 && rp->nins == 0);  /* did the job */
    1293             :     assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
    1294             : 
    1295        9560 :     rp->tmp = NULL;              /* unmark end */
    1296        9560 :     lp->tmp = NULL;              /* and begin, marked by deltraverse */
    1297             : }
    1298             : 
    1299             : /*
    1300             :  * deltraverse - the recursive heart of delsub
    1301             :  * This routine's basic job is to destroy all out-arcs of the state.
    1302             :  */
    1303             : static void
    1304       28034 : deltraverse(struct nfa *nfa,
    1305             :             struct state *leftend,
    1306             :             struct state *s)
    1307             : {
    1308             :     struct arc *a;
    1309             :     struct state *to;
    1310             : 
    1311             :     /* Since this is recursive, it could be driven to stack overflow */
    1312       28034 :     if (STACK_TOO_DEEP(nfa->v->re))
    1313             :     {
    1314           0 :         NERR(REG_ETOOBIG);
    1315           0 :         return;
    1316             :     }
    1317             : 
    1318       28034 :     if (s->nouts == 0)
    1319         202 :         return;                 /* nothing to do */
    1320       27832 :     if (s->tmp != NULL)
    1321        9388 :         return;                 /* already in progress */
    1322             : 
    1323       18444 :     s->tmp = s;                  /* mark as in progress */
    1324             : 
    1325       36918 :     while ((a = s->outs) != NULL)
    1326             :     {
    1327       18474 :         to = a->to;
    1328       18474 :         deltraverse(nfa, leftend, to);
    1329       18474 :         if (NISERR())
    1330           0 :             return;             /* asserts might not hold after failure */
    1331             :         assert(to->nouts == 0 || to->tmp != NULL);
    1332       18474 :         freearc(nfa, a);
    1333       18474 :         if (to->nins == 0 && to->tmp == NULL)
    1334             :         {
    1335             :             assert(to->nouts == 0);
    1336        8884 :             freestate(nfa, to);
    1337             :         }
    1338             :     }
    1339             : 
    1340             :     assert(s->no != FREESTATE); /* we're still here */
    1341             :     assert(s == leftend || s->nins != 0);    /* and still reachable */
    1342             :     assert(s->nouts == 0);       /* but have no outarcs */
    1343             : 
    1344       18444 :     s->tmp = NULL;               /* we're done here */
    1345             : }
    1346             : 
    1347             : /*
    1348             :  * dupnfa - duplicate sub-NFA
    1349             :  *
    1350             :  * Another recursive traversal, this time using tmp to point to duplicates
    1351             :  * as well as mark already-seen states.  (You knew there was a reason why
    1352             :  * it's a state pointer, didn't you? :-))
    1353             :  */
    1354             : static void
    1355       13858 : dupnfa(struct nfa *nfa,
    1356             :        struct state *start,     /* duplicate of subNFA starting here */
    1357             :        struct state *stop,      /* and stopping here */
    1358             :        struct state *from,      /* stringing duplicate from here */
    1359             :        struct state *to)        /* to here */
    1360             : {
    1361       13858 :     if (start == stop)
    1362             :     {
    1363           0 :         newarc(nfa, EMPTY, 0, from, to);
    1364           0 :         return;
    1365             :     }
    1366             : 
    1367       13858 :     stop->tmp = to;
    1368       13858 :     duptraverse(nfa, start, from);
    1369             :     /* done, except for clearing out the tmp pointers */
    1370             : 
    1371       13858 :     stop->tmp = NULL;
    1372       13858 :     cleartraverse(nfa, start);
    1373             : }
    1374             : 
    1375             : /*
    1376             :  * duptraverse - recursive heart of dupnfa
    1377             :  */
    1378             : static void
    1379      368776 : duptraverse(struct nfa *nfa,
    1380             :             struct state *s,
    1381             :             struct state *stmp) /* s's duplicate, or NULL */
    1382             : {
    1383             :     struct arc *a;
    1384             : 
    1385             :     /* Since this is recursive, it could be driven to stack overflow */
    1386      368776 :     if (STACK_TOO_DEEP(nfa->v->re))
    1387             :     {
    1388           0 :         NERR(REG_ETOOBIG);
    1389           0 :         return;
    1390             :     }
    1391             : 
    1392      368776 :     if (s->tmp != NULL)
    1393      116186 :         return;                 /* already done */
    1394             : 
    1395      252590 :     s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
    1396      252590 :     if (s->tmp == NULL)
    1397             :     {
    1398             :         assert(NISERR());
    1399           0 :         return;
    1400             :     }
    1401             : 
    1402      607508 :     for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
    1403             :     {
    1404      354918 :         duptraverse(nfa, a->to, (struct state *) NULL);
    1405      354918 :         if (NISERR())
    1406           0 :             break;
    1407             :         assert(a->to->tmp != NULL);
    1408      354918 :         cparc(nfa, a, s->tmp, a->to->tmp);
    1409             :     }
    1410             : }
    1411             : 
    1412             : /*
    1413             :  * removeconstraints - remove any constraints in an NFA
    1414             :  *
    1415             :  * Constraint arcs are replaced by empty arcs, essentially treating all
    1416             :  * constraints as automatically satisfied.
    1417             :  */
    1418             : static void
    1419         202 : removeconstraints(struct nfa *nfa,
    1420             :                   struct state *start,  /* process subNFA starting here */
    1421             :                   struct state *stop)   /* and stopping here */
    1422             : {
    1423         202 :     if (start == stop)
    1424           0 :         return;
    1425             : 
    1426         202 :     stop->tmp = stop;
    1427         202 :     removetraverse(nfa, start);
    1428             :     /* done, except for clearing out the tmp pointers */
    1429             : 
    1430         202 :     stop->tmp = NULL;
    1431         202 :     cleartraverse(nfa, start);
    1432             : }
    1433             : 
    1434             : /*
    1435             :  * removetraverse - recursive heart of removeconstraints
    1436             :  */
    1437             : static void
    1438         562 : removetraverse(struct nfa *nfa,
    1439             :                struct state *s)
    1440             : {
    1441             :     struct arc *a;
    1442             :     struct arc *oa;
    1443             : 
    1444             :     /* Since this is recursive, it could be driven to stack overflow */
    1445         562 :     if (STACK_TOO_DEEP(nfa->v->re))
    1446             :     {
    1447           0 :         NERR(REG_ETOOBIG);
    1448           0 :         return;
    1449             :     }
    1450             : 
    1451         562 :     if (s->tmp != NULL)
    1452         252 :         return;                 /* already done */
    1453             : 
    1454         310 :     s->tmp = s;
    1455         670 :     for (a = s->outs; a != NULL && !NISERR(); a = oa)
    1456             :     {
    1457         360 :         removetraverse(nfa, a->to);
    1458         360 :         if (NISERR())
    1459           0 :             break;
    1460         360 :         oa = a->outchain;
    1461         360 :         switch (a->type)
    1462             :         {
    1463         346 :             case PLAIN:
    1464             :             case EMPTY:
    1465             :                 /* nothing to do */
    1466         346 :                 break;
    1467          14 :             case AHEAD:
    1468             :             case BEHIND:
    1469             :             case '^':
    1470             :             case '$':
    1471             :             case LACON:
    1472             :                 /* replace it */
    1473          14 :                 newarc(nfa, EMPTY, 0, s, a->to);
    1474          14 :                 freearc(nfa, a);
    1475          14 :                 break;
    1476           0 :             default:
    1477           0 :                 NERR(REG_ASSERT);
    1478           0 :                 break;
    1479             :         }
    1480             :     }
    1481             : }
    1482             : 
    1483             : /*
    1484             :  * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
    1485             :  */
    1486             : static void
    1487     2093566 : cleartraverse(struct nfa *nfa,
    1488             :               struct state *s)
    1489             : {
    1490             :     struct arc *a;
    1491             : 
    1492             :     /* Since this is recursive, it could be driven to stack overflow */
    1493     2093566 :     if (STACK_TOO_DEEP(nfa->v->re))
    1494             :     {
    1495           0 :         NERR(REG_ETOOBIG);
    1496           0 :         return;
    1497             :     }
    1498             : 
    1499     2093566 :     if (s->tmp == NULL)
    1500     1221014 :         return;
    1501      872552 :     s->tmp = NULL;
    1502             : 
    1503     2916656 :     for (a = s->outs; a != NULL; a = a->outchain)
    1504     2044104 :         cleartraverse(nfa, a->to);
    1505             : }
    1506             : 
    1507             : /*
    1508             :  * single_color_transition - does getting from s1 to s2 cross one PLAIN arc?
    1509             :  *
    1510             :  * If traversing from s1 to s2 requires a single PLAIN match (possibly of any
    1511             :  * of a set of colors), return a state whose outarc list contains only PLAIN
    1512             :  * arcs of those color(s).  Otherwise return NULL.
    1513             :  *
    1514             :  * This is used before optimizing the NFA, so there may be EMPTY arcs, which
    1515             :  * we should ignore; the possibility of an EMPTY is why the result state could
    1516             :  * be different from s1.
    1517             :  *
    1518             :  * It's worth troubling to handle multiple parallel PLAIN arcs here because a
    1519             :  * bracket construct such as [abc] might yield either one or several parallel
    1520             :  * PLAIN arcs depending on earlier atoms in the expression.  We'd rather that
    1521             :  * that implementation detail not create user-visible performance differences.
    1522             :  */
    1523             : static struct state *
    1524         254 : single_color_transition(struct state *s1, struct state *s2)
    1525             : {
    1526             :     struct arc *a;
    1527             : 
    1528             :     /* Ignore leading EMPTY arc, if any */
    1529         254 :     if (s1->nouts == 1 && s1->outs->type == EMPTY)
    1530         254 :         s1 = s1->outs->to;
    1531             :     /* Likewise for any trailing EMPTY arc */
    1532         254 :     if (s2->nins == 1 && s2->ins->type == EMPTY)
    1533         254 :         s2 = s2->ins->from;
    1534             :     /* Perhaps we could have a single-state loop in between, if so reject */
    1535         254 :     if (s1 == s2)
    1536           0 :         return NULL;
    1537             :     /* s1 must have at least one outarc... */
    1538         254 :     if (s1->outs == NULL)
    1539           0 :         return NULL;
    1540             :     /* ... and they must all be PLAIN arcs to s2 */
    1541         436 :     for (a = s1->outs; a != NULL; a = a->outchain)
    1542             :     {
    1543         268 :         if (a->type != PLAIN || a->to != s2)
    1544          86 :             return NULL;
    1545             :     }
    1546             :     /* OK, return s1 as the possessor of the relevant outarcs */
    1547         168 :     return s1;
    1548             : }
    1549             : 
    1550             : /*
    1551             :  * specialcolors - fill in special colors for an NFA
    1552             :  */
    1553             : static void
    1554       17710 : specialcolors(struct nfa *nfa)
    1555             : {
    1556             :     /* false colors for BOS, BOL, EOS, EOL */
    1557       17710 :     if (nfa->parent == NULL)
    1558             :     {
    1559        6964 :         nfa->bos[0] = pseudocolor(nfa->cm);
    1560        6964 :         nfa->bos[1] = pseudocolor(nfa->cm);
    1561        6964 :         nfa->eos[0] = pseudocolor(nfa->cm);
    1562        6964 :         nfa->eos[1] = pseudocolor(nfa->cm);
    1563             :     }
    1564             :     else
    1565             :     {
    1566             :         assert(nfa->parent->bos[0] != COLORLESS);
    1567       10746 :         nfa->bos[0] = nfa->parent->bos[0];
    1568             :         assert(nfa->parent->bos[1] != COLORLESS);
    1569       10746 :         nfa->bos[1] = nfa->parent->bos[1];
    1570             :         assert(nfa->parent->eos[0] != COLORLESS);
    1571       10746 :         nfa->eos[0] = nfa->parent->eos[0];
    1572             :         assert(nfa->parent->eos[1] != COLORLESS);
    1573       10746 :         nfa->eos[1] = nfa->parent->eos[1];
    1574             :     }
    1575       17710 : }
    1576             : 
    1577             : /*
    1578             :  * optimize - optimize an NFA
    1579             :  *
    1580             :  * The main goal of this function is not so much "optimization" (though it
    1581             :  * does try to get rid of useless NFA states) as reducing the NFA to a form
    1582             :  * the regex executor can handle.  The executor, and indeed the cNFA format
    1583             :  * that is its input, can only handle PLAIN and LACON arcs.  The output of
    1584             :  * the regex parser also includes EMPTY (do-nothing) arcs, as well as
    1585             :  * ^, $, AHEAD, and BEHIND constraint arcs, which we must get rid of here.
    1586             :  * We first get rid of EMPTY arcs and then deal with the constraint arcs.
    1587             :  * The hardest part of either job is to get rid of circular loops of the
    1588             :  * target arc type.  We would have to do that in any case, though, as such a
    1589             :  * loop would otherwise allow the executor to cycle through the loop endlessly
    1590             :  * without making any progress in the input string.
    1591             :  */
    1592             : static long                     /* re_info bits */
    1593       17704 : optimize(struct nfa *nfa,
    1594             :          FILE *f)               /* for debug output; NULL none */
    1595             : {
    1596             : #ifdef REG_DEBUG
    1597             :     int         verbose = (f != NULL) ? 1 : 0;
    1598             : 
    1599             :     if (verbose)
    1600             :         fprintf(f, "\ninitial cleanup:\n");
    1601             : #endif
    1602       17704 :     cleanup(nfa);               /* may simplify situation */
    1603             : #ifdef REG_DEBUG
    1604             :     if (verbose)
    1605             :         dumpnfa(nfa, f);
    1606             :     if (verbose)
    1607             :         fprintf(f, "\nempties:\n");
    1608             : #endif
    1609       17704 :     fixempties(nfa, f);         /* get rid of EMPTY arcs */
    1610             : #ifdef REG_DEBUG
    1611             :     if (verbose)
    1612             :         fprintf(f, "\nconstraints:\n");
    1613             : #endif
    1614       17704 :     fixconstraintloops(nfa, f); /* get rid of constraint loops */
    1615       17704 :     pullback(nfa, f);           /* pull back constraints backward */
    1616       17704 :     pushfwd(nfa, f);            /* push fwd constraints forward */
    1617             : #ifdef REG_DEBUG
    1618             :     if (verbose)
    1619             :         fprintf(f, "\nfinal cleanup:\n");
    1620             : #endif
    1621       17704 :     cleanup(nfa);               /* final tidying */
    1622             : #ifdef REG_DEBUG
    1623             :     if (verbose)
    1624             :         dumpnfa(nfa, f);
    1625             : #endif
    1626       17704 :     return analyze(nfa);        /* and analysis */
    1627             : }
    1628             : 
    1629             : /*
    1630             :  * pullback - pull back constraints backward to eliminate them
    1631             :  */
    1632             : static void
    1633       27580 : pullback(struct nfa *nfa,
    1634             :          FILE *f)               /* for debug output; NULL none */
    1635             : {
    1636             :     struct state *s;
    1637             :     struct state *nexts;
    1638             :     struct arc *a;
    1639             :     struct arc *nexta;
    1640             :     struct state *intermediates;
    1641             :     int         progress;
    1642             : 
    1643             :     /* find and pull until there are no more */
    1644             :     do
    1645             :     {
    1646       27580 :         progress = 0;
    1647      432854 :         for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    1648             :         {
    1649      405274 :             nexts = s->next;
    1650      405274 :             intermediates = NULL;
    1651     1833194 :             for (a = s->outs; a != NULL && !NISERR(); a = nexta)
    1652             :             {
    1653     1427920 :                 nexta = a->outchain;
    1654     1427920 :                 if (a->type == '^' || a->type == BEHIND)
    1655       93494 :                     if (pull(nfa, a, &intermediates))
    1656       36610 :                         progress = 1;
    1657             :             }
    1658             :             /* clear tmp fields of intermediate states created here */
    1659      407726 :             while (intermediates != NULL)
    1660             :             {
    1661        2452 :                 struct state *ns = intermediates->tmp;
    1662             : 
    1663        2452 :                 intermediates->tmp = NULL;
    1664        2452 :                 intermediates = ns;
    1665             :             }
    1666             :             /* if s is now useless, get rid of it */
    1667      405274 :             if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    1668       31394 :                 dropstate(nfa, s);
    1669             :         }
    1670       27580 :         if (progress && f != NULL)
    1671           0 :             dumpnfa(nfa, f);
    1672       27580 :     } while (progress && !NISERR());
    1673       17704 :     if (NISERR())
    1674           6 :         return;
    1675             : 
    1676             :     /*
    1677             :      * Any ^ constraints we were able to pull to the start state can now be
    1678             :      * replaced by PLAIN arcs referencing the BOS or BOL colors.  There should
    1679             :      * be no other ^ or BEHIND arcs left in the NFA, though we do not check
    1680             :      * that here (compact() will fail if so).
    1681             :      */
    1682       68658 :     for (a = nfa->pre->outs; a != NULL; a = nexta)
    1683             :     {
    1684       50960 :         nexta = a->outchain;
    1685       50960 :         if (a->type == '^')
    1686             :         {
    1687             :             assert(a->co == 0 || a->co == 1);
    1688       36508 :             newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
    1689       36508 :             freearc(nfa, a);
    1690             :         }
    1691             :     }
    1692             : }
    1693             : 
    1694             : /*
    1695             :  * pull - pull a back constraint backward past its source state
    1696             :  *
    1697             :  * Returns 1 if successful (which it always is unless the source is the
    1698             :  * start state or we have an internal error), 0 if nothing happened.
    1699             :  *
    1700             :  * A significant property of this function is that it deletes no pre-existing
    1701             :  * states, and no outarcs of the constraint's from state other than the given
    1702             :  * constraint arc.  This makes the loops in pullback() safe, at the cost that
    1703             :  * we may leave useless states behind.  Therefore, we leave it to pullback()
    1704             :  * to delete such states.
    1705             :  *
    1706             :  * If the from state has multiple back-constraint outarcs, and/or multiple
    1707             :  * compatible constraint inarcs, we only need to create one new intermediate
    1708             :  * state per combination of predecessor and successor states.  *intermediates
    1709             :  * points to a list of such intermediate states for this from state (chained
    1710             :  * through their tmp fields).
    1711             :  */
    1712             : static int
    1713       93494 : pull(struct nfa *nfa,
    1714             :      struct arc *con,
    1715             :      struct state **intermediates)
    1716             : {
    1717       93494 :     struct state *from = con->from;
    1718       93494 :     struct state *to = con->to;
    1719             :     struct arc *a;
    1720             :     struct arc *nexta;
    1721             :     struct state *s;
    1722             : 
    1723             :     assert(from != to);         /* should have gotten rid of this earlier */
    1724       93494 :     if (from->flag)              /* can't pull back beyond start */
    1725       56884 :         return 0;
    1726       36610 :     if (from->nins == 0)
    1727             :     {                           /* unreachable */
    1728        7304 :         freearc(nfa, con);
    1729        7304 :         return 1;
    1730             :     }
    1731             : 
    1732             :     /*
    1733             :      * First, clone from state if necessary to avoid other outarcs.  This may
    1734             :      * seem wasteful, but it simplifies the logic, and we'll get rid of the
    1735             :      * clone state again at the bottom.
    1736             :      */
    1737       29306 :     if (from->nouts > 1)
    1738             :     {
    1739       18354 :         s = newstate(nfa);
    1740       18354 :         if (NISERR())
    1741           0 :             return 0;
    1742       18354 :         copyins(nfa, from, s);  /* duplicate inarcs */
    1743       18354 :         cparc(nfa, con, s, to); /* move constraint arc */
    1744       18354 :         freearc(nfa, con);
    1745       18354 :         if (NISERR())
    1746           0 :             return 0;
    1747       18354 :         from = s;
    1748       18354 :         con = from->outs;
    1749             :     }
    1750             :     assert(from->nouts == 1);
    1751             : 
    1752             :     /* propagate the constraint into the from state's inarcs */
    1753      309692 :     for (a = from->ins; a != NULL && !NISERR(); a = nexta)
    1754             :     {
    1755      280386 :         nexta = a->inchain;
    1756      280386 :         switch (combine(nfa, con, a))
    1757             :         {
    1758       83700 :             case INCOMPATIBLE:  /* destroy the arc */
    1759       83700 :                 freearc(nfa, a);
    1760       83700 :                 break;
    1761       15508 :             case SATISFIED:     /* no action needed */
    1762       15508 :                 break;
    1763      179686 :             case COMPATIBLE:    /* swap the two arcs, more or less */
    1764             :                 /* need an intermediate state, but might have one already */
    1765      203272 :                 for (s = *intermediates; s != NULL; s = s->tmp)
    1766             :                 {
    1767             :                     assert(s->nins > 0 && s->nouts > 0);
    1768      200820 :                     if (s->ins->from == a->from && s->outs->to == to)
    1769      177234 :                         break;
    1770             :                 }
    1771      179686 :                 if (s == NULL)
    1772             :                 {
    1773        2452 :                     s = newstate(nfa);
    1774        2452 :                     if (NISERR())
    1775           0 :                         return 0;
    1776        2452 :                     s->tmp = *intermediates;
    1777        2452 :                     *intermediates = s;
    1778             :                 }
    1779      179686 :                 cparc(nfa, con, a->from, s);
    1780      179686 :                 cparc(nfa, a, s, to);
    1781      179686 :                 freearc(nfa, a);
    1782      179686 :                 break;
    1783        1492 :             case REPLACEARC:    /* replace arc's color */
    1784        1492 :                 newarc(nfa, a->type, con->co, a->from, to);
    1785        1492 :                 freearc(nfa, a);
    1786        1492 :                 break;
    1787           0 :             default:
    1788             :                 assert(NOTREACHED);
    1789           0 :                 break;
    1790             :         }
    1791             :     }
    1792             : 
    1793             :     /* remaining inarcs, if any, incorporate the constraint */
    1794       29306 :     moveins(nfa, from, to);
    1795       29306 :     freearc(nfa, con);
    1796             :     /* from state is now useless, but we leave it to pullback() to clean up */
    1797       29306 :     return 1;
    1798             : }
    1799             : 
    1800             : /*
    1801             :  * pushfwd - push forward constraints forward to eliminate them
    1802             :  */
    1803             : static void
    1804       26598 : pushfwd(struct nfa *nfa,
    1805             :         FILE *f)                /* for debug output; NULL none */
    1806             : {
    1807             :     struct state *s;
    1808             :     struct state *nexts;
    1809             :     struct arc *a;
    1810             :     struct arc *nexta;
    1811             :     struct state *intermediates;
    1812             :     int         progress;
    1813             : 
    1814             :     /* find and push until there are no more */
    1815             :     do
    1816             :     {
    1817       26598 :         progress = 0;
    1818      378796 :         for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    1819             :         {
    1820      352198 :             nexts = s->next;
    1821      352198 :             intermediates = NULL;
    1822     1637052 :             for (a = s->ins; a != NULL && !NISERR(); a = nexta)
    1823             :             {
    1824     1284854 :                 nexta = a->inchain;
    1825     1284854 :                 if (a->type == '$' || a->type == AHEAD)
    1826       66594 :                     if (push(nfa, a, &intermediates))
    1827       21018 :                         progress = 1;
    1828             :             }
    1829             :             /* clear tmp fields of intermediate states created here */
    1830      352202 :             while (intermediates != NULL)
    1831             :             {
    1832           4 :                 struct state *ns = intermediates->tmp;
    1833             : 
    1834           4 :                 intermediates->tmp = NULL;
    1835           4 :                 intermediates = ns;
    1836             :             }
    1837             :             /* if s is now useless, get rid of it */
    1838      352198 :             if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    1839       21578 :                 dropstate(nfa, s);
    1840             :         }
    1841       26598 :         if (progress && f != NULL)
    1842           0 :             dumpnfa(nfa, f);
    1843       26598 :     } while (progress && !NISERR());
    1844       17704 :     if (NISERR())
    1845           6 :         return;
    1846             : 
    1847             :     /*
    1848             :      * Any $ constraints we were able to push to the post state can now be
    1849             :      * replaced by PLAIN arcs referencing the EOS or EOL colors.  There should
    1850             :      * be no other $ or AHEAD arcs left in the NFA, though we do not check
    1851             :      * that here (compact() will fail if so).
    1852             :      */
    1853       55398 :     for (a = nfa->post->ins; a != NULL; a = nexta)
    1854             :     {
    1855       37700 :         nexta = a->inchain;
    1856       37700 :         if (a->type == '$')
    1857             :         {
    1858             :             assert(a->co == 0 || a->co == 1);
    1859       27012 :             newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
    1860       27012 :             freearc(nfa, a);
    1861             :         }
    1862             :     }
    1863             : }
    1864             : 
    1865             : /*
    1866             :  * push - push a forward constraint forward past its destination state
    1867             :  *
    1868             :  * Returns 1 if successful (which it always is unless the destination is the
    1869             :  * post state or we have an internal error), 0 if nothing happened.
    1870             :  *
    1871             :  * A significant property of this function is that it deletes no pre-existing
    1872             :  * states, and no inarcs of the constraint's to state other than the given
    1873             :  * constraint arc.  This makes the loops in pushfwd() safe, at the cost that
    1874             :  * we may leave useless states behind.  Therefore, we leave it to pushfwd()
    1875             :  * to delete such states.
    1876             :  *
    1877             :  * If the to state has multiple forward-constraint inarcs, and/or multiple
    1878             :  * compatible constraint outarcs, we only need to create one new intermediate
    1879             :  * state per combination of predecessor and successor states.  *intermediates
    1880             :  * points to a list of such intermediate states for this to state (chained
    1881             :  * through their tmp fields).
    1882             :  */
    1883             : static int
    1884       66594 : push(struct nfa *nfa,
    1885             :      struct arc *con,
    1886             :      struct state **intermediates)
    1887             : {
    1888       66594 :     struct state *from = con->from;
    1889       66594 :     struct state *to = con->to;
    1890             :     struct arc *a;
    1891             :     struct arc *nexta;
    1892             :     struct state *s;
    1893             : 
    1894             :     assert(to != from);         /* should have gotten rid of this earlier */
    1895       66594 :     if (to->flag)                /* can't push forward beyond end */
    1896       45576 :         return 0;
    1897       21018 :     if (to->nouts == 0)
    1898             :     {                           /* dead end */
    1899         820 :         freearc(nfa, con);
    1900         820 :         return 1;
    1901             :     }
    1902             : 
    1903             :     /*
    1904             :      * First, clone to state if necessary to avoid other inarcs.  This may
    1905             :      * seem wasteful, but it simplifies the logic, and we'll get rid of the
    1906             :      * clone state again at the bottom.
    1907             :      */
    1908       20198 :     if (to->nins > 1)
    1909             :     {
    1910       10822 :         s = newstate(nfa);
    1911       10822 :         if (NISERR())
    1912           0 :             return 0;
    1913       10822 :         copyouts(nfa, to, s);   /* duplicate outarcs */
    1914       10822 :         cparc(nfa, con, from, s);   /* move constraint arc */
    1915       10822 :         freearc(nfa, con);
    1916       10822 :         if (NISERR())
    1917           0 :             return 0;
    1918       10822 :         to = s;
    1919       10822 :         con = to->ins;
    1920             :     }
    1921             :     assert(to->nins == 1);
    1922             : 
    1923             :     /* propagate the constraint into the to state's outarcs */
    1924      125272 :     for (a = to->outs; a != NULL && !NISERR(); a = nexta)
    1925             :     {
    1926      105074 :         nexta = a->outchain;
    1927      105074 :         switch (combine(nfa, con, a))
    1928             :         {
    1929       89056 :             case INCOMPATIBLE:  /* destroy the arc */
    1930       89056 :                 freearc(nfa, a);
    1931       89056 :                 break;
    1932       13672 :             case SATISFIED:     /* no action needed */
    1933       13672 :                 break;
    1934          16 :             case COMPATIBLE:    /* swap the two arcs, more or less */
    1935             :                 /* need an intermediate state, but might have one already */
    1936          16 :                 for (s = *intermediates; s != NULL; s = s->tmp)
    1937             :                 {
    1938             :                     assert(s->nins > 0 && s->nouts > 0);
    1939          12 :                     if (s->ins->from == from && s->outs->to == a->to)
    1940          12 :                         break;
    1941             :                 }
    1942          16 :                 if (s == NULL)
    1943             :                 {
    1944           4 :                     s = newstate(nfa);
    1945           4 :                     if (NISERR())
    1946           0 :                         return 0;
    1947           4 :                     s->tmp = *intermediates;
    1948           4 :                     *intermediates = s;
    1949             :                 }
    1950          16 :                 cparc(nfa, con, s, a->to);
    1951          16 :                 cparc(nfa, a, from, s);
    1952          16 :                 freearc(nfa, a);
    1953          16 :                 break;
    1954        2330 :             case REPLACEARC:    /* replace arc's color */
    1955        2330 :                 newarc(nfa, a->type, con->co, from, a->to);
    1956        2330 :                 freearc(nfa, a);
    1957        2330 :                 break;
    1958           0 :             default:
    1959             :                 assert(NOTREACHED);
    1960           0 :                 break;
    1961             :         }
    1962             :     }
    1963             : 
    1964             :     /* remaining outarcs, if any, incorporate the constraint */
    1965       20198 :     moveouts(nfa, to, from);
    1966       20198 :     freearc(nfa, con);
    1967             :     /* to state is now useless, but we leave it to pushfwd() to clean up */
    1968       20198 :     return 1;
    1969             : }
    1970             : 
    1971             : /*
    1972             :  * combine - constraint lands on an arc, what happens?
    1973             :  *
    1974             :  * #def INCOMPATIBLE    1   // destroys arc
    1975             :  * #def SATISFIED       2   // constraint satisfied
    1976             :  * #def COMPATIBLE      3   // compatible but not satisfied yet
    1977             :  * #def REPLACEARC      4   // replace arc's color with constraint color
    1978             :  */
    1979             : static int
    1980      385460 : combine(struct nfa *nfa,
    1981             :         struct arc *con,
    1982             :         struct arc *a)
    1983             : {
    1984             : #define  CA(ct,at)   (((ct)<<CHAR_BIT) | (at))
    1985             : 
    1986      385460 :     switch (CA(con->type, a->type))
    1987             :     {
    1988       87368 :         case CA('^', PLAIN):    /* newlines are handled separately */
    1989             :         case CA('$', PLAIN):
    1990       87368 :             return INCOMPATIBLE;
    1991             :             break;
    1992       35288 :         case CA(AHEAD, PLAIN):  /* color constraints meet colors */
    1993             :         case CA(BEHIND, PLAIN):
    1994       35288 :             if (con->co == a->co)
    1995        1562 :                 return SATISFIED;
    1996       33726 :             if (con->co == RAINBOW)
    1997             :             {
    1998             :                 /* con is satisfied unless arc's color is a pseudocolor */
    1999           4 :                 if (!(nfa->cm->cd[a->co].flags & PSEUDO))
    2000           4 :                     return SATISFIED;
    2001             :             }
    2002       33722 :             else if (a->co == RAINBOW)
    2003             :             {
    2004             :                 /* con is incompatible if it's for a pseudocolor */
    2005             :                 /* (this is hypothetical; we make no such constraints today) */
    2006        3814 :                 if (nfa->cm->cd[con->co].flags & PSEUDO)
    2007           0 :                     return INCOMPATIBLE;
    2008             :                 /* otherwise, constraint constrains arc to be only its color */
    2009        3814 :                 return REPLACEARC;
    2010             :             }
    2011       29908 :             return INCOMPATIBLE;
    2012             :             break;
    2013       46574 :         case CA('^', '^'):      /* collision, similar constraints */
    2014             :         case CA('$', '$'):
    2015       46574 :             if (con->co == a->co) /* true duplication */
    2016       26692 :                 return SATISFIED;
    2017       19882 :             return INCOMPATIBLE;
    2018             :             break;
    2019       24620 :         case CA(AHEAD, AHEAD):  /* collision, similar constraints */
    2020             :         case CA(BEHIND, BEHIND):
    2021       24620 :             if (con->co == a->co) /* true duplication */
    2022         918 :                 return SATISFIED;
    2023       23702 :             if (con->co == RAINBOW)
    2024             :             {
    2025             :                 /* con is satisfied unless arc's color is a pseudocolor */
    2026           4 :                 if (!(nfa->cm->cd[a->co].flags & PSEUDO))
    2027           4 :                     return SATISFIED;
    2028             :             }
    2029       23698 :             else if (a->co == RAINBOW)
    2030             :             {
    2031             :                 /* con is incompatible if it's for a pseudocolor */
    2032             :                 /* (this is hypothetical; we make no such constraints today) */
    2033           8 :                 if (nfa->cm->cd[con->co].flags & PSEUDO)
    2034           0 :                     return INCOMPATIBLE;
    2035             :                 /* otherwise, constraint constrains arc to be only its color */
    2036           8 :                 return REPLACEARC;
    2037             :             }
    2038       23690 :             return INCOMPATIBLE;
    2039             :             break;
    2040       11908 :         case CA('^', BEHIND):   /* collision, dissimilar constraints */
    2041             :         case CA(BEHIND, '^'):
    2042             :         case CA('$', AHEAD):
    2043             :         case CA(AHEAD, '$'):
    2044       11908 :             return INCOMPATIBLE;
    2045             :             break;
    2046      179702 :         case CA('^', '$'):      /* constraints passing each other */
    2047             :         case CA('^', AHEAD):
    2048             :         case CA(BEHIND, '$'):
    2049             :         case CA(BEHIND, AHEAD):
    2050             :         case CA('$', '^'):
    2051             :         case CA('$', BEHIND):
    2052             :         case CA(AHEAD, '^'):
    2053             :         case CA(AHEAD, BEHIND):
    2054             :         case CA('^', LACON):
    2055             :         case CA(BEHIND, LACON):
    2056             :         case CA('$', LACON):
    2057             :         case CA(AHEAD, LACON):
    2058      179702 :             return COMPATIBLE;
    2059             :             break;
    2060             :     }
    2061             :     assert(NOTREACHED);
    2062           0 :     return INCOMPATIBLE;        /* for benefit of blind compilers */
    2063             : }
    2064             : 
    2065             : /*
    2066             :  * fixempties - get rid of EMPTY arcs
    2067             :  */
    2068             : static void
    2069       17704 : fixempties(struct nfa *nfa,
    2070             :            FILE *f)             /* for debug output; NULL none */
    2071             : {
    2072             :     struct state *s;
    2073             :     struct state *s2;
    2074             :     struct state *nexts;
    2075             :     struct arc *a;
    2076             :     struct arc *nexta;
    2077             :     int         totalinarcs;
    2078             :     struct arc **inarcsorig;
    2079             :     struct arc **arcarray;
    2080             :     int         arccount;
    2081             :     int         prevnins;
    2082             :     int         nskip;
    2083             : 
    2084             :     /*
    2085             :      * First, get rid of any states whose sole out-arc is an EMPTY, since
    2086             :      * they're basically just aliases for their successor.  The parsing
    2087             :      * algorithm creates enough of these that it's worth special-casing this.
    2088             :      */
    2089      422060 :     for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    2090             :     {
    2091      404356 :         nexts = s->next;
    2092      404356 :         if (s->flag || s->nouts != 1)
    2093      104806 :             continue;
    2094      299550 :         a = s->outs;
    2095             :         assert(a != NULL && a->outchain == NULL);
    2096      299550 :         if (a->type != EMPTY)
    2097      179512 :             continue;
    2098      120038 :         if (s != a->to)
    2099      120038 :             moveins(nfa, s, a->to);
    2100      120038 :         dropstate(nfa, s);
    2101             :     }
    2102             : 
    2103             :     /*
    2104             :      * Similarly, get rid of any state with a single EMPTY in-arc, by folding
    2105             :      * it into its predecessor.
    2106             :      */
    2107      302022 :     for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    2108             :     {
    2109      284318 :         nexts = s->next;
    2110             :         /* while we're at it, ensure tmp fields are clear for next step */
    2111             :         assert(s->tmp == NULL);
    2112      284318 :         if (s->flag || s->nins != 1)
    2113       99206 :             continue;
    2114      185112 :         a = s->ins;
    2115             :         assert(a != NULL && a->inchain == NULL);
    2116      185112 :         if (a->type != EMPTY)
    2117      162200 :             continue;
    2118       22912 :         if (s != a->from)
    2119       22912 :             moveouts(nfa, s, a->from);
    2120       22912 :         dropstate(nfa, s);
    2121             :     }
    2122             : 
    2123       17704 :     if (NISERR())
    2124           0 :         return;
    2125             : 
    2126             :     /*
    2127             :      * For each remaining NFA state, find all other states from which it is
    2128             :      * reachable by a chain of one or more EMPTY arcs.  Then generate new arcs
    2129             :      * that eliminate the need for each such chain.
    2130             :      *
    2131             :      * We could replace a chain of EMPTY arcs that leads from a "from" state
    2132             :      * to a "to" state either by pushing non-EMPTY arcs forward (linking
    2133             :      * directly from "from"'s predecessors to "to") or by pulling them back
    2134             :      * (linking directly from "from" to "to"'s successors).  We choose to
    2135             :      * always do the former; this choice is somewhat arbitrary, but the
    2136             :      * approach below requires that we uniformly do one or the other.
    2137             :      *
    2138             :      * Suppose we have a chain of N successive EMPTY arcs (where N can easily
    2139             :      * approach the size of the NFA).  All of the intermediate states must
    2140             :      * have additional inarcs and outarcs, else they'd have been removed by
    2141             :      * the steps above.  Assuming their inarcs are mostly not empties, we will
    2142             :      * add O(N^2) arcs to the NFA, since a non-EMPTY inarc leading to any one
    2143             :      * state in the chain must be duplicated to lead to all its successor
    2144             :      * states as well.  So there is no hope of doing less than O(N^2) work;
    2145             :      * however, we should endeavor to keep the big-O cost from being even
    2146             :      * worse than that, which it can easily become without care.  In
    2147             :      * particular, suppose we were to copy all S1's inarcs forward to S2, and
    2148             :      * then also to S3, and then later we consider pushing S2's inarcs forward
    2149             :      * to S3.  If we include the arcs already copied from S1 in that, we'd be
    2150             :      * doing O(N^3) work.  (The duplicate-arc elimination built into newarc()
    2151             :      * and its cohorts would get rid of the extra arcs, but not without cost.)
    2152             :      *
    2153             :      * We can avoid this cost by treating only arcs that existed at the start
    2154             :      * of this phase as candidates to be pushed forward.  To identify those,
    2155             :      * we remember the first inarc each state had to start with.  We rely on
    2156             :      * the fact that newarc() and friends put new arcs on the front of their
    2157             :      * to-states' inchains, and that this phase never deletes arcs, so that
    2158             :      * the original arcs must be the last arcs in their to-states' inchains.
    2159             :      *
    2160             :      * So the process here is that, for each state in the NFA, we gather up
    2161             :      * all non-EMPTY inarcs of states that can reach the target state via
    2162             :      * EMPTY arcs.  We then sort, de-duplicate, and merge these arcs into the
    2163             :      * target state's inchain.  (We can safely use sort-merge for this as long
    2164             :      * as we update each state's original-arcs pointer after we add arcs to
    2165             :      * it; the sort step of mergeins probably changed the order of the old
    2166             :      * arcs.)
    2167             :      *
    2168             :      * Another refinement worth making is that, because we only add non-EMPTY
    2169             :      * arcs during this phase, and all added arcs have the same from-state as
    2170             :      * the non-EMPTY arc they were cloned from, we know ahead of time that any
    2171             :      * states having only EMPTY outarcs will be useless for lack of outarcs
    2172             :      * after we drop the EMPTY arcs.  (They cannot gain non-EMPTY outarcs if
    2173             :      * they had none to start with.)  So we need not bother to update the
    2174             :      * inchains of such states at all.
    2175             :      */
    2176             : 
    2177             :     /* Remember the states' first original inarcs */
    2178             :     /* ... and while at it, count how many old inarcs there are altogether */
    2179       17704 :     inarcsorig = (struct arc **) MALLOC(nfa->nstates * sizeof(struct arc *));
    2180       17704 :     if (inarcsorig == NULL)
    2181             :     {
    2182           0 :         NERR(REG_ESPACE);
    2183           0 :         return;
    2184             :     }
    2185       17704 :     totalinarcs = 0;
    2186      279110 :     for (s = nfa->states; s != NULL; s = s->next)
    2187             :     {
    2188      261406 :         inarcsorig[s->no] = s->ins;
    2189      261406 :         totalinarcs += s->nins;
    2190             :     }
    2191             : 
    2192             :     /*
    2193             :      * Create a workspace for accumulating the inarcs to be added to the
    2194             :      * current target state.  totalinarcs is probably a considerable
    2195             :      * overestimate of the space needed, but the NFA is unlikely to be large
    2196             :      * enough at this point to make it worth being smarter.
    2197             :      */
    2198       17704 :     arcarray = (struct arc **) MALLOC(totalinarcs * sizeof(struct arc *));
    2199       17704 :     if (arcarray == NULL)
    2200             :     {
    2201           0 :         NERR(REG_ESPACE);
    2202           0 :         FREE(inarcsorig);
    2203           0 :         return;
    2204             :     }
    2205             : 
    2206             :     /* And iterate over the target states */
    2207      274148 :     for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
    2208             :     {
    2209             :         /* Ignore target states without non-EMPTY outarcs, per note above */
    2210      256444 :         if (!s->flag && !hasnonemptyout(s))
    2211        3948 :             continue;
    2212             : 
    2213             :         /* Find predecessor states and accumulate their original inarcs */
    2214      252496 :         arccount = 0;
    2215    15239406 :         for (s2 = emptyreachable(nfa, s, s, inarcsorig); s2 != s; s2 = nexts)
    2216             :         {
    2217             :             /* Add s2's original inarcs to arcarray[], but ignore empties */
    2218    45002510 :             for (a = inarcsorig[s2->no]; a != NULL; a = a->inchain)
    2219             :             {
    2220    30015600 :                 if (a->type != EMPTY)
    2221    15030586 :                     arcarray[arccount++] = a;
    2222             :             }
    2223             : 
    2224             :             /* Reset the tmp fields as we walk back */
    2225    14986910 :             nexts = s2->tmp;
    2226    14986910 :             s2->tmp = NULL;
    2227             :         }
    2228      252496 :         s->tmp = NULL;
    2229             :         assert(arccount <= totalinarcs);
    2230             : 
    2231             :         /* Remember how many original inarcs this state has */
    2232      252496 :         prevnins = s->nins;
    2233             : 
    2234             :         /* Add non-duplicate inarcs to target state */
    2235      252496 :         mergeins(nfa, s, arcarray, arccount);
    2236             : 
    2237             :         /* Now we must update the state's inarcsorig pointer */
    2238      252496 :         nskip = s->nins - prevnins;
    2239      252496 :         a = s->ins;
    2240    15270432 :         while (nskip-- > 0)
    2241    15017936 :             a = a->inchain;
    2242      252496 :         inarcsorig[s->no] = a;
    2243             :     }
    2244             : 
    2245       17704 :     FREE(arcarray);
    2246       17704 :     FREE(inarcsorig);
    2247             : 
    2248       17704 :     if (NISERR())
    2249           6 :         return;
    2250             : 
    2251             :     /*
    2252             :      * Now remove all the EMPTY arcs, since we don't need them anymore.
    2253             :      */
    2254      261092 :     for (s = nfa->states; s != NULL; s = s->next)
    2255             :     {
    2256     1467764 :         for (a = s->outs; a != NULL; a = nexta)
    2257             :         {
    2258     1224370 :             nexta = a->outchain;
    2259     1224370 :             if (a->type == EMPTY)
    2260       25278 :                 freearc(nfa, a);
    2261             :         }
    2262             :     }
    2263             : 
    2264             :     /*
    2265             :      * And remove any states that have become useless.  (This cleanup is not
    2266             :      * very thorough, and would be even less so if we tried to combine it with
    2267             :      * the previous step; but cleanup() will take care of anything we miss.)
    2268             :      */
    2269      261092 :     for (s = nfa->states; s != NULL; s = nexts)
    2270             :     {
    2271      243394 :         nexts = s->next;
    2272      243394 :         if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    2273        3948 :             dropstate(nfa, s);
    2274             :     }
    2275             : 
    2276       17698 :     if (f != NULL)
    2277           0 :         dumpnfa(nfa, f);
    2278             : }
    2279             : 
    2280             : /*
    2281             :  * emptyreachable - recursively find all states that can reach s by EMPTY arcs
    2282             :  *
    2283             :  * The return value is the last such state found.  Its tmp field links back
    2284             :  * to the next-to-last such state, and so on back to s, so that all these
    2285             :  * states can be located without searching the whole NFA.
    2286             :  *
    2287             :  * Since this is only used in fixempties(), we pass in the inarcsorig[] array
    2288             :  * maintained by that function.  This lets us skip over all new inarcs, which
    2289             :  * are certainly not EMPTY arcs.
    2290             :  *
    2291             :  * The maximum recursion depth here is equal to the length of the longest
    2292             :  * loop-free chain of EMPTY arcs, which is surely no more than the size of
    2293             :  * the NFA ... but that could still be enough to cause trouble.
    2294             :  */
    2295             : static struct state *
    2296    15239406 : emptyreachable(struct nfa *nfa,
    2297             :                struct state *s,
    2298             :                struct state *lastfound,
    2299             :                struct arc **inarcsorig)
    2300             : {
    2301             :     struct arc *a;
    2302             : 
    2303             :     /* Since this is recursive, it could be driven to stack overflow */
    2304    15239406 :     if (STACK_TOO_DEEP(nfa->v->re))
    2305             :     {
    2306           0 :         NERR(REG_ETOOBIG);
    2307           0 :         return lastfound;
    2308             :     }
    2309             : 
    2310    15239406 :     s->tmp = lastfound;
    2311    15239406 :     lastfound = s;
    2312    45666440 :     for (a = inarcsorig[s->no]; a != NULL; a = a->inchain)
    2313             :     {
    2314    30427034 :         if (a->type == EMPTY && a->from->tmp == NULL)
    2315    14986910 :             lastfound = emptyreachable(nfa, a->from, lastfound, inarcsorig);
    2316             :     }
    2317    15239406 :     return lastfound;
    2318             : }
    2319             : 
    2320             : /*
    2321             :  * isconstraintarc - detect whether an arc is of a constraint type
    2322             :  */
    2323             : static inline int
    2324     2530618 : isconstraintarc(struct arc *a)
    2325             : {
    2326     2530618 :     switch (a->type)
    2327             :     {
    2328      308614 :         case '^':
    2329             :         case '$':
    2330             :         case BEHIND:
    2331             :         case AHEAD:
    2332             :         case LACON:
    2333      308614 :             return 1;
    2334             :     }
    2335     2222004 :     return 0;
    2336             : }
    2337             : 
    2338             : /*
    2339             :  * hasconstraintout - does state have a constraint out arc?
    2340             :  */
    2341             : static int
    2342       25262 : hasconstraintout(struct state *s)
    2343             : {
    2344             :     struct arc *a;
    2345             : 
    2346       47920 :     for (a = s->outs; a != NULL; a = a->outchain)
    2347             :     {
    2348       38444 :         if (isconstraintarc(a))
    2349       15786 :             return 1;
    2350             :     }
    2351        9476 :     return 0;
    2352             : }
    2353             : 
    2354             : /*
    2355             :  * fixconstraintloops - get rid of loops containing only constraint arcs
    2356             :  *
    2357             :  * A loop of states that contains only constraint arcs is useless, since
    2358             :  * passing around the loop represents no forward progress.  Moreover, it
    2359             :  * would cause infinite looping in pullback/pushfwd, so we need to get rid
    2360             :  * of such loops before doing that.
    2361             :  */
    2362             : static void
    2363       17704 : fixconstraintloops(struct nfa *nfa,
    2364             :                    FILE *f)     /* for debug output; NULL none */
    2365             : {
    2366             :     struct state *s;
    2367             :     struct state *nexts;
    2368             :     struct arc *a;
    2369             :     struct arc *nexta;
    2370             :     int         hasconstraints;
    2371             : 
    2372             :     /*
    2373             :      * In the trivial case of a state that loops to itself, we can just drop
    2374             :      * the constraint arc altogether.  This is worth special-casing because
    2375             :      * such loops are far more common than loops containing multiple states.
    2376             :      * While we're at it, note whether any constraint arcs survive.
    2377             :      */
    2378       17704 :     hasconstraints = 0;
    2379      257150 :     for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    2380             :     {
    2381      239446 :         nexts = s->next;
    2382             :         /* while we're at it, ensure tmp fields are clear for next step */
    2383             :         assert(s->tmp == NULL);
    2384     1434592 :         for (a = s->outs; a != NULL && !NISERR(); a = nexta)
    2385             :         {
    2386     1195146 :             nexta = a->outchain;
    2387     1195146 :             if (isconstraintarc(a))
    2388             :             {
    2389      112110 :                 if (a->to == s)
    2390         352 :                     freearc(nfa, a);
    2391             :                 else
    2392      111758 :                     hasconstraints = 1;
    2393             :             }
    2394             :         }
    2395             :         /* If we removed all the outarcs, the state is useless. */
    2396      239446 :         if (s->nouts == 0 && !s->flag)
    2397           0 :             dropstate(nfa, s);
    2398             :     }
    2399             : 
    2400             :     /* Nothing to do if no remaining constraint arcs */
    2401       17704 :     if (NISERR() || !hasconstraints)
    2402           6 :         return;
    2403             : 
    2404             :     /*
    2405             :      * Starting from each remaining NFA state, search outwards for a
    2406             :      * constraint loop.  If we find a loop, break the loop, then start the
    2407             :      * search over.  (We could possibly retain some state from the first scan,
    2408             :      * but it would complicate things greatly, and multi-state constraint
    2409             :      * loops are rare enough that it's not worth optimizing the case.)
    2410             :      */
    2411       17698 : restart:
    2412      272078 :     for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
    2413             :     {
    2414      254380 :         if (findconstraintloop(nfa, s))
    2415         412 :             goto restart;
    2416             :     }
    2417             : 
    2418       17698 :     if (NISERR())
    2419           0 :         return;
    2420             : 
    2421             :     /*
    2422             :      * Now remove any states that have become useless.  (This cleanup is not
    2423             :      * very thorough, and would be even less so if we tried to combine it with
    2424             :      * the previous step; but cleanup() will take care of anything we miss.)
    2425             :      *
    2426             :      * Because findconstraintloop intentionally doesn't reset all tmp fields,
    2427             :      * we have to clear them after it's done.  This is a convenient place to
    2428             :      * do that, too.
    2429             :      */
    2430      258866 :     for (s = nfa->states; s != NULL; s = nexts)
    2431             :     {
    2432      241168 :         nexts = s->next;
    2433      241168 :         s->tmp = NULL;
    2434      241168 :         if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    2435         426 :             dropstate(nfa, s);
    2436             :     }
    2437             : 
    2438       17698 :     if (f != NULL)
    2439           0 :         dumpnfa(nfa, f);
    2440             : }
    2441             : 
    2442             : /*
    2443             :  * findconstraintloop - recursively find a loop of constraint arcs
    2444             :  *
    2445             :  * If we find a loop, break it by calling breakconstraintloop(), then
    2446             :  * return 1; otherwise return 0.
    2447             :  *
    2448             :  * State tmp fields are guaranteed all NULL on a success return, because
    2449             :  * breakconstraintloop does that.  After a failure return, any state that
    2450             :  * is known not to be part of a loop is marked with s->tmp == s; this allows
    2451             :  * us not to have to re-prove that fact on later calls.  (This convention is
    2452             :  * workable because we already eliminated single-state loops.)
    2453             :  *
    2454             :  * Note that the found loop doesn't necessarily include the first state we
    2455             :  * are called on.  Any loop reachable from that state will do.
    2456             :  *
    2457             :  * The maximum recursion depth here is one more than the length of the longest
    2458             :  * loop-free chain of constraint arcs, which is surely no more than the size
    2459             :  * of the NFA ... but that could still be enough to cause trouble.
    2460             :  */
    2461             : static int
    2462      406266 : findconstraintloop(struct nfa *nfa, struct state *s)
    2463             : {
    2464             :     struct arc *a;
    2465             : 
    2466             :     /* Since this is recursive, it could be driven to stack overflow */
    2467      406266 :     if (STACK_TOO_DEEP(nfa->v->re))
    2468             :     {
    2469           0 :         NERR(REG_ETOOBIG);
    2470           0 :         return 1;               /* to exit as quickly as possible */
    2471             :     }
    2472             : 
    2473      406266 :     if (s->tmp != NULL)
    2474             :     {
    2475             :         /* Already proven uninteresting? */
    2476      147146 :         if (s->tmp == s)
    2477      146734 :             return 0;
    2478             :         /* Found a loop involving s */
    2479         412 :         breakconstraintloop(nfa, s);
    2480             :         /* The tmp fields have been cleaned up by breakconstraintloop */
    2481         412 :         return 1;
    2482             :     }
    2483     1524956 :     for (a = s->outs; a != NULL; a = a->outchain)
    2484             :     {
    2485     1267154 :         if (isconstraintarc(a))
    2486             :         {
    2487      151886 :             struct state *sto = a->to;
    2488             : 
    2489             :             assert(sto != s);
    2490      151886 :             s->tmp = sto;
    2491      151886 :             if (findconstraintloop(nfa, sto))
    2492        1318 :                 return 1;
    2493             :         }
    2494             :     }
    2495             : 
    2496             :     /*
    2497             :      * If we get here, no constraint loop exists leading out from s.  Mark it
    2498             :      * with s->tmp == s so we need not rediscover that fact again later.
    2499             :      */
    2500      257802 :     s->tmp = s;
    2501      257802 :     return 0;
    2502             : }
    2503             : 
    2504             : /*
    2505             :  * breakconstraintloop - break a loop of constraint arcs
    2506             :  *
    2507             :  * sinitial is any one member state of the loop.  Each loop member's tmp
    2508             :  * field links to its successor within the loop.  (Note that this function
    2509             :  * will reset all the tmp fields to NULL.)
    2510             :  *
    2511             :  * We can break the loop by, for any one state S1 in the loop, cloning its
    2512             :  * loop successor state S2 (and possibly following states), and then moving
    2513             :  * all S1->S2 constraint arcs to point to the cloned S2.  The cloned S2 should
    2514             :  * copy any non-constraint outarcs of S2.  Constraint outarcs should be
    2515             :  * dropped if they point back to S1, else they need to be copied as arcs to
    2516             :  * similarly cloned states S3, S4, etc.  In general, each cloned state copies
    2517             :  * non-constraint outarcs, drops constraint outarcs that would lead to itself
    2518             :  * or any earlier cloned state, and sends other constraint outarcs to newly
    2519             :  * cloned states.  No cloned state will have any inarcs that aren't constraint
    2520             :  * arcs or do not lead from S1 or earlier-cloned states.  It's okay to drop
    2521             :  * constraint back-arcs since they would not take us to any state we've not
    2522             :  * already been in; therefore, no new constraint loop is created.  In this way
    2523             :  * we generate a modified NFA that can still represent every useful state
    2524             :  * sequence, but not sequences that represent state loops with no consumption
    2525             :  * of input data.  Note that the set of cloned states will certainly include
    2526             :  * all of the loop member states other than S1, and it may also include
    2527             :  * non-loop states that are reachable from S2 via constraint arcs.  This is
    2528             :  * important because there is no guarantee that findconstraintloop found a
    2529             :  * maximal loop (and searching for one would be NP-hard, so don't try).
    2530             :  * Frequently the "non-loop states" are actually part of a larger loop that
    2531             :  * we didn't notice, and indeed there may be several overlapping loops.
    2532             :  * This technique ensures convergence in such cases, while considering only
    2533             :  * the originally-found loop does not.
    2534             :  *
    2535             :  * If there is only one S1->S2 constraint arc, then that constraint is
    2536             :  * certainly satisfied when we enter any of the clone states.  This means that
    2537             :  * in the common case where many of the constraint arcs are identically
    2538             :  * labeled, we can merge together clone states linked by a similarly-labeled
    2539             :  * constraint: if we can get to the first one we can certainly get to the
    2540             :  * second, so there's no need to distinguish.  This greatly reduces the number
    2541             :  * of new states needed, so we preferentially break the given loop at a state
    2542             :  * pair where this is true.
    2543             :  *
    2544             :  * Furthermore, it's fairly common to find that a cloned successor state has
    2545             :  * no outarcs, especially if we're a bit aggressive about removing unnecessary
    2546             :  * outarcs.  If that happens, then there is simply not any interesting state
    2547             :  * that can be reached through the predecessor's loop arcs, which means we can
    2548             :  * break the loop just by removing those loop arcs, with no new states added.
    2549             :  */
    2550             : static void
    2551         412 : breakconstraintloop(struct nfa *nfa, struct state *sinitial)
    2552             : {
    2553             :     struct state *s;
    2554             :     struct state *shead;
    2555             :     struct state *stail;
    2556             :     struct state *sclone;
    2557             :     struct state *nexts;
    2558             :     struct arc *refarc;
    2559             :     struct arc *a;
    2560             :     struct arc *nexta;
    2561             : 
    2562             :     /*
    2563             :      * Start by identifying which loop step we want to break at.
    2564             :      * Preferentially this is one with only one constraint arc.  (XXX are
    2565             :      * there any other secondary heuristics we want to use here?)  Set refarc
    2566             :      * to point to the selected lone constraint arc, if there is one.
    2567             :      */
    2568         412 :     refarc = NULL;
    2569         412 :     s = sinitial;
    2570             :     do
    2571             :     {
    2572        1060 :         nexts = s->tmp;
    2573             :         assert(nexts != s);     /* should not see any one-element loops */
    2574        1060 :         if (refarc == NULL)
    2575             :         {
    2576         664 :             int         narcs = 0;
    2577             : 
    2578        7600 :             for (a = s->outs; a != NULL; a = a->outchain)
    2579             :             {
    2580        6936 :                 if (a->to == nexts && isconstraintarc(a))
    2581             :                 {
    2582        2592 :                     refarc = a;
    2583        2592 :                     narcs++;
    2584             :                 }
    2585             :             }
    2586             :             assert(narcs > 0);
    2587         664 :             if (narcs > 1)
    2588         372 :                 refarc = NULL;  /* multiple constraint arcs here, no good */
    2589             :         }
    2590        1060 :         s = nexts;
    2591        1060 :     } while (s != sinitial);
    2592             : 
    2593         412 :     if (refarc)
    2594             :     {
    2595             :         /* break at the refarc */
    2596         292 :         shead = refarc->from;
    2597         292 :         stail = refarc->to;
    2598             :         assert(stail == shead->tmp);
    2599             :     }
    2600             :     else
    2601             :     {
    2602             :         /* for lack of a better idea, break after sinitial */
    2603         120 :         shead = sinitial;
    2604         120 :         stail = sinitial->tmp;
    2605             :     }
    2606             : 
    2607             :     /*
    2608             :      * Reset the tmp fields so that we can use them for local storage in
    2609             :      * clonesuccessorstates.  (findconstraintloop won't mind, since it's just
    2610             :      * going to abandon its search anyway.)
    2611             :      */
    2612       34722 :     for (s = nfa->states; s != NULL; s = s->next)
    2613       34310 :         s->tmp = NULL;
    2614             : 
    2615             :     /*
    2616             :      * Recursively build clone state(s) as needed.
    2617             :      */
    2618         412 :     sclone = newstate(nfa);
    2619         412 :     if (sclone == NULL)
    2620             :     {
    2621             :         assert(NISERR());
    2622           0 :         return;
    2623             :     }
    2624             : 
    2625         412 :     clonesuccessorstates(nfa, stail, sclone, shead, refarc,
    2626             :                          NULL, NULL, nfa->nstates);
    2627             : 
    2628         412 :     if (NISERR())
    2629           0 :         return;
    2630             : 
    2631             :     /*
    2632             :      * It's possible that sclone has no outarcs at all, in which case it's
    2633             :      * useless.  (We don't try extremely hard to get rid of useless states
    2634             :      * here, but this is an easy and fairly common case.)
    2635             :      */
    2636         412 :     if (sclone->nouts == 0)
    2637             :     {
    2638          98 :         freestate(nfa, sclone);
    2639          98 :         sclone = NULL;
    2640             :     }
    2641             : 
    2642             :     /*
    2643             :      * Move shead's constraint-loop arcs to point to sclone, or just drop them
    2644             :      * if we discovered we don't need sclone.
    2645             :      */
    2646        4512 :     for (a = shead->outs; a != NULL; a = nexta)
    2647             :     {
    2648        4100 :         nexta = a->outchain;
    2649        4100 :         if (a->to == stail && isconstraintarc(a))
    2650             :         {
    2651         978 :             if (sclone)
    2652         824 :                 cparc(nfa, a, shead, sclone);
    2653         978 :             freearc(nfa, a);
    2654         978 :             if (NISERR())
    2655           0 :                 break;
    2656             :         }
    2657             :     }
    2658             : }
    2659             : 
    2660             : /*
    2661             :  * clonesuccessorstates - create a tree of constraint-arc successor states
    2662             :  *
    2663             :  * ssource is the state to be cloned, and sclone is the state to copy its
    2664             :  * outarcs into.  sclone's inarcs, if any, should already be set up.
    2665             :  *
    2666             :  * spredecessor is the original predecessor state that we are trying to build
    2667             :  * successors for (it may not be the immediate predecessor of ssource).
    2668             :  * refarc, if not NULL, is the original constraint arc that is known to have
    2669             :  * been traversed out of spredecessor to reach the successor(s).
    2670             :  *
    2671             :  * For each cloned successor state, we transiently create a "donemap" that is
    2672             :  * a boolean array showing which source states we've already visited for this
    2673             :  * clone state.  This prevents infinite recursion as well as useless repeat
    2674             :  * visits to the same state subtree (which can add up fast, since typical NFAs
    2675             :  * have multiple redundant arc pathways).  Each donemap is a char array
    2676             :  * indexed by state number.  The donemaps are all of the same size "nstates",
    2677             :  * which is nfa->nstates as of the start of the recursion.  This is enough to
    2678             :  * have entries for all pre-existing states, but *not* entries for clone
    2679             :  * states created during the recursion.  That's okay since we have no need to
    2680             :  * mark those.
    2681             :  *
    2682             :  * curdonemap is NULL when recursing to a new sclone state, or sclone's
    2683             :  * donemap when we are recursing without having created a new state (which we
    2684             :  * do when we decide we can merge a successor state into the current clone
    2685             :  * state).  outerdonemap is NULL at the top level and otherwise the parent
    2686             :  * clone state's donemap.
    2687             :  *
    2688             :  * The successor states we create and fill here form a strict tree structure,
    2689             :  * with each state having exactly one predecessor, except that the toplevel
    2690             :  * state has no inarcs as yet (breakconstraintloop will add its inarcs from
    2691             :  * spredecessor after we're done).  Thus, we can examine sclone's inarcs back
    2692             :  * to the root, plus refarc if any, to identify the set of constraints already
    2693             :  * known valid at the current point.  This allows us to avoid generating extra
    2694             :  * successor states.
    2695             :  */
    2696             : static void
    2697        3622 : clonesuccessorstates(struct nfa *nfa,
    2698             :                      struct state *ssource,
    2699             :                      struct state *sclone,
    2700             :                      struct state *spredecessor,
    2701             :                      struct arc *refarc,
    2702             :                      char *curdonemap,
    2703             :                      char *outerdonemap,
    2704             :                      int nstates)
    2705             : {
    2706             :     char       *donemap;
    2707             :     struct arc *a;
    2708             : 
    2709             :     /* Since this is recursive, it could be driven to stack overflow */
    2710        3622 :     if (STACK_TOO_DEEP(nfa->v->re))
    2711             :     {
    2712           0 :         NERR(REG_ETOOBIG);
    2713           0 :         return;
    2714             :     }
    2715             : 
    2716             :     /* If this state hasn't already got a donemap, create one */
    2717        3622 :     donemap = curdonemap;
    2718        3622 :     if (donemap == NULL)
    2719             :     {
    2720        1820 :         donemap = (char *) MALLOC(nstates * sizeof(char));
    2721        1820 :         if (donemap == NULL)
    2722             :         {
    2723           0 :             NERR(REG_ESPACE);
    2724           0 :             return;
    2725             :         }
    2726             : 
    2727        1820 :         if (outerdonemap != NULL)
    2728             :         {
    2729             :             /*
    2730             :              * Not at outermost recursion level, so copy the outer level's
    2731             :              * donemap; this ensures that we see states in process of being
    2732             :              * visited at outer levels, or already merged into predecessor
    2733             :              * states, as ones we shouldn't traverse back to.
    2734             :              */
    2735        1408 :             memcpy(donemap, outerdonemap, nstates * sizeof(char));
    2736             :         }
    2737             :         else
    2738             :         {
    2739             :             /* At outermost level, only spredecessor is off-limits */
    2740         412 :             memset(donemap, 0, nstates * sizeof(char));
    2741             :             assert(spredecessor->no < nstates);
    2742         412 :             donemap[spredecessor->no] = 1;
    2743             :         }
    2744             :     }
    2745             : 
    2746             :     /* Mark ssource as visited in the donemap */
    2747             :     assert(ssource->no < nstates);
    2748             :     assert(donemap[ssource->no] == 0);
    2749        3622 :     donemap[ssource->no] = 1;
    2750             : 
    2751             :     /*
    2752             :      * We proceed by first cloning all of ssource's outarcs, creating new
    2753             :      * clone states as needed but not doing more with them than that.  Then in
    2754             :      * a second pass, recurse to process the child clone states.  This allows
    2755             :      * us to have only one child clone state per reachable source state, even
    2756             :      * when there are multiple outarcs leading to the same state.  Also, when
    2757             :      * we do visit a child state, its set of inarcs is known exactly, which
    2758             :      * makes it safe to apply the constraint-is-already-checked optimization.
    2759             :      * Also, this ensures that we've merged all the states we can into the
    2760             :      * current clone before we recurse to any children, thus possibly saving
    2761             :      * them from making extra images of those states.
    2762             :      *
    2763             :      * While this function runs, child clone states of the current state are
    2764             :      * marked by setting their tmp fields to point to the original state they
    2765             :      * were cloned from.  This makes it possible to detect multiple outarcs
    2766             :      * leading to the same state, and also makes it easy to distinguish clone
    2767             :      * states from original states (which will have tmp == NULL).
    2768             :      */
    2769       29926 :     for (a = ssource->outs; a != NULL && !NISERR(); a = a->outchain)
    2770             :     {
    2771       26304 :         struct state *sto = a->to;
    2772             : 
    2773             :         /*
    2774             :          * We do not consider cloning successor states that have no constraint
    2775             :          * outarcs; just link to them as-is.  They cannot be part of a
    2776             :          * constraint loop so there is no need to make copies.  In particular,
    2777             :          * this rule keeps us from trying to clone the post state, which would
    2778             :          * be a bad idea.
    2779             :          */
    2780       26304 :         if (isconstraintarc(a) && hasconstraintout(sto))
    2781       10970 :         {
    2782             :             struct state *prevclone;
    2783             :             int         canmerge;
    2784             :             struct arc *a2;
    2785             : 
    2786             :             /*
    2787             :              * Back-link constraint arcs must not be followed.  Nor is there a
    2788             :              * need to revisit states previously merged into this clone.
    2789             :              */
    2790             :             assert(sto->no < nstates);
    2791       15786 :             if (donemap[sto->no] != 0)
    2792        4816 :                 continue;
    2793             : 
    2794             :             /*
    2795             :              * Check whether we already have a child clone state for this
    2796             :              * source state.
    2797             :              */
    2798       10970 :             prevclone = NULL;
    2799       37524 :             for (a2 = sclone->outs; a2 != NULL; a2 = a2->outchain)
    2800             :             {
    2801       34314 :                 if (a2->to->tmp == sto)
    2802             :                 {
    2803        7760 :                     prevclone = a2->to;
    2804        7760 :                     break;
    2805             :                 }
    2806             :             }
    2807             : 
    2808             :             /*
    2809             :              * If this arc is labeled the same as refarc, or the same as any
    2810             :              * arc we must have traversed to get to sclone, then no additional
    2811             :              * constraints need to be met to get to sto, so we should just
    2812             :              * merge its outarcs into sclone.
    2813             :              */
    2814       10970 :             if (refarc && a->type == refarc->type && a->co == refarc->co)
    2815        1802 :                 canmerge = 1;
    2816             :             else
    2817             :             {
    2818             :                 struct state *s;
    2819             : 
    2820        9168 :                 canmerge = 0;
    2821       45892 :                 for (s = sclone; s->ins; s = s->ins->from)
    2822             :                 {
    2823       36724 :                     if (s->nins == 1 &&
    2824          36 :                         a->type == s->ins->type && a->co == s->ins->co)
    2825             :                     {
    2826           0 :                         canmerge = 1;
    2827           0 :                         break;
    2828             :                     }
    2829             :                 }
    2830             :             }
    2831             : 
    2832       10970 :             if (canmerge)
    2833             :             {
    2834             :                 /*
    2835             :                  * We can merge into sclone.  If we previously made a child
    2836             :                  * clone state, drop it; there's no need to visit it.  (This
    2837             :                  * can happen if ssource has multiple pathways to sto, and we
    2838             :                  * only just now found one that is provably a no-op.)
    2839             :                  */
    2840        1802 :                 if (prevclone)
    2841           0 :                     dropstate(nfa, prevclone);  /* kills our outarc, too */
    2842             : 
    2843             :                 /* Recurse to merge sto's outarcs into sclone */
    2844        1802 :                 clonesuccessorstates(nfa,
    2845             :                                      sto,
    2846             :                                      sclone,
    2847             :                                      spredecessor,
    2848             :                                      refarc,
    2849             :                                      donemap,
    2850             :                                      outerdonemap,
    2851             :                                      nstates);
    2852             :                 /* sto should now be marked as previously visited */
    2853             :                 assert(NISERR() || donemap[sto->no] == 1);
    2854             :             }
    2855        9168 :             else if (prevclone)
    2856             :             {
    2857             :                 /*
    2858             :                  * We already have a clone state for this successor, so just
    2859             :                  * make another arc to it.
    2860             :                  */
    2861        7760 :                 cparc(nfa, a, sclone, prevclone);
    2862             :             }
    2863             :             else
    2864             :             {
    2865             :                 /*
    2866             :                  * We need to create a new successor clone state.
    2867             :                  */
    2868             :                 struct state *stoclone;
    2869             : 
    2870        1408 :                 stoclone = newstate(nfa);
    2871        1408 :                 if (stoclone == NULL)
    2872             :                 {
    2873             :                     assert(NISERR());
    2874           0 :                     break;
    2875             :                 }
    2876             :                 /* Mark it as to what it's a clone of */
    2877        1408 :                 stoclone->tmp = sto;
    2878             :                 /* ... and add the outarc leading to it */
    2879        1408 :                 cparc(nfa, a, sclone, stoclone);
    2880             :             }
    2881             :         }
    2882             :         else
    2883             :         {
    2884             :             /*
    2885             :              * Non-constraint outarcs just get copied to sclone, as do outarcs
    2886             :              * leading to states with no constraint outarc.
    2887             :              */
    2888       10518 :             cparc(nfa, a, sclone, sto);
    2889             :         }
    2890             :     }
    2891             : 
    2892             :     /*
    2893             :      * If we are at outer level for this clone state, recurse to all its child
    2894             :      * clone states, clearing their tmp fields as we go.  (If we're not
    2895             :      * outermost for sclone, leave this to be done by the outer call level.)
    2896             :      * Note that if we have multiple outarcs leading to the same clone state,
    2897             :      * it will only be recursed-to once.
    2898             :      */
    2899        3622 :     if (curdonemap == NULL)
    2900             :     {
    2901       16166 :         for (a = sclone->outs; a != NULL && !NISERR(); a = a->outchain)
    2902             :         {
    2903       14346 :             struct state *stoclone = a->to;
    2904       14346 :             struct state *sto = stoclone->tmp;
    2905             : 
    2906       14346 :             if (sto != NULL)
    2907             :             {
    2908        1408 :                 stoclone->tmp = NULL;
    2909        1408 :                 clonesuccessorstates(nfa,
    2910             :                                      sto,
    2911             :                                      stoclone,
    2912             :                                      spredecessor,
    2913             :                                      refarc,
    2914             :                                      NULL,
    2915             :                                      donemap,
    2916             :                                      nstates);
    2917             :             }
    2918             :         }
    2919             : 
    2920             :         /* Don't forget to free sclone's donemap when done with it */
    2921        1820 :         FREE(donemap);
    2922             :     }
    2923             : }
    2924             : 
    2925             : /*
    2926             :  * cleanup - clean up NFA after optimizations
    2927             :  */
    2928             : static void
    2929       35408 : cleanup(struct nfa *nfa)
    2930             : {
    2931             :     struct state *s;
    2932             :     struct state *nexts;
    2933             :     int         n;
    2934             : 
    2935       35408 :     if (NISERR())
    2936           6 :         return;
    2937             : 
    2938             :     /* clear out unreachable or dead-end states */
    2939             :     /* use pre to mark reachable, then post to mark can-reach-post */
    2940       35402 :     markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
    2941       35402 :     markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
    2942      661388 :     for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    2943             :     {
    2944      625986 :         nexts = s->next;
    2945      625986 :         if (s->tmp != nfa->post && !s->flag)
    2946        6240 :             dropstate(nfa, s);
    2947             :     }
    2948             :     assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
    2949       35402 :     cleartraverse(nfa, nfa->pre);
    2950             :     assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == NULL);
    2951             :     /* the nins==0 (final unreachable) case will be caught later */
    2952             : 
    2953             :     /* renumber surviving states */
    2954       35402 :     n = 0;
    2955      655148 :     for (s = nfa->states; s != NULL; s = s->next)
    2956      619746 :         s->no = n++;
    2957       35402 :     nfa->nstates = n;
    2958             : }
    2959             : 
    2960             : /*
    2961             :  * markreachable - recursive marking of reachable states
    2962             :  */
    2963             : static void
    2964     1724482 : markreachable(struct nfa *nfa,
    2965             :               struct state *s,
    2966             :               struct state *okay,   /* consider only states with this mark */
    2967             :               struct state *mark)   /* the value to mark with */
    2968             : {
    2969             :     struct arc *a;
    2970             : 
    2971             :     /* Since this is recursive, it could be driven to stack overflow */
    2972     1724482 :     if (STACK_TOO_DEEP(nfa->v->re))
    2973             :     {
    2974           0 :         NERR(REG_ETOOBIG);
    2975           0 :         return;
    2976             :     }
    2977             : 
    2978     1724482 :     if (s->tmp != okay)
    2979     1104744 :         return;
    2980      619738 :     s->tmp = mark;
    2981             : 
    2982     2308818 :     for (a = s->outs; a != NULL; a = a->outchain)
    2983     1689080 :         markreachable(nfa, a->to, okay, mark);
    2984             : }
    2985             : 
    2986             : /*
    2987             :  * markcanreach - recursive marking of states which can reach here
    2988             :  */
    2989             : static void
    2990     1725388 : markcanreach(struct nfa *nfa,
    2991             :              struct state *s,
    2992             :              struct state *okay,    /* consider only states with this mark */
    2993             :              struct state *mark)    /* the value to mark with */
    2994             : {
    2995             :     struct arc *a;
    2996             : 
    2997             :     /* Since this is recursive, it could be driven to stack overflow */
    2998     1725388 :     if (STACK_TOO_DEEP(nfa->v->re))
    2999             :     {
    3000           0 :         NERR(REG_ETOOBIG);
    3001           0 :         return;
    3002             :     }
    3003             : 
    3004     1725388 :     if (s->tmp != okay)
    3005     1105830 :         return;
    3006      619558 :     s->tmp = mark;
    3007             : 
    3008     2309544 :     for (a = s->ins; a != NULL; a = a->inchain)
    3009     1689986 :         markcanreach(nfa, a->from, okay, mark);
    3010             : }
    3011             : 
    3012             : /*
    3013             :  * analyze - ascertain potentially-useful facts about an optimized NFA
    3014             :  */
    3015             : static long                     /* re_info bits to be ORed in */
    3016       17704 : analyze(struct nfa *nfa)
    3017             : {
    3018             :     struct arc *a;
    3019             :     struct arc *aa;
    3020             : 
    3021       17704 :     if (NISERR())
    3022           6 :         return 0;
    3023             : 
    3024             :     /* Detect whether NFA can't match anything */
    3025       17698 :     if (nfa->pre->outs == NULL)
    3026          94 :         return REG_UIMPOSSIBLE;
    3027             : 
    3028             :     /* Detect whether NFA matches all strings (possibly with length bounds) */
    3029       17604 :     checkmatchall(nfa);
    3030             : 
    3031             :     /* Detect whether NFA can possibly match a zero-length string */
    3032       54176 :     for (a = nfa->pre->outs; a != NULL; a = a->outchain)
    3033     1539888 :         for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
    3034     1503316 :             if (aa->to == nfa->post)
    3035        2126 :                 return REG_UEMPTYMATCH;
    3036       15478 :     return 0;
    3037             : }
    3038             : 
    3039             : /*
    3040             :  * checkmatchall - does the NFA represent no more than a string length test?
    3041             :  *
    3042             :  * If so, set nfa->minmatchall and nfa->maxmatchall correctly (they are -1
    3043             :  * to begin with) and set the MATCHALL bit in nfa->flags.
    3044             :  *
    3045             :  * To succeed, we require all arcs to be PLAIN RAINBOW arcs, except for those
    3046             :  * for pseudocolors (i.e., BOS/BOL/EOS/EOL).  We must be able to reach the
    3047             :  * post state via RAINBOW arcs, and if there are any loops in the graph, they
    3048             :  * must be loop-to-self arcs, ensuring that each loop iteration consumes
    3049             :  * exactly one character.  (Longer loops are problematic because they create
    3050             :  * non-consecutive possible match lengths; we have no good way to represent
    3051             :  * that situation for lengths beyond the DUPINF limit.)
    3052             :  *
    3053             :  * Pseudocolor arcs complicate things a little.  We know that they can only
    3054             :  * appear as pre-state outarcs (for BOS/BOL) or post-state inarcs (for
    3055             :  * EOS/EOL).  There, they must exactly replicate the parallel RAINBOW arcs,
    3056             :  * e.g. if the pre state has one RAINBOW outarc to state 2, it must have BOS
    3057             :  * and BOL outarcs to state 2, and no others.  Missing or extra pseudocolor
    3058             :  * arcs can occur, meaning that the NFA involves some constraint on the
    3059             :  * adjacent characters, which makes it not a matchall NFA.
    3060             :  */
    3061             : static void
    3062       17604 : checkmatchall(struct nfa *nfa)
    3063             : {
    3064             :     bool      **haspaths;
    3065             :     struct state *s;
    3066             :     int         i;
    3067             : 
    3068             :     /*
    3069             :      * If there are too many states, don't bother trying to detect matchall.
    3070             :      * This limit serves to bound the time and memory we could consume below.
    3071             :      * Note that even if the graph is all-RAINBOW, if there are significantly
    3072             :      * more than DUPINF states then it's likely that there are paths of length
    3073             :      * more than DUPINF, which would force us to fail anyhow.  In practice,
    3074             :      * plausible ways of writing a matchall regex with maximum finite path
    3075             :      * length K tend not to have very many more than K states.
    3076             :      */
    3077       17604 :     if (nfa->nstates > DUPINF * 2)
    3078          12 :         return;
    3079             : 
    3080             :     /*
    3081             :      * First, scan all the states to verify that only RAINBOW arcs appear,
    3082             :      * plus pseudocolor arcs adjacent to the pre and post states.  This lets
    3083             :      * us quickly eliminate most cases that aren't matchall NFAs.
    3084             :      */
    3085       66122 :     for (s = nfa->states; s != NULL; s = s->next)
    3086             :     {
    3087             :         struct arc *a;
    3088             : 
    3089      190112 :         for (a = s->outs; a != NULL; a = a->outchain)
    3090             :         {
    3091      141582 :             if (a->type != PLAIN)
    3092          90 :                 return;         /* any LACONs make it non-matchall */
    3093      141492 :             if (a->co != RAINBOW)
    3094             :             {
    3095       57978 :                 if (nfa->cm->cd[a->co].flags & PSEUDO)
    3096             :                 {
    3097             :                     /*
    3098             :                      * Pseudocolor arc: verify it's in a valid place (this
    3099             :                      * seems quite unlikely to fail, but let's be sure).
    3100             :                      */
    3101       42022 :                     if (s == nfa->pre &&
    3102       31158 :                         (a->co == nfa->bos[0] || a->co == nfa->bos[1]))
    3103             :                          /* okay BOS/BOL arc */ ;
    3104       10864 :                     else if (a->to == nfa->post &&
    3105       10864 :                              (a->co == nfa->eos[0] || a->co == nfa->eos[1]))
    3106             :                          /* okay EOS/EOL arc */ ;
    3107             :                     else
    3108           0 :                         return; /* unexpected pseudocolor arc */
    3109             :                     /* We'll check these arcs some more below. */
    3110             :                 }
    3111             :                 else
    3112       15956 :                     return;     /* any other color makes it non-matchall */
    3113             :             }
    3114             :         }
    3115             :         /* Also, assert that the tmp fields are available for use. */
    3116             :         assert(s->tmp == NULL);
    3117             :     }
    3118             : 
    3119             :     /*
    3120             :      * The next cheapest check we can make is to verify that the BOS/BOL
    3121             :      * outarcs of the pre state reach the same states as its RAINBOW outarcs.
    3122             :      * If they don't, the NFA expresses some constraints on the character
    3123             :      * before the matched string, making it non-matchall.  Likewise, the
    3124             :      * EOS/EOL inarcs of the post state must match its RAINBOW inarcs.
    3125             :      */
    3126        1546 :     if (!check_out_colors_match(nfa->pre, RAINBOW, nfa->bos[0]) ||
    3127        1540 :         !check_out_colors_match(nfa->pre, RAINBOW, nfa->bos[1]) ||
    3128        1204 :         !check_in_colors_match(nfa->post, RAINBOW, nfa->eos[0]) ||
    3129        1196 :         !check_in_colors_match(nfa->post, RAINBOW, nfa->eos[1]))
    3130         558 :         return;
    3131             : 
    3132             :     /*
    3133             :      * Initialize an array of path-length arrays, in which
    3134             :      * checkmatchall_recurse will return per-state results.  This lets us
    3135             :      * memo-ize the recursive search and avoid exponential time consumption.
    3136             :      */
    3137         988 :     haspaths = (bool **) MALLOC(nfa->nstates * sizeof(bool *));
    3138         988 :     if (haspaths == NULL)
    3139           0 :         return;                 /* fail quietly */
    3140         988 :     memset(haspaths, 0, nfa->nstates * sizeof(bool *));
    3141             : 
    3142             :     /*
    3143             :      * Recursively search the graph for all-RAINBOW paths to the "post" state,
    3144             :      * starting at the "pre" state, and computing the lengths of the paths.
    3145             :      * (Given the preceding checks, there should be at least one such path.
    3146             :      * However we could get back a false result anyway, in case there are
    3147             :      * multi-state loops, paths exceeding DUPINF+1 length, or non-algorithmic
    3148             :      * failures such as ENOMEM.)
    3149             :      */
    3150         988 :     if (checkmatchall_recurse(nfa, nfa->pre, haspaths))
    3151             :     {
    3152             :         /* The useful result is the path length array for the pre state */
    3153         964 :         bool       *haspath = haspaths[nfa->pre->no];
    3154             :         int         minmatch,
    3155             :                     maxmatch,
    3156             :                     morematch;
    3157             : 
    3158             :         assert(haspath != NULL);
    3159             : 
    3160             :         /*
    3161             :          * haspath[] now represents the set of possible path lengths; but we
    3162             :          * want to reduce that to a min and max value, because it doesn't seem
    3163             :          * worth complicating regexec.c to deal with nonconsecutive possible
    3164             :          * match lengths.  Find min and max of first run of lengths, then
    3165             :          * verify there are no nonconsecutive lengths.
    3166             :          */
    3167        3966 :         for (minmatch = 0; minmatch <= DUPINF + 1; minmatch++)
    3168             :         {
    3169        3966 :             if (haspath[minmatch])
    3170         964 :                 break;
    3171             :         }
    3172             :         assert(minmatch <= DUPINF + 1); /* else checkmatchall_recurse lied */
    3173       63566 :         for (maxmatch = minmatch; maxmatch < DUPINF + 1; maxmatch++)
    3174             :         {
    3175       63326 :             if (!haspath[maxmatch + 1])
    3176         724 :                 break;
    3177             :         }
    3178      180060 :         for (morematch = maxmatch + 1; morematch <= DUPINF + 1; morematch++)
    3179             :         {
    3180      179108 :             if (haspath[morematch])
    3181             :             {
    3182          12 :                 haspath = NULL; /* fail, there are nonconsecutive lengths */
    3183          12 :                 break;
    3184             :             }
    3185             :         }
    3186             : 
    3187         964 :         if (haspath != NULL)
    3188             :         {
    3189             :             /*
    3190             :              * Success, so record the info.  Here we have a fine point: the
    3191             :              * path length from the pre state includes the pre-to-initial
    3192             :              * transition, so it's one more than the actually matched string
    3193             :              * length.  (We avoided counting the final-to-post transition
    3194             :              * within checkmatchall_recurse, but not this one.)  This is why
    3195             :              * checkmatchall_recurse allows one more level of path length than
    3196             :              * might seem necessary.  This decrement also takes care of
    3197             :              * converting checkmatchall_recurse's definition of "infinity" as
    3198             :              * "DUPINF+1" to our normal representation as "DUPINF".
    3199             :              */
    3200             :             assert(minmatch > 0);    /* else pre and post states were adjacent */
    3201         952 :             nfa->minmatchall = minmatch - 1;
    3202         952 :             nfa->maxmatchall = maxmatch - 1;
    3203         952 :             nfa->flags |= MATCHALL;
    3204             :         }
    3205             :     }
    3206             : 
    3207             :     /* Clean up */
    3208       10396 :     for (i = 0; i < nfa->nstates; i++)
    3209             :     {
    3210        9408 :         if (haspaths[i] != NULL)
    3211        8420 :             FREE(haspaths[i]);
    3212             :     }
    3213         988 :     FREE(haspaths);
    3214             : }
    3215             : 
    3216             : /*
    3217             :  * checkmatchall_recurse - recursive search for checkmatchall
    3218             :  *
    3219             :  * s is the state to be examined in this recursion level.
    3220             :  * haspaths[] is an array of per-state exit path length arrays.
    3221             :  *
    3222             :  * We return true if the search was performed successfully, false if
    3223             :  * we had to fail because of multi-state loops or other internal reasons.
    3224             :  * (Because "dead" states that can't reach the post state have been
    3225             :  * eliminated, and we already verified that only RAINBOW and matching
    3226             :  * pseudocolor arcs exist, every state should have RAINBOW path(s) to
    3227             :  * the post state.  Hence we take a false result from recursive calls
    3228             :  * as meaning that we'd better fail altogether, not just that that
    3229             :  * particular state can't reach the post state.)
    3230             :  *
    3231             :  * On success, we store a malloc'd result array in haspaths[s->no],
    3232             :  * showing the possible path lengths from s to the post state.
    3233             :  * Each state's haspath[] array is of length DUPINF+2.  The entries from
    3234             :  * k = 0 to DUPINF are true if there is an all-RAINBOW path of length k
    3235             :  * from this state to the string end.  haspath[DUPINF+1] is true if all
    3236             :  * path lengths >= DUPINF+1 are possible.  (Situations that cannot be
    3237             :  * represented under these rules cause failure.)
    3238             :  *
    3239             :  * checkmatchall is responsible for eventually freeing the haspath[] arrays.
    3240             :  */
    3241             : static bool
    3242        8420 : checkmatchall_recurse(struct nfa *nfa, struct state *s, bool **haspaths)
    3243             : {
    3244        8420 :     bool        result = false;
    3245        8420 :     bool        foundloop = false;
    3246             :     bool       *haspath;
    3247             :     struct arc *a;
    3248             : 
    3249             :     /*
    3250             :      * Since this is recursive, it could be driven to stack overflow.  But we
    3251             :      * need not treat that as a hard failure; just deem the NFA non-matchall.
    3252             :      */
    3253        8420 :     if (STACK_TOO_DEEP(nfa->v->re))
    3254           0 :         return false;
    3255             : 
    3256             :     /* In case the search takes a long time, check for cancel */
    3257        8420 :     INTERRUPT(nfa->v->re);
    3258             : 
    3259             :     /* Create a haspath array for this state */
    3260        8420 :     haspath = (bool *) MALLOC((DUPINF + 2) * sizeof(bool));
    3261        8420 :     if (haspath == NULL)
    3262           0 :         return false;           /* again, treat as non-matchall */
    3263        8420 :     memset(haspath, 0, (DUPINF + 2) * sizeof(bool));
    3264             : 
    3265             :     /* Mark this state as being visited */
    3266             :     assert(s->tmp == NULL);
    3267        8420 :     s->tmp = s;
    3268             : 
    3269       82884 :     for (a = s->outs; a != NULL; a = a->outchain)
    3270             :     {
    3271       74560 :         if (a->co != RAINBOW)
    3272        6420 :             continue;           /* ignore pseudocolor arcs */
    3273       68140 :         if (a->to == nfa->post)
    3274             :         {
    3275             :             /* We found an all-RAINBOW path to the post state */
    3276         976 :             result = true;
    3277             : 
    3278             :             /*
    3279             :              * Mark this state as being zero steps away from the string end
    3280             :              * (the transition to the post state isn't counted).
    3281             :              */
    3282         976 :             haspath[0] = true;
    3283             :         }
    3284       67164 :         else if (a->to == s)
    3285             :         {
    3286             :             /* We found a cycle of length 1, which we'll deal with below. */
    3287         246 :             foundloop = true;
    3288             :         }
    3289       66918 :         else if (a->to->tmp != NULL)
    3290             :         {
    3291             :             /* It's busy, so we found a cycle of length > 1, so fail. */
    3292          12 :             result = false;
    3293          12 :             break;
    3294             :         }
    3295             :         else
    3296             :         {
    3297             :             /* Consider paths forward through this to-state. */
    3298             :             bool       *nexthaspath;
    3299             :             int         i;
    3300             : 
    3301             :             /* If to-state was not already visited, recurse */
    3302       66906 :             if (haspaths[a->to->no] == NULL)
    3303             :             {
    3304        7432 :                 result = checkmatchall_recurse(nfa, a->to, haspaths);
    3305             :                 /* Fail if any recursive path fails */
    3306        7432 :                 if (!result)
    3307          72 :                     break;
    3308             :             }
    3309             :             else
    3310             :             {
    3311             :                 /* The previous visit must have found path(s) to the end */
    3312       59474 :                 result = true;
    3313             :             }
    3314             :             assert(a->to->tmp == NULL);
    3315       66834 :             nexthaspath = haspaths[a->to->no];
    3316             :             assert(nexthaspath != NULL);
    3317             : 
    3318             :             /*
    3319             :              * Now, for every path of length i from a->to to the string end,
    3320             :              * there is a path of length i + 1 from s to the string end.
    3321             :              */
    3322       66834 :             if (nexthaspath[DUPINF] != nexthaspath[DUPINF + 1])
    3323             :             {
    3324             :                 /*
    3325             :                  * a->to has a path of length exactly DUPINF, but not longer;
    3326             :                  * or it has paths of all lengths > DUPINF but not one of
    3327             :                  * exactly that length.  In either case, we cannot represent
    3328             :                  * the possible path lengths from s correctly, so fail.
    3329             :                  */
    3330          12 :                 result = false;
    3331          12 :                 break;
    3332             :             }
    3333             :             /* Merge knowledge of these path lengths into what we have */
    3334    17173254 :             for (i = 0; i < DUPINF; i++)
    3335    17106432 :                 haspath[i + 1] |= nexthaspath[i];
    3336             :             /* Infinity + 1 is still infinity */
    3337       66822 :             haspath[DUPINF + 1] |= nexthaspath[DUPINF + 1];
    3338             :         }
    3339             :     }
    3340             : 
    3341        8420 :     if (result && foundloop)
    3342             :     {
    3343             :         /*
    3344             :          * If there is a length-1 loop at this state, then find the shortest
    3345             :          * known path length to the end.  The loop means that every larger
    3346             :          * path length is possible, too.  (It doesn't matter whether any of
    3347             :          * the longer lengths were already known possible.)
    3348             :          */
    3349             :         int         i;
    3350             : 
    3351         312 :         for (i = 0; i <= DUPINF; i++)
    3352             :         {
    3353         312 :             if (haspath[i])
    3354         246 :                 break;
    3355             :         }
    3356       63402 :         for (i++; i <= DUPINF + 1; i++)
    3357       63156 :             haspath[i] = true;
    3358             :     }
    3359             : 
    3360             :     /* Report out the completed path length map */
    3361             :     assert(s->no < nfa->nstates);
    3362             :     assert(haspaths[s->no] == NULL);
    3363        8420 :     haspaths[s->no] = haspath;
    3364             : 
    3365             :     /* Mark state no longer busy */
    3366        8420 :     s->tmp = NULL;
    3367             : 
    3368        8420 :     return result;
    3369             : }
    3370             : 
    3371             : /*
    3372             :  * check_out_colors_match - subroutine for checkmatchall
    3373             :  *
    3374             :  * Check whether the set of states reachable from s by arcs of color co1
    3375             :  * is equivalent to the set reachable by arcs of color co2.
    3376             :  * checkmatchall already verified that all of the NFA's arcs are PLAIN,
    3377             :  * so we need not examine arc types here.
    3378             :  */
    3379             : static bool
    3380        3086 : check_out_colors_match(struct state *s, color co1, color co2)
    3381             : {
    3382        3086 :     bool        result = true;
    3383             :     struct arc *a;
    3384             : 
    3385             :     /*
    3386             :      * To do this in linear time, we assume that the NFA contains no duplicate
    3387             :      * arcs.  Run through the out-arcs, marking states reachable by arcs of
    3388             :      * color co1.  Run through again, un-marking states reachable by arcs of
    3389             :      * color co2; if we see a not-marked state, we know this co2 arc is
    3390             :      * unmatched.  Then run through again, checking for still-marked states,
    3391             :      * and in any case leaving all the tmp fields reset to NULL.
    3392             :      */
    3393       18906 :     for (a = s->outs; a != NULL; a = a->outchain)
    3394             :     {
    3395       15820 :         if (a->co == co1)
    3396             :         {
    3397             :             assert(a->to->tmp == NULL);
    3398        5028 :             a->to->tmp = a->to;
    3399             :         }
    3400             :     }
    3401       18906 :     for (a = s->outs; a != NULL; a = a->outchain)
    3402             :     {
    3403       15820 :         if (a->co == co2)
    3404             :         {
    3405        5396 :             if (a->to->tmp != NULL)
    3406        5024 :                 a->to->tmp = NULL;
    3407             :             else
    3408         372 :                 result = false; /* unmatched co2 arc */
    3409             :         }
    3410             :     }
    3411       18906 :     for (a = s->outs; a != NULL; a = a->outchain)
    3412             :     {
    3413       15820 :         if (a->co == co1)
    3414             :         {
    3415        5028 :             if (a->to->tmp != NULL)
    3416             :             {
    3417           4 :                 result = false; /* unmatched co1 arc */
    3418           4 :                 a->to->tmp = NULL;
    3419             :             }
    3420             :         }
    3421             :     }
    3422        3086 :     return result;
    3423             : }
    3424             : 
    3425             : /*
    3426             :  * check_in_colors_match - subroutine for checkmatchall
    3427             :  *
    3428             :  * Check whether the set of states that can reach s by arcs of color co1
    3429             :  * is equivalent to the set that can reach s by arcs of color co2.
    3430             :  * checkmatchall already verified that all of the NFA's arcs are PLAIN,
    3431             :  * so we need not examine arc types here.
    3432             :  */
    3433             : static bool
    3434        2400 : check_in_colors_match(struct state *s, color co1, color co2)
    3435             : {
    3436        2400 :     bool        result = true;
    3437             :     struct arc *a;
    3438             : 
    3439             :     /*
    3440             :      * Identical algorithm to check_out_colors_match, except examine the
    3441             :      * from-states of s' inarcs.
    3442             :      */
    3443        8876 :     for (a = s->ins; a != NULL; a = a->inchain)
    3444             :     {
    3445        6476 :         if (a->co == co1)
    3446             :         {
    3447             :             assert(a->from->tmp == NULL);
    3448        2008 :             a->from->tmp = a->from;
    3449             :         }
    3450             :     }
    3451        8876 :     for (a = s->ins; a != NULL; a = a->inchain)
    3452             :     {
    3453        6476 :         if (a->co == co2)
    3454             :         {
    3455        2234 :             if (a->from->tmp != NULL)
    3456        2004 :                 a->from->tmp = NULL;
    3457             :             else
    3458         230 :                 result = false; /* unmatched co2 arc */
    3459             :         }
    3460             :     }
    3461        8876 :     for (a = s->ins; a != NULL; a = a->inchain)
    3462             :     {
    3463        6476 :         if (a->co == co1)
    3464             :         {
    3465        2008 :             if (a->from->tmp != NULL)
    3466             :             {
    3467           4 :                 result = false; /* unmatched co1 arc */
    3468           4 :                 a->from->tmp = NULL;
    3469             :             }
    3470             :         }
    3471             :     }
    3472        2400 :     return result;
    3473             : }
    3474             : 
    3475             : /*
    3476             :  * compact - construct the compact representation of an NFA
    3477             :  */
    3478             : static void
    3479       17698 : compact(struct nfa *nfa,
    3480             :         struct cnfa *cnfa)
    3481             : {
    3482             :     struct state *s;
    3483             :     struct arc *a;
    3484             :     size_t      nstates;
    3485             :     size_t      narcs;
    3486             :     struct carc *ca;
    3487             :     struct carc *first;
    3488             : 
    3489             :     assert(!NISERR());
    3490             : 
    3491       17698 :     nstates = 0;
    3492       17698 :     narcs = 0;
    3493      236016 :     for (s = nfa->states; s != NULL; s = s->next)
    3494             :     {
    3495      218318 :         nstates++;
    3496      218318 :         narcs += s->nouts + 1;   /* need one extra for endmarker */
    3497             :     }
    3498             : 
    3499       17698 :     cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
    3500       17698 :     cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
    3501       17698 :     cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
    3502       17698 :     if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
    3503             :     {
    3504           0 :         if (cnfa->stflags != NULL)
    3505           0 :             FREE(cnfa->stflags);
    3506           0 :         if (cnfa->states != NULL)
    3507           0 :             FREE(cnfa->states);
    3508           0 :         if (cnfa->arcs != NULL)
    3509           0 :             FREE(cnfa->arcs);
    3510           0 :         NERR(REG_ESPACE);
    3511           0 :         return;
    3512             :     }
    3513       17698 :     cnfa->nstates = nstates;
    3514       17698 :     cnfa->pre = nfa->pre->no;
    3515       17698 :     cnfa->post = nfa->post->no;
    3516       17698 :     cnfa->bos[0] = nfa->bos[0];
    3517       17698 :     cnfa->bos[1] = nfa->bos[1];
    3518       17698 :     cnfa->eos[0] = nfa->eos[0];
    3519       17698 :     cnfa->eos[1] = nfa->eos[1];
    3520       17698 :     cnfa->ncolors = maxcolor(nfa->cm) + 1;
    3521       17698 :     cnfa->flags = nfa->flags;
    3522       17698 :     cnfa->minmatchall = nfa->minmatchall;
    3523       17698 :     cnfa->maxmatchall = nfa->maxmatchall;
    3524             : 
    3525       17698 :     ca = cnfa->arcs;
    3526      236016 :     for (s = nfa->states; s != NULL; s = s->next)
    3527             :     {
    3528             :         assert((size_t) s->no < nstates);
    3529      218318 :         cnfa->stflags[s->no] = 0;
    3530      218318 :         cnfa->states[s->no] = ca;
    3531      218318 :         first = ca;
    3532     1740016 :         for (a = s->outs; a != NULL; a = a->outchain)
    3533     1521698 :             switch (a->type)
    3534             :             {
    3535     1521500 :                 case PLAIN:
    3536     1521500 :                     ca->co = a->co;
    3537     1521500 :                     ca->to = a->to->no;
    3538     1521500 :                     ca++;
    3539     1521500 :                     break;
    3540         198 :                 case LACON:
    3541             :                     assert(s->no != cnfa->pre);
    3542             :                     assert(a->co >= 0);
    3543         198 :                     ca->co = (color) (cnfa->ncolors + a->co);
    3544         198 :                     ca->to = a->to->no;
    3545         198 :                     ca++;
    3546         198 :                     cnfa->flags |= HASLACONS;
    3547         198 :                     break;
    3548           0 :                 default:
    3549           0 :                     NERR(REG_ASSERT);
    3550           0 :                     return;
    3551             :             }
    3552      218318 :         carcsort(first, ca - first);
    3553      218318 :         ca->co = COLORLESS;
    3554      218318 :         ca->to = 0;
    3555      218318 :         ca++;
    3556             :     }
    3557             :     assert(ca == &cnfa->arcs[narcs]);
    3558             :     assert(cnfa->nstates != 0);
    3559             : 
    3560             :     /* mark no-progress states */
    3561       76260 :     for (a = nfa->pre->outs; a != NULL; a = a->outchain)
    3562       58562 :         cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
    3563       17698 :     cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
    3564             : }
    3565             : 
    3566             : /*
    3567             :  * carcsort - sort compacted-NFA arcs by color
    3568             :  */
    3569             : static void
    3570      218318 : carcsort(struct carc *first, size_t n)
    3571             : {
    3572      218318 :     if (n > 1)
    3573       43792 :         qsort(first, n, sizeof(struct carc), carc_cmp);
    3574      218318 : }
    3575             : 
    3576             : static int
    3577    15440116 : carc_cmp(const void *a, const void *b)
    3578             : {
    3579    15440116 :     const struct carc *aa = (const struct carc *) a;
    3580    15440116 :     const struct carc *bb = (const struct carc *) b;
    3581             : 
    3582    15440116 :     if (aa->co < bb->co)
    3583      136598 :         return -1;
    3584    15303518 :     if (aa->co > bb->co)
    3585      193774 :         return +1;
    3586    15109744 :     if (aa->to < bb->to)
    3587    10362664 :         return -1;
    3588     4747080 :     if (aa->to > bb->to)
    3589     4747080 :         return +1;
    3590             :     /* This is unreached, since there should be no duplicate arcs now: */
    3591           0 :     return 0;
    3592             : }
    3593             : 
    3594             : /*
    3595             :  * freecnfa - free a compacted NFA
    3596             :  */
    3597             : static void
    3598        4288 : freecnfa(struct cnfa *cnfa)
    3599             : {
    3600             :     assert(!NULLCNFA(*cnfa));   /* not empty already */
    3601        4288 :     FREE(cnfa->stflags);
    3602        4288 :     FREE(cnfa->states);
    3603        4288 :     FREE(cnfa->arcs);
    3604        4288 :     ZAPCNFA(*cnfa);
    3605        4288 : }
    3606             : 
    3607             : /*
    3608             :  * dumpnfa - dump an NFA in human-readable form
    3609             :  */
    3610             : static void
    3611           0 : dumpnfa(struct nfa *nfa,
    3612             :         FILE *f)
    3613             : {
    3614             : #ifdef REG_DEBUG
    3615             :     struct state *s;
    3616             :     int         nstates = 0;
    3617             :     int         narcs = 0;
    3618             : 
    3619             :     fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
    3620             :     if (nfa->bos[0] != COLORLESS)
    3621             :         fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
    3622             :     if (nfa->bos[1] != COLORLESS)
    3623             :         fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
    3624             :     if (nfa->eos[0] != COLORLESS)
    3625             :         fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
    3626             :     if (nfa->eos[1] != COLORLESS)
    3627             :         fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
    3628             :     if (nfa->flags & HASLACONS)
    3629             :         fprintf(f, ", haslacons");
    3630             :     if (nfa->flags & MATCHALL)
    3631             :     {
    3632             :         fprintf(f, ", minmatchall %d", nfa->minmatchall);
    3633             :         if (nfa->maxmatchall == DUPINF)
    3634             :             fprintf(f, ", maxmatchall inf");
    3635             :         else
    3636             :             fprintf(f, ", maxmatchall %d", nfa->maxmatchall);
    3637             :     }
    3638             :     fprintf(f, "\n");
    3639             :     for (s = nfa->states; s != NULL; s = s->next)
    3640             :     {
    3641             :         dumpstate(s, f);
    3642             :         nstates++;
    3643             :         narcs += s->nouts;
    3644             :     }
    3645             :     fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
    3646             :     if (nfa->parent == NULL)
    3647             :         dumpcolors(nfa->cm, f);
    3648             :     fflush(f);
    3649             : #endif
    3650           0 : }
    3651             : 
    3652             : #ifdef REG_DEBUG                /* subordinates of dumpnfa */
    3653             : 
    3654             : /*
    3655             :  * dumpstate - dump an NFA state in human-readable form
    3656             :  */
    3657             : static void
    3658             : dumpstate(struct state *s,
    3659             :           FILE *f)
    3660             : {
    3661             :     struct arc *a;
    3662             : 
    3663             :     fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
    3664             :             (s->flag) ? s->flag : '.');
    3665             :     if (s->prev != NULL && s->prev->next != s)
    3666             :         fprintf(f, "\tstate chain bad\n");
    3667             :     if (s->nouts == 0)
    3668             :         fprintf(f, "\tno out arcs\n");
    3669             :     else
    3670             :         dumparcs(s, f);
    3671             :     for (a = s->ins; a != NULL; a = a->inchain)
    3672             :     {
    3673             :         if (a->to != s)
    3674             :             fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
    3675             :                     a->from->no, a->to->no, s->no);
    3676             :     }
    3677             :     fflush(f);
    3678             : }
    3679             : 
    3680             : /*
    3681             :  * dumparcs - dump out-arcs in human-readable form
    3682             :  */
    3683             : static void
    3684             : dumparcs(struct state *s,
    3685             :          FILE *f)
    3686             : {
    3687             :     int         pos;
    3688             :     struct arc *a;
    3689             : 
    3690             :     /* printing oldest arcs first is usually clearer */
    3691             :     a = s->outs;
    3692             :     assert(a != NULL);
    3693             :     while (a->outchain != NULL)
    3694             :         a = a->outchain;
    3695             :     pos = 1;
    3696             :     do
    3697             :     {
    3698             :         dumparc(a, s, f);
    3699             :         if (pos == 5)
    3700             :         {
    3701             :             fprintf(f, "\n");
    3702             :             pos = 1;
    3703             :         }
    3704             :         else
    3705             :             pos++;
    3706             :         a = a->outchainRev;
    3707             :     } while (a != NULL);
    3708             :     if (pos != 1)
    3709             :         fprintf(f, "\n");
    3710             : }
    3711             : 
    3712             : /*
    3713             :  * dumparc - dump one outarc in readable form, including prefixing tab
    3714             :  */
    3715             : static void
    3716             : dumparc(struct arc *a,
    3717             :         struct state *s,
    3718             :         FILE *f)
    3719             : {
    3720             :     struct arc *aa;
    3721             : 
    3722             :     fprintf(f, "\t");
    3723             :     switch (a->type)
    3724             :     {
    3725             :         case PLAIN:
    3726             :             if (a->co == RAINBOW)
    3727             :                 fprintf(f, "[*]");
    3728             :             else
    3729             :                 fprintf(f, "[%ld]", (long) a->co);
    3730             :             break;
    3731             :         case AHEAD:
    3732             :             if (a->co == RAINBOW)
    3733             :                 fprintf(f, ">*>");
    3734             :             else
    3735             :                 fprintf(f, ">%ld>", (long) a->co);
    3736             :             break;
    3737             :         case BEHIND:
    3738             :             if (a->co == RAINBOW)
    3739             :                 fprintf(f, "<*<");
    3740             :             else
    3741             :                 fprintf(f, "<%ld<", (long) a->co);
    3742             :             break;
    3743             :         case LACON:
    3744             :             fprintf(f, ":%ld:", (long) a->co);
    3745             :             break;
    3746             :         case '^':
    3747             :         case '$':
    3748             :             fprintf(f, "%c%d", a->type, (int) a->co);
    3749             :             break;
    3750             :         case EMPTY:
    3751             :             break;
    3752             :         default:
    3753             :             fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
    3754             :             break;
    3755             :     }
    3756             :     if (a->from != s)
    3757             :         fprintf(f, "?%d?", a->from->no);
    3758             :     for (aa = a->from->outs; aa != NULL; aa = aa->outchain)
    3759             :         if (aa == a)
    3760             :             break;              /* NOTE BREAK OUT */
    3761             :     if (aa == NULL)
    3762             :         fprintf(f, "?!?");        /* missing from out-chain */
    3763             :     fprintf(f, "->");
    3764             :     if (a->to == NULL)
    3765             :     {
    3766             :         fprintf(f, "NULL");
    3767             :         return;
    3768             :     }
    3769             :     fprintf(f, "%d", a->to->no);
    3770             :     for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
    3771             :         if (aa == a)
    3772             :             break;              /* NOTE BREAK OUT */
    3773             :     if (aa == NULL)
    3774             :         fprintf(f, "?!?");        /* missing from in-chain */
    3775             : }
    3776             : #endif                          /* REG_DEBUG */
    3777             : 
    3778             : /*
    3779             :  * dumpcnfa - dump a compacted NFA in human-readable form
    3780             :  */
    3781             : #ifdef REG_DEBUG
    3782             : static void
    3783             : dumpcnfa(struct cnfa *cnfa,
    3784             :          FILE *f)
    3785             : {
    3786             :     int         st;
    3787             : 
    3788             :     fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
    3789             :     if (cnfa->bos[0] != COLORLESS)
    3790             :         fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
    3791             :     if (cnfa->bos[1] != COLORLESS)
    3792             :         fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
    3793             :     if (cnfa->eos[0] != COLORLESS)
    3794             :         fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
    3795             :     if (cnfa->eos[1] != COLORLESS)
    3796             :         fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
    3797             :     if (cnfa->flags & HASLACONS)
    3798             :         fprintf(f, ", haslacons");
    3799             :     if (cnfa->flags & MATCHALL)
    3800             :     {
    3801             :         fprintf(f, ", minmatchall %d", cnfa->minmatchall);
    3802             :         if (cnfa->maxmatchall == DUPINF)
    3803             :             fprintf(f, ", maxmatchall inf");
    3804             :         else
    3805             :             fprintf(f, ", maxmatchall %d", cnfa->maxmatchall);
    3806             :     }
    3807             :     fprintf(f, "\n");
    3808             :     for (st = 0; st < cnfa->nstates; st++)
    3809             :         dumpcstate(st, cnfa, f);
    3810             :     fflush(f);
    3811             : }
    3812             : #endif
    3813             : 
    3814             : #ifdef REG_DEBUG                /* subordinates of dumpcnfa */
    3815             : 
    3816             : /*
    3817             :  * dumpcstate - dump a compacted-NFA state in human-readable form
    3818             :  */
    3819             : static void
    3820             : dumpcstate(int st,
    3821             :            struct cnfa *cnfa,
    3822             :            FILE *f)
    3823             : {
    3824             :     struct carc *ca;
    3825             :     int         pos;
    3826             : 
    3827             :     fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
    3828             :     pos = 1;
    3829             :     for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
    3830             :     {
    3831             :         if (ca->co == RAINBOW)
    3832             :             fprintf(f, "\t[*]->%d", ca->to);
    3833             :         else if (ca->co < cnfa->ncolors)
    3834             :             fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
    3835             :         else
    3836             :             fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
    3837             :         if (pos == 5)
    3838             :         {
    3839             :             fprintf(f, "\n");
    3840             :             pos = 1;
    3841             :         }
    3842             :         else
    3843             :             pos++;
    3844             :     }
    3845             :     if (ca == cnfa->states[st] || pos != 1)
    3846             :         fprintf(f, "\n");
    3847             :     fflush(f);
    3848             : }
    3849             : 
    3850             : #endif                          /* REG_DEBUG */

Generated by: LCOV version 1.14