LCOV - code coverage report
Current view: top level - src/backend/regex - regexport.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 57 65 87.7 %
Date: 2019-11-21 14:06:36 Functions: 10 11 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * regexport.c
       4             :  *    Functions for exporting info about a regex's NFA
       5             :  *
       6             :  * In this implementation, the NFA defines a necessary but not sufficient
       7             :  * condition for a string to match the regex: that is, there can be strings
       8             :  * that match the NFA but don't match the full regex, but not vice versa.
       9             :  * Thus, for example, it is okay for the functions below to treat lookaround
      10             :  * constraints as no-ops, since they merely constrain the string some more.
      11             :  *
      12             :  * Notice that these functions return info into caller-provided arrays
      13             :  * rather than doing their own malloc's.  This simplifies the APIs by
      14             :  * eliminating a class of error conditions, and in the case of colors
      15             :  * allows the caller to decide how big is too big to bother with.
      16             :  *
      17             :  *
      18             :  * Portions Copyright (c) 2013-2019, PostgreSQL Global Development Group
      19             :  * Portions Copyright (c) 1998, 1999 Henry Spencer
      20             :  *
      21             :  * IDENTIFICATION
      22             :  *    src/backend/regex/regexport.c
      23             :  *
      24             :  *-------------------------------------------------------------------------
      25             :  */
      26             : 
      27             : #include "regex/regguts.h"
      28             : 
      29             : #include "regex/regexport.h"
      30             : 
      31             : 
      32             : /*
      33             :  * Get total number of NFA states.
      34             :  */
      35             : int
      36           0 : pg_reg_getnumstates(const regex_t *regex)
      37             : {
      38             :     struct cnfa *cnfa;
      39             : 
      40             :     assert(regex != NULL && regex->re_magic == REMAGIC);
      41           0 :     cnfa = &((struct guts *) regex->re_guts)->search;
      42             : 
      43           0 :     return cnfa->nstates;
      44             : }
      45             : 
      46             : /*
      47             :  * Get initial state of NFA.
      48             :  */
      49             : int
      50         124 : pg_reg_getinitialstate(const regex_t *regex)
      51             : {
      52             :     struct cnfa *cnfa;
      53             : 
      54             :     assert(regex != NULL && regex->re_magic == REMAGIC);
      55         124 :     cnfa = &((struct guts *) regex->re_guts)->search;
      56             : 
      57         124 :     return cnfa->pre;
      58             : }
      59             : 
      60             : /*
      61             :  * Get final state of NFA.
      62             :  */
      63             : int
      64        1902 : pg_reg_getfinalstate(const regex_t *regex)
      65             : {
      66             :     struct cnfa *cnfa;
      67             : 
      68             :     assert(regex != NULL && regex->re_magic == REMAGIC);
      69        1902 :     cnfa = &((struct guts *) regex->re_guts)->search;
      70             : 
      71        1902 :     return cnfa->post;
      72             : }
      73             : 
      74             : /*
      75             :  * pg_reg_getnumoutarcs() and pg_reg_getoutarcs() mask the existence of LACON
      76             :  * arcs from the caller, treating any LACON as being automatically satisfied.
      77             :  * Since the output representation does not support arcs that consume no
      78             :  * character when traversed, we have to recursively traverse LACON arcs here,
      79             :  * and report whatever normal arcs are reachable by traversing LACON arcs.
      80             :  * Note that this wouldn't work if it were possible to reach the final state
      81             :  * via LACON traversal, but the regex library never builds NFAs that have
      82             :  * LACON arcs leading directly to the final state.  (This is because the
      83             :  * regex executor is designed to consume one character beyond the nominal
      84             :  * match end --- possibly an EOS indicator --- so there is always a set of
      85             :  * ordinary arcs leading to the final state.)
      86             :  *
      87             :  * traverse_lacons is a recursive subroutine used by both exported functions
      88             :  * to count and then emit the reachable regular arcs.  *arcs_count is
      89             :  * incremented by the number of reachable arcs, and as many as will fit in
      90             :  * arcs_len (possibly 0) are emitted into arcs[].
      91             :  */
      92             : static void
      93        6504 : traverse_lacons(struct cnfa *cnfa, int st,
      94             :                 int *arcs_count,
      95             :                 regex_arc_t *arcs, int arcs_len)
      96             : {
      97             :     struct carc *ca;
      98             : 
      99             :     /*
     100             :      * Since this function recurses, it could theoretically be driven to stack
     101             :      * overflow.  In practice, this is mostly useful to backstop against a
     102             :      * failure of the regex compiler to remove a loop of LACON arcs.
     103             :      */
     104        6504 :     check_stack_depth();
     105             : 
     106       21212 :     for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
     107             :     {
     108       14708 :         if (ca->co < cnfa->ncolors)
     109             :         {
     110             :             /* Ordinary arc, so count and possibly emit it */
     111       14696 :             int         ndx = (*arcs_count)++;
     112             : 
     113       14696 :             if (ndx < arcs_len)
     114             :             {
     115        7348 :                 arcs[ndx].co = ca->co;
     116        7348 :                 arcs[ndx].to = ca->to;
     117             :             }
     118             :         }
     119             :         else
     120             :         {
     121             :             /* LACON arc --- assume it's satisfied and recurse... */
     122             :             /* ... but first, assert it doesn't lead directly to post state */
     123             :             Assert(ca->to != cnfa->post);
     124             : 
     125          12 :             traverse_lacons(cnfa, ca->to, arcs_count, arcs, arcs_len);
     126             :         }
     127             :     }
     128        6504 : }
     129             : 
     130             : /*
     131             :  * Get number of outgoing NFA arcs of state number "st".
     132             :  */
     133             : int
     134        3246 : pg_reg_getnumoutarcs(const regex_t *regex, int st)
     135             : {
     136             :     struct cnfa *cnfa;
     137             :     int         arcs_count;
     138             : 
     139             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     140        3246 :     cnfa = &((struct guts *) regex->re_guts)->search;
     141             : 
     142        3246 :     if (st < 0 || st >= cnfa->nstates)
     143           0 :         return 0;
     144        3246 :     arcs_count = 0;
     145        3246 :     traverse_lacons(cnfa, st, &arcs_count, NULL, 0);
     146        3246 :     return arcs_count;
     147             : }
     148             : 
     149             : /*
     150             :  * Write array of outgoing NFA arcs of state number "st" into arcs[],
     151             :  * whose length arcs_len must be at least as long as indicated by
     152             :  * pg_reg_getnumoutarcs(), else not all arcs will be returned.
     153             :  */
     154             : void
     155        3246 : pg_reg_getoutarcs(const regex_t *regex, int st,
     156             :                   regex_arc_t *arcs, int arcs_len)
     157             : {
     158             :     struct cnfa *cnfa;
     159             :     int         arcs_count;
     160             : 
     161             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     162        3246 :     cnfa = &((struct guts *) regex->re_guts)->search;
     163             : 
     164        3246 :     if (st < 0 || st >= cnfa->nstates || arcs_len <= 0)
     165           0 :         return;
     166        3246 :     arcs_count = 0;
     167        3246 :     traverse_lacons(cnfa, st, &arcs_count, arcs, arcs_len);
     168             : }
     169             : 
     170             : /*
     171             :  * Get total number of colors.
     172             :  */
     173             : int
     174         124 : pg_reg_getnumcolors(const regex_t *regex)
     175             : {
     176             :     struct colormap *cm;
     177             : 
     178             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     179         124 :     cm = &((struct guts *) regex->re_guts)->cmap;
     180             : 
     181         124 :     return cm->max + 1;
     182             : }
     183             : 
     184             : /*
     185             :  * Check if color is beginning of line/string.
     186             :  *
     187             :  * (We might at some point need to offer more refined handling of pseudocolors,
     188             :  * but this will do for now.)
     189             :  */
     190             : int
     191        4302 : pg_reg_colorisbegin(const regex_t *regex, int co)
     192             : {
     193             :     struct cnfa *cnfa;
     194             : 
     195             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     196        4302 :     cnfa = &((struct guts *) regex->re_guts)->search;
     197             : 
     198        4302 :     if (co == cnfa->bos[0] || co == cnfa->bos[1])
     199         484 :         return true;
     200             :     else
     201        3818 :         return false;
     202             : }
     203             : 
     204             : /*
     205             :  * Check if color is end of line/string.
     206             :  */
     207             : int
     208        3818 : pg_reg_colorisend(const regex_t *regex, int co)
     209             : {
     210             :     struct cnfa *cnfa;
     211             : 
     212             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     213        3818 :     cnfa = &((struct guts *) regex->re_guts)->search;
     214             : 
     215        3818 :     if (co == cnfa->eos[0] || co == cnfa->eos[1])
     216         320 :         return true;
     217             :     else
     218        3498 :         return false;
     219             : }
     220             : 
     221             : /*
     222             :  * Get number of member chrs of color number "co".
     223             :  *
     224             :  * Note: we return -1 if the color number is invalid, or if it is a special
     225             :  * color (WHITE or a pseudocolor), or if the number of members is uncertain.
     226             :  * Callers should not try to extract the members if -1 is returned.
     227             :  */
     228             : int
     229        1024 : pg_reg_getnumcharacters(const regex_t *regex, int co)
     230             : {
     231             :     struct colormap *cm;
     232             : 
     233             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     234        1024 :     cm = &((struct guts *) regex->re_guts)->cmap;
     235             : 
     236        1024 :     if (co <= 0 || co > cm->max)   /* we reject 0 which is WHITE */
     237         124 :         return -1;
     238         900 :     if (cm->cd[co].flags & PSEUDO)   /* also pseudocolors (BOS etc) */
     239         496 :         return -1;
     240             : 
     241             :     /*
     242             :      * If the color appears anywhere in the high colormap, treat its number of
     243             :      * members as uncertain.  In principle we could determine all the specific
     244             :      * chrs corresponding to each such entry, but it would be expensive
     245             :      * (particularly if character class tests are required) and it doesn't
     246             :      * seem worth it.
     247             :      */
     248         404 :     if (cm->cd[co].nuchrs != 0)
     249           0 :         return -1;
     250             : 
     251             :     /* OK, return the known number of member chrs */
     252         404 :     return cm->cd[co].nschrs;
     253             : }
     254             : 
     255             : /*
     256             :  * Write array of member chrs of color number "co" into chars[],
     257             :  * whose length chars_len must be at least as long as indicated by
     258             :  * pg_reg_getnumcharacters(), else not all chars will be returned.
     259             :  *
     260             :  * Fetching the members of WHITE or a pseudocolor is not supported.
     261             :  *
     262             :  * Caution: this is a relatively expensive operation.
     263             :  */
     264             : void
     265         404 : pg_reg_getcharacters(const regex_t *regex, int co,
     266             :                      pg_wchar *chars, int chars_len)
     267             : {
     268             :     struct colormap *cm;
     269             :     chr         c;
     270             : 
     271             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     272         404 :     cm = &((struct guts *) regex->re_guts)->cmap;
     273             : 
     274         404 :     if (co <= 0 || co > cm->max || chars_len <= 0)
     275           0 :         return;
     276         404 :     if (cm->cd[co].flags & PSEUDO)
     277           0 :         return;
     278             : 
     279             :     /*
     280             :      * We need only examine the low character map; there should not be any
     281             :      * matching entries in the high map.
     282             :      */
     283       39596 :     for (c = CHR_MIN; c <= MAX_SIMPLE_CHR; c++)
     284             :     {
     285       39596 :         if (cm->locolormap[c - CHR_MIN] == co)
     286             :         {
     287        1536 :             *chars++ = c;
     288        1536 :             if (--chars_len == 0)
     289         404 :                 break;
     290             :         }
     291             :     }
     292             : }

Generated by: LCOV version 1.13