LCOV - code coverage report
Current view: top level - src/backend/regex - regexport.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 58 65 89.2 %
Date: 2025-01-18 03:14:54 Functions: 10 11 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * regexport.c
       4             :  *    Functions for exporting info about a regex's NFA
       5             :  *
       6             :  * In this implementation, the NFA defines a necessary but not sufficient
       7             :  * condition for a string to match the regex: that is, there can be strings
       8             :  * that match the NFA but don't match the full regex, but not vice versa.
       9             :  * Thus, for example, it is okay for the functions below to treat lookaround
      10             :  * constraints as no-ops, since they merely constrain the string some more.
      11             :  *
      12             :  * Notice that these functions return info into caller-provided arrays
      13             :  * rather than doing their own malloc's.  This simplifies the APIs by
      14             :  * eliminating a class of error conditions, and in the case of colors
      15             :  * allows the caller to decide how big is too big to bother with.
      16             :  *
      17             :  *
      18             :  * Portions Copyright (c) 2013-2025, PostgreSQL Global Development Group
      19             :  * Portions Copyright (c) 1998, 1999 Henry Spencer
      20             :  *
      21             :  * IDENTIFICATION
      22             :  *    src/backend/regex/regexport.c
      23             :  *
      24             :  *-------------------------------------------------------------------------
      25             :  */
      26             : 
      27             : #include "regex/regguts.h"
      28             : 
      29             : #include "regex/regexport.h"
      30             : 
      31             : 
      32             : /*
      33             :  * Get total number of NFA states.
      34             :  */
      35             : int
      36           0 : pg_reg_getnumstates(const regex_t *regex)
      37             : {
      38             :     struct cnfa *cnfa;
      39             : 
      40             :     assert(regex != NULL && regex->re_magic == REMAGIC);
      41           0 :     cnfa = &((struct guts *) regex->re_guts)->search;
      42             : 
      43           0 :     return cnfa->nstates;
      44             : }
      45             : 
      46             : /*
      47             :  * Get initial state of NFA.
      48             :  */
      49             : int
      50         130 : pg_reg_getinitialstate(const regex_t *regex)
      51             : {
      52             :     struct cnfa *cnfa;
      53             : 
      54             :     assert(regex != NULL && regex->re_magic == REMAGIC);
      55         130 :     cnfa = &((struct guts *) regex->re_guts)->search;
      56             : 
      57         130 :     return cnfa->pre;
      58             : }
      59             : 
      60             : /*
      61             :  * Get final state of NFA.
      62             :  */
      63             : int
      64        1926 : pg_reg_getfinalstate(const regex_t *regex)
      65             : {
      66             :     struct cnfa *cnfa;
      67             : 
      68             :     assert(regex != NULL && regex->re_magic == REMAGIC);
      69        1926 :     cnfa = &((struct guts *) regex->re_guts)->search;
      70             : 
      71        1926 :     return cnfa->post;
      72             : }
      73             : 
      74             : /*
      75             :  * pg_reg_getnumoutarcs() and pg_reg_getoutarcs() mask the existence of LACON
      76             :  * arcs from the caller, treating any LACON as being automatically satisfied.
      77             :  * Since the output representation does not support arcs that consume no
      78             :  * character when traversed, we have to recursively traverse LACON arcs here,
      79             :  * and report whatever normal arcs are reachable by traversing LACON arcs.
      80             :  * Note that this wouldn't work if it were possible to reach the final state
      81             :  * via LACON traversal, but the regex library never builds NFAs that have
      82             :  * LACON arcs leading directly to the final state.  (This is because the
      83             :  * regex executor is designed to consume one character beyond the nominal
      84             :  * match end --- possibly an EOS indicator --- so there is always a set of
      85             :  * ordinary arcs leading to the final state.)
      86             :  *
      87             :  * traverse_lacons is a recursive subroutine used by both exported functions
      88             :  * to count and then emit the reachable regular arcs.  *arcs_count is
      89             :  * incremented by the number of reachable arcs, and as many as will fit in
      90             :  * arcs_len (possibly 0) are emitted into arcs[].
      91             :  */
      92             : static void
      93        6580 : traverse_lacons(struct cnfa *cnfa, int st,
      94             :                 int *arcs_count,
      95             :                 regex_arc_t *arcs, int arcs_len)
      96             : {
      97             :     struct carc *ca;
      98             : 
      99             :     /*
     100             :      * Since this function recurses, it could theoretically be driven to stack
     101             :      * overflow.  In practice, this is mostly useful to backstop against a
     102             :      * failure of the regex compiler to remove a loop of LACON arcs.
     103             :      */
     104        6580 :     check_stack_depth();
     105             : 
     106       17336 :     for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
     107             :     {
     108       10756 :         if (ca->co < cnfa->ncolors)
     109             :         {
     110             :             /* Ordinary arc, so count and possibly emit it */
     111       10744 :             int         ndx = (*arcs_count)++;
     112             : 
     113       10744 :             if (ndx < arcs_len)
     114             :             {
     115        5372 :                 arcs[ndx].co = ca->co;
     116        5372 :                 arcs[ndx].to = ca->to;
     117             :             }
     118             :         }
     119             :         else
     120             :         {
     121             :             /* LACON arc --- assume it's satisfied and recurse... */
     122             :             /* ... but first, assert it doesn't lead directly to post state */
     123             :             Assert(ca->to != cnfa->post);
     124             : 
     125          12 :             traverse_lacons(cnfa, ca->to, arcs_count, arcs, arcs_len);
     126             :         }
     127             :     }
     128        6580 : }
     129             : 
     130             : /*
     131             :  * Get number of outgoing NFA arcs of state number "st".
     132             :  */
     133             : int
     134        3288 : pg_reg_getnumoutarcs(const regex_t *regex, int st)
     135             : {
     136             :     struct cnfa *cnfa;
     137             :     int         arcs_count;
     138             : 
     139             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     140        3288 :     cnfa = &((struct guts *) regex->re_guts)->search;
     141             : 
     142        3288 :     if (st < 0 || st >= cnfa->nstates)
     143           0 :         return 0;
     144        3288 :     arcs_count = 0;
     145        3288 :     traverse_lacons(cnfa, st, &arcs_count, NULL, 0);
     146        3288 :     return arcs_count;
     147             : }
     148             : 
     149             : /*
     150             :  * Write array of outgoing NFA arcs of state number "st" into arcs[],
     151             :  * whose length arcs_len must be at least as long as indicated by
     152             :  * pg_reg_getnumoutarcs(), else not all arcs will be returned.
     153             :  */
     154             : void
     155        3288 : pg_reg_getoutarcs(const regex_t *regex, int st,
     156             :                   regex_arc_t *arcs, int arcs_len)
     157             : {
     158             :     struct cnfa *cnfa;
     159             :     int         arcs_count;
     160             : 
     161             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     162        3288 :     cnfa = &((struct guts *) regex->re_guts)->search;
     163             : 
     164        3288 :     if (st < 0 || st >= cnfa->nstates || arcs_len <= 0)
     165           8 :         return;
     166        3280 :     arcs_count = 0;
     167        3280 :     traverse_lacons(cnfa, st, &arcs_count, arcs, arcs_len);
     168             : }
     169             : 
     170             : /*
     171             :  * Get total number of colors.
     172             :  */
     173             : int
     174         130 : pg_reg_getnumcolors(const regex_t *regex)
     175             : {
     176             :     struct colormap *cm;
     177             : 
     178             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     179         130 :     cm = &((struct guts *) regex->re_guts)->cmap;
     180             : 
     181         130 :     return cm->max + 1;
     182             : }
     183             : 
     184             : /*
     185             :  * Check if color is beginning of line/string.
     186             :  *
     187             :  * (We might at some point need to offer more refined handling of pseudocolors,
     188             :  * but this will do for now.)
     189             :  */
     190             : int
     191        3010 : pg_reg_colorisbegin(const regex_t *regex, int co)
     192             : {
     193             :     struct cnfa *cnfa;
     194             : 
     195             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     196        3010 :     cnfa = &((struct guts *) regex->re_guts)->search;
     197             : 
     198        3010 :     if (co == cnfa->bos[0] || co == cnfa->bos[1])
     199         492 :         return true;
     200             :     else
     201        2518 :         return false;
     202             : }
     203             : 
     204             : /*
     205             :  * Check if color is end of line/string.
     206             :  */
     207             : int
     208        2518 : pg_reg_colorisend(const regex_t *regex, int co)
     209             : {
     210             :     struct cnfa *cnfa;
     211             : 
     212             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     213        2518 :     cnfa = &((struct guts *) regex->re_guts)->search;
     214             : 
     215        2518 :     if (co == cnfa->eos[0] || co == cnfa->eos[1])
     216         324 :         return true;
     217             :     else
     218        2194 :         return false;
     219             : }
     220             : 
     221             : /*
     222             :  * Get number of member chrs of color number "co".
     223             :  *
     224             :  * Note: we return -1 if the color number is invalid, or if it is a special
     225             :  * color (WHITE, RAINBOW, or a pseudocolor), or if the number of members is
     226             :  * uncertain.
     227             :  * Callers should not try to extract the members if -1 is returned.
     228             :  */
     229             : int
     230        1062 : pg_reg_getnumcharacters(const regex_t *regex, int co)
     231             : {
     232             :     struct colormap *cm;
     233             : 
     234             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     235        1062 :     cm = &((struct guts *) regex->re_guts)->cmap;
     236             : 
     237        1062 :     if (co <= 0 || co > cm->max)   /* <= 0 rejects WHITE and RAINBOW */
     238         130 :         return -1;
     239         932 :     if (cm->cd[co].flags & PSEUDO)   /* also pseudocolors (BOS etc) */
     240         520 :         return -1;
     241             : 
     242             :     /*
     243             :      * If the color appears anywhere in the high colormap, treat its number of
     244             :      * members as uncertain.  In principle we could determine all the specific
     245             :      * chrs corresponding to each such entry, but it would be expensive
     246             :      * (particularly if character class tests are required) and it doesn't
     247             :      * seem worth it.
     248             :      */
     249         412 :     if (cm->cd[co].nuchrs != 0)
     250           0 :         return -1;
     251             : 
     252             :     /* OK, return the known number of member chrs */
     253         412 :     return cm->cd[co].nschrs;
     254             : }
     255             : 
     256             : /*
     257             :  * Write array of member chrs of color number "co" into chars[],
     258             :  * whose length chars_len must be at least as long as indicated by
     259             :  * pg_reg_getnumcharacters(), else not all chars will be returned.
     260             :  *
     261             :  * Fetching the members of WHITE, RAINBOW, or a pseudocolor is not supported.
     262             :  *
     263             :  * Caution: this is a relatively expensive operation.
     264             :  */
     265             : void
     266         412 : pg_reg_getcharacters(const regex_t *regex, int co,
     267             :                      pg_wchar *chars, int chars_len)
     268             : {
     269             :     struct colormap *cm;
     270             :     chr         c;
     271             : 
     272             :     assert(regex != NULL && regex->re_magic == REMAGIC);
     273         412 :     cm = &((struct guts *) regex->re_guts)->cmap;
     274             : 
     275         412 :     if (co <= 0 || co > cm->max || chars_len <= 0)
     276           0 :         return;
     277         412 :     if (cm->cd[co].flags & PSEUDO)
     278           0 :         return;
     279             : 
     280             :     /*
     281             :      * We need only examine the low character map; there should not be any
     282             :      * matching entries in the high map.
     283             :      */
     284       40416 :     for (c = CHR_MIN; c <= MAX_SIMPLE_CHR; c++)
     285             :     {
     286       40416 :         if (cm->locolormap[c - CHR_MIN] == co)
     287             :         {
     288        1570 :             *chars++ = c;
     289        1570 :             if (--chars_len == 0)
     290         412 :                 break;
     291             :         }
     292             :     }
     293             : }

Generated by: LCOV version 1.14