LCOV - PostgreSQL 19devel - src/backend/regex/regc_pg

LCOV - code coverage report

Current view:	top level - src/backend/regex - regc_pg_locale.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL 19devel	Lines:	124	163	76.1 %
Date:	2025-07-29 03:18:01	Functions:	15	15	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * regc_pg_locale.c
       4             :  *    ctype functions adapted to work on pg_wchar (a/k/a chr),
       5             :  *    and functions to cache the results of wholesale ctype probing.
       6             :  *
       7             :  * This file is #included by regcomp.c; it's not meant to compile standalone.
       8             :  *
       9             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994, Regents of the University of California
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/regex/regc_pg_locale.c
      14             :  *
      15             :  *-------------------------------------------------------------------------
      16             :  */
      17             : 
      18             : #include "catalog/pg_collation.h"
      19             : #include "common/unicode_case.h"
      20             : #include "common/unicode_category.h"
      21             : #include "utils/pg_locale.h"
      22             : 
      23             : static pg_locale_t pg_regex_locale;
      24             : 
      25             : static struct pg_locale_struct dummy_c_locale = {
      26             :     .collate_is_c = true,
      27             :     .ctype_is_c = true,
      28             : };
      29             : 
      30             : /*
      31             :  * Hard-wired character properties for C locale
      32             :  */
      33             : #define PG_ISDIGIT  0x01
      34             : #define PG_ISALPHA  0x02
      35             : #define PG_ISALNUM  (PG_ISDIGIT | PG_ISALPHA)
      36             : #define PG_ISUPPER  0x04
      37             : #define PG_ISLOWER  0x08
      38             : #define PG_ISGRAPH  0x10
      39             : #define PG_ISPRINT  0x20
      40             : #define PG_ISPUNCT  0x40
      41             : #define PG_ISSPACE  0x80
      42             : 
      43             : static const unsigned char pg_char_properties[128] = {
      44             :      /* NUL */ 0,
      45             :      /* ^A */ 0,
      46             :      /* ^B */ 0,
      47             :      /* ^C */ 0,
      48             :      /* ^D */ 0,
      49             :      /* ^E */ 0,
      50             :      /* ^F */ 0,
      51             :      /* ^G */ 0,
      52             :      /* ^H */ 0,
      53             :      /* ^I */ PG_ISSPACE,
      54             :      /* ^J */ PG_ISSPACE,
      55             :      /* ^K */ PG_ISSPACE,
      56             :      /* ^L */ PG_ISSPACE,
      57             :      /* ^M */ PG_ISSPACE,
      58             :      /* ^N */ 0,
      59             :      /* ^O */ 0,
      60             :      /* ^P */ 0,
      61             :      /* ^Q */ 0,
      62             :      /* ^R */ 0,
      63             :      /* ^S */ 0,
      64             :      /* ^T */ 0,
      65             :      /* ^U */ 0,
      66             :      /* ^V */ 0,
      67             :      /* ^W */ 0,
      68             :      /* ^X */ 0,
      69             :      /* ^Y */ 0,
      70             :      /* ^Z */ 0,
      71             :      /* ^[ */ 0,
      72             :      /* ^\ */ 0,
      73             :      /* ^] */ 0,
      74             :      /* ^^ */ 0,
      75             :      /* ^_ */ 0,
      76             :      /* */ PG_ISPRINT | PG_ISSPACE,
      77             :      /* !  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      78             :      /* "  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      79             :      /* #  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      80             :      /* $  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      81             :      /* %  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      82             :      /* &  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      83             :      /* '  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      84             :      /* (  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      85             :      /* )  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      86             :      /* *  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      87             :      /* +  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      88             :      /* ,  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      89             :      /* -  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      90             :      /* .  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      91             :      /* /  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
      92             :      /* 0  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
      93             :      /* 1  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
      94             :      /* 2  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
      95             :      /* 3  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
      96             :      /* 4  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
      97             :      /* 5  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
      98             :      /* 6  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
      99             :      /* 7  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     100             :      /* 8  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     101             :      /* 9  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     102             :      /* :  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     103             :      /* ;  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     104             :      /* <  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     105             :      /* =  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     106             :      /* >  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     107             :      /* ?  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     108             :      /* @  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     109             :      /* A  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     110             :      /* B  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     111             :      /* C  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     112             :      /* D  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     113             :      /* E  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     114             :      /* F  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     115             :      /* G  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     116             :      /* H  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     117             :      /* I  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     118             :      /* J  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     119             :      /* K  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     120             :      /* L  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     121             :      /* M  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     122             :      /* N  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     123             :      /* O  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     124             :      /* P  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     125             :      /* Q  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     126             :      /* R  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     127             :      /* S  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     128             :      /* T  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     129             :      /* U  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     130             :      /* V  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     131             :      /* W  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     132             :      /* X  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     133             :      /* Y  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     134             :      /* Z  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     135             :      /* [  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     136             :      /* \  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     137             :      /* ]  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     138             :      /* ^  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     139             :      /* _  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     140             :      /* `  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     141             :      /* a  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     142             :      /* b  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     143             :      /* c  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     144             :      /* d  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     145             :      /* e  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     146             :      /* f  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     147             :      /* g  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     148             :      /* h  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     149             :      /* i  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     150             :      /* j  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     151             :      /* k  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     152             :      /* l  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     153             :      /* m  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     154             :      /* n  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     155             :      /* o  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     156             :      /* p  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     157             :      /* q  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     158             :      /* r  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     159             :      /* s  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     160             :      /* t  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     161             :      /* u  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     162             :      /* v  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     163             :      /* w  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     164             :      /* x  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     165             :      /* y  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     166             :      /* z  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     167             :      /* {  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     168             :      /* |  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     169             :      /* }  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     170             :      /* ~  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     171             :      /* DEL */ 0
     172             : };
     173             : 
     174             : 
     175             : /*
     176             :  * pg_set_regex_collation: set collation for these functions to obey
     177             :  *
     178             :  * This is called when beginning compilation or execution of a regexp.
     179             :  * Since there's no need for reentrancy of regexp operations, it's okay
     180             :  * to store the results in static variables.
     181             :  */
     182             : void
     183     8033096 : pg_set_regex_collation(Oid collation)
     184             : {
     185     8033096 :     pg_locale_t locale = 0;
     186             : 
     187     8033096 :     if (!OidIsValid(collation))
     188             :     {
     189             :         /*
     190             :          * This typically means that the parser could not resolve a conflict
     191             :          * of implicit collations, so report it that way.
     192             :          */
     193           0 :         ereport(ERROR,
     194             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     195             :                  errmsg("could not determine which collation to use for regular expression"),
     196             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     197             :     }
     198             : 
     199     8033096 :     if (collation == C_COLLATION_OID)
     200             :     {
     201             :         /*
     202             :          * Some callers expect regexes to work for C_COLLATION_OID before
     203             :          * catalog access is available, so we can't call
     204             :          * pg_newlocale_from_collation().
     205             :          */
     206      123126 :         locale = &dummy_c_locale;
     207             :     }
     208             :     else
     209             :     {
     210     7909970 :         locale = pg_newlocale_from_collation(collation);
     211             : 
     212     7909970 :         if (!locale->deterministic)
     213          24 :             ereport(ERROR,
     214             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     215             :                      errmsg("nondeterministic collations are not supported for regular expressions")));
     216             : 
     217     7909946 :         if (locale->ctype_is_c)
     218             :         {
     219             :             /*
     220             :              * C/POSIX collations use this path regardless of database
     221             :              * encoding
     222             :              */
     223         284 :             locale = &dummy_c_locale;
     224             :         }
     225             :     }
     226             : 
     227     8033072 :     pg_regex_locale = locale;
     228     8033072 : }
     229             : 
     230             : static int
     231      186918 : pg_wc_isdigit(pg_wchar c)
     232             : {
     233      186918 :     if (pg_regex_locale->ctype_is_c)
     234        4260 :         return (c <= (pg_wchar) 127 &&
     235        2130 :                 (pg_char_properties[c] & PG_ISDIGIT));
     236             :     else
     237      184788 :         return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale);
     238             : }
     239             : 
     240             : static int
     241       29718 : pg_wc_isalpha(pg_wchar c)
     242             : {
     243       29718 :     if (pg_regex_locale->ctype_is_c)
     244        1536 :         return (c <= (pg_wchar) 127 &&
     245         768 :                 (pg_char_properties[c] & PG_ISALPHA));
     246             :     else
     247       28950 :         return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale);
     248             : }
     249             : 
     250             : static int
     251       94986 : pg_wc_isalnum(pg_wchar c)
     252             : {
     253       94986 :     if (pg_regex_locale->ctype_is_c)
     254        1524 :         return (c <= (pg_wchar) 127 &&
     255         762 :                 (pg_char_properties[c] & PG_ISALNUM));
     256             :     else
     257       94224 :         return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale);
     258             : }
     259             : 
     260             : static int
     261       37634 : pg_wc_isword(pg_wchar c)
     262             : {
     263             :     /* We define word characters as alnum class plus underscore */
     264       37634 :     if (c == CHR('_'))
     265          24 :         return 1;
     266       37610 :     return pg_wc_isalnum(c);
     267             : }
     268             : 
     269             : static int
     270       40976 : pg_wc_isupper(pg_wchar c)
     271             : {
     272       40976 :     if (pg_regex_locale->ctype_is_c)
     273           0 :         return (c <= (pg_wchar) 127 &&
     274           0 :                 (pg_char_properties[c] & PG_ISUPPER));
     275             :     else
     276       40976 :         return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale);
     277             : }
     278             : 
     279             : static int
     280       16390 : pg_wc_islower(pg_wchar c)
     281             : {
     282       16390 :     if (pg_regex_locale->ctype_is_c)
     283           0 :         return (c <= (pg_wchar) 127 &&
     284           0 :                 (pg_char_properties[c] & PG_ISLOWER));
     285             :     else
     286       16390 :         return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale);
     287             : }
     288             : 
     289             : static int
     290       16390 : pg_wc_isgraph(pg_wchar c)
     291             : {
     292       16390 :     if (pg_regex_locale->ctype_is_c)
     293           0 :         return (c <= (pg_wchar) 127 &&
     294           0 :                 (pg_char_properties[c] & PG_ISGRAPH));
     295             :     else
     296       16390 :         return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale);
     297             : }
     298             : 
     299             : static int
     300       16390 : pg_wc_isprint(pg_wchar c)
     301             : {
     302       16390 :     if (pg_regex_locale->ctype_is_c)
     303           0 :         return (c <= (pg_wchar) 127 &&
     304           0 :                 (pg_char_properties[c] & PG_ISPRINT));
     305             :     else
     306       16390 :         return pg_regex_locale->ctype->wc_isprint(c, pg_regex_locale);
     307             : }
     308             : 
     309             : static int
     310       40966 : pg_wc_ispunct(pg_wchar c)
     311             : {
     312       40966 :     if (pg_regex_locale->ctype_is_c)
     313           0 :         return (c <= (pg_wchar) 127 &&
     314           0 :                 (pg_char_properties[c] & PG_ISPUNCT));
     315             :     else
     316       40966 :         return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale);
     317             : }
     318             : 
     319             : static int
     320       76386 : pg_wc_isspace(pg_wchar c)
     321             : {
     322       76386 :     if (pg_regex_locale->ctype_is_c)
     323           0 :         return (c <= (pg_wchar) 127 &&
     324           0 :                 (pg_char_properties[c] & PG_ISSPACE));
     325             :     else
     326       76386 :         return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale);
     327             : }
     328             : 
     329             : static pg_wchar
     330       10702 : pg_wc_toupper(pg_wchar c)
     331             : {
     332       10702 :     if (pg_regex_locale->ctype_is_c)
     333             :     {
     334         978 :         if (c <= (pg_wchar) 127)
     335         978 :             return pg_ascii_toupper((unsigned char) c);
     336           0 :         return c;
     337             :     }
     338             :     else
     339        9724 :         return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale);
     340             : }
     341             : 
     342             : static pg_wchar
     343       10706 : pg_wc_tolower(pg_wchar c)
     344             : {
     345       10706 :     if (pg_regex_locale->ctype_is_c)
     346             :     {
     347         978 :         if (c <= (pg_wchar) 127)
     348         978 :             return pg_ascii_tolower((unsigned char) c);
     349           0 :         return c;
     350             :     }
     351             :     else
     352        9728 :         return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale);
     353             : }
     354             : 
     355             : 
     356             : /*
     357             :  * These functions cache the results of probing libc's ctype behavior for
     358             :  * all character codes of interest in a given encoding/collation.  The
     359             :  * result is provided as a "struct cvec", but notice that the representation
     360             :  * is a touch different from a cvec created by regc_cvec.c: we allocate the
     361             :  * chrs[] and ranges[] arrays separately from the struct so that we can
     362             :  * realloc them larger at need.  This is okay since the cvecs made here
     363             :  * should never be freed by freecvec().
     364             :  *
     365             :  * We use malloc not palloc since we mustn't lose control on out-of-memory;
     366             :  * the main regex code expects us to return a failure indication instead.
     367             :  */
     368             : 
     369             : typedef int (*pg_wc_probefunc) (pg_wchar c);
     370             : 
     371             : typedef struct pg_ctype_cache
     372             : {
     373             :     pg_wc_probefunc probefunc;  /* pg_wc_isalpha or a sibling */
     374             :     pg_locale_t locale;         /* locale this entry is for */
     375             :     struct cvec cv;             /* cache entry contents */
     376             :     struct pg_ctype_cache *next;    /* chain link */
     377             : } pg_ctype_cache;
     378             : 
     379             : static pg_ctype_cache *pg_ctype_cache_list = NULL;
     380             : 
     381             : /*
     382             :  * Add a chr or range to pcc->cv; return false if run out of memory
     383             :  */
     384             : static bool
     385       11794 : store_match(pg_ctype_cache *pcc, pg_wchar chr1, int nchrs)
     386             : {
     387             :     chr        *newchrs;
     388             : 
     389       11794 :     if (nchrs > 1)
     390             :     {
     391        3720 :         if (pcc->cv.nranges >= pcc->cv.rangespace)
     392             :         {
     393           0 :             pcc->cv.rangespace *= 2;
     394           0 :             newchrs = (chr *) realloc(pcc->cv.ranges,
     395           0 :                                       pcc->cv.rangespace * sizeof(chr) * 2);
     396           0 :             if (newchrs == NULL)
     397           0 :                 return false;
     398           0 :             pcc->cv.ranges = newchrs;
     399             :         }
     400        3720 :         pcc->cv.ranges[pcc->cv.nranges * 2] = chr1;
     401        3720 :         pcc->cv.ranges[pcc->cv.nranges * 2 + 1] = chr1 + nchrs - 1;
     402        3720 :         pcc->cv.nranges++;
     403             :     }
     404             :     else
     405             :     {
     406             :         assert(nchrs == 1);
     407        8074 :         if (pcc->cv.nchrs >= pcc->cv.chrspace)
     408             :         {
     409          28 :             pcc->cv.chrspace *= 2;
     410          28 :             newchrs = (chr *) realloc(pcc->cv.chrs,
     411          28 :                                       pcc->cv.chrspace * sizeof(chr));
     412          28 :             if (newchrs == NULL)
     413           0 :                 return false;
     414          28 :             pcc->cv.chrs = newchrs;
     415             :         }
     416        8074 :         pcc->cv.chrs[pcc->cv.nchrs++] = chr1;
     417             :     }
     418       11794 :     return true;
     419             : }
     420             : 
     421             : /*
     422             :  * Given a probe function (e.g., pg_wc_isalpha) get a struct cvec for all
     423             :  * chrs satisfying the probe function.  The active collation is the one
     424             :  * previously set by pg_set_regex_collation.  Return NULL if out of memory.
     425             :  *
     426             :  * Note that the result must not be freed or modified by caller.
     427             :  */
     428             : static struct cvec *
     429         878 : pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
     430             : {
     431             :     pg_ctype_cache *pcc;
     432             :     pg_wchar    max_chr;
     433             :     pg_wchar    cur_chr;
     434             :     int         nmatches;
     435             :     chr        *newchrs;
     436             : 
     437             :     /*
     438             :      * Do we already have the answer cached?
     439             :      */
     440        2040 :     for (pcc = pg_ctype_cache_list; pcc != NULL; pcc = pcc->next)
     441             :     {
     442        1762 :         if (pcc->probefunc == probefunc &&
     443         672 :             pcc->locale == pg_regex_locale)
     444         600 :             return &pcc->cv;
     445             :     }
     446             : 
     447             :     /*
     448             :      * Nope, so initialize some workspace ...
     449             :      */
     450         278 :     pcc = (pg_ctype_cache *) malloc(sizeof(pg_ctype_cache));
     451         278 :     if (pcc == NULL)
     452           0 :         return NULL;
     453         278 :     pcc->probefunc = probefunc;
     454         278 :     pcc->locale = pg_regex_locale;
     455         278 :     pcc->cv.nchrs = 0;
     456         278 :     pcc->cv.chrspace = 128;
     457         278 :     pcc->cv.chrs = (chr *) malloc(pcc->cv.chrspace * sizeof(chr));
     458         278 :     pcc->cv.nranges = 0;
     459         278 :     pcc->cv.rangespace = 64;
     460         278 :     pcc->cv.ranges = (chr *) malloc(pcc->cv.rangespace * sizeof(chr) * 2);
     461         278 :     if (pcc->cv.chrs == NULL || pcc->cv.ranges == NULL)
     462           0 :         goto out_of_memory;
     463         278 :     pcc->cv.cclasscode = cclasscode;
     464             : 
     465             :     /*
     466             :      * Decide how many character codes we ought to look through.  In general
     467             :      * we don't go past MAX_SIMPLE_CHR; chr codes above that are handled at
     468             :      * runtime using the "high colormap" mechanism.  However, in C locale
     469             :      * there's no need to go further than 127, and if we only have a 1-byte
     470             :      * <ctype.h> API there's no need to go further than that can handle.
     471             :      *
     472             :      * If it's not MAX_SIMPLE_CHR that's constraining the search, mark the
     473             :      * output cvec as not having any locale-dependent behavior, since there
     474             :      * will be no need to do any run-time locale checks.  (The #if's here
     475             :      * would always be true for production values of MAX_SIMPLE_CHR, but it's
     476             :      * useful to allow it to be small for testing purposes.)
     477             :      */
     478         278 :     if (pg_regex_locale->ctype_is_c)
     479             :     {
     480             : #if MAX_SIMPLE_CHR >= 127
     481          28 :         max_chr = (pg_wchar) 127;
     482          28 :         pcc->cv.cclasscode = -1;
     483             : #else
     484             :         max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     485             : #endif
     486             :     }
     487             :     else
     488             :     {
     489         250 :         if (pg_regex_locale->ctype->max_chr != 0 &&
     490           0 :             pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
     491             :         {
     492           0 :             max_chr = pg_regex_locale->ctype->max_chr;
     493           0 :             pcc->cv.cclasscode = -1;
     494             :         }
     495             :         else
     496         250 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     497             :     }
     498             : 
     499             :     /*
     500             :      * And scan 'em ...
     501             :      */
     502         278 :     nmatches = 0;               /* number of consecutive matches */
     503             : 
     504      515862 :     for (cur_chr = 0; cur_chr <= max_chr; cur_chr++)
     505             :     {
     506      515584 :         if ((*probefunc) (cur_chr))
     507      142080 :             nmatches++;
     508      373504 :         else if (nmatches > 0)
     509             :         {
     510       11770 :             if (!store_match(pcc, cur_chr - nmatches, nmatches))
     511           0 :                 goto out_of_memory;
     512       11770 :             nmatches = 0;
     513             :         }
     514             :     }
     515             : 
     516         278 :     if (nmatches > 0)
     517          24 :         if (!store_match(pcc, cur_chr - nmatches, nmatches))
     518           0 :             goto out_of_memory;
     519             : 
     520             :     /*
     521             :      * We might have allocated more memory than needed, if so free it
     522             :      */
     523         278 :     if (pcc->cv.nchrs == 0)
     524             :     {
     525         112 :         free(pcc->cv.chrs);
     526         112 :         pcc->cv.chrs = NULL;
     527         112 :         pcc->cv.chrspace = 0;
     528             :     }
     529         166 :     else if (pcc->cv.nchrs < pcc->cv.chrspace)
     530             :     {
     531         166 :         newchrs = (chr *) realloc(pcc->cv.chrs,
     532         166 :                                   pcc->cv.nchrs * sizeof(chr));
     533         166 :         if (newchrs == NULL)
     534           0 :             goto out_of_memory;
     535         166 :         pcc->cv.chrs = newchrs;
     536         166 :         pcc->cv.chrspace = pcc->cv.nchrs;
     537             :     }
     538         278 :     if (pcc->cv.nranges == 0)
     539             :     {
     540           0 :         free(pcc->cv.ranges);
     541           0 :         pcc->cv.ranges = NULL;
     542           0 :         pcc->cv.rangespace = 0;
     543             :     }
     544         278 :     else if (pcc->cv.nranges < pcc->cv.rangespace)
     545             :     {
     546         278 :         newchrs = (chr *) realloc(pcc->cv.ranges,
     547         278 :                                   pcc->cv.nranges * sizeof(chr) * 2);
     548         278 :         if (newchrs == NULL)
     549           0 :             goto out_of_memory;
     550         278 :         pcc->cv.ranges = newchrs;
     551         278 :         pcc->cv.rangespace = pcc->cv.nranges;
     552             :     }
     553             : 
     554             :     /*
     555             :      * Success, link it into cache chain
     556             :      */
     557         278 :     pcc->next = pg_ctype_cache_list;
     558         278 :     pg_ctype_cache_list = pcc;
     559             : 
     560         278 :     return &pcc->cv;
     561             : 
     562             :     /*
     563             :      * Failure, clean up
     564             :      */
     565           0 : out_of_memory:
     566           0 :     free(pcc->cv.chrs);
     567           0 :     free(pcc->cv.ranges);
     568           0 :     free(pcc);
     569             : 
     570           0 :     return NULL;
     571             : }

Generated by: LCOV version 1.16