LCOV - code coverage report
Current view: top level - src/backend/regex - regc_pg_locale.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 196 341 57.5 %
Date: 2024-04-26 06:11:47 Functions: 15 15 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * regc_pg_locale.c
       4             :  *    ctype functions adapted to work on pg_wchar (a/k/a chr),
       5             :  *    and functions to cache the results of wholesale ctype probing.
       6             :  *
       7             :  * This file is #included by regcomp.c; it's not meant to compile standalone.
       8             :  *
       9             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994, Regents of the University of California
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/regex/regc_pg_locale.c
      14             :  *
      15             :  *-------------------------------------------------------------------------
      16             :  */
      17             : 
      18             : #include "catalog/pg_collation.h"
      19             : #include "common/unicode_case.h"
      20             : #include "common/unicode_category.h"
      21             : #include "utils/pg_locale.h"
      22             : 
      23             : /*
      24             :  * To provide as much functionality as possible on a variety of platforms,
      25             :  * without going so far as to implement everything from scratch, we use
      26             :  * several implementation strategies depending on the situation:
      27             :  *
      28             :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
      29             :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
      30             :  * collations don't give a fig about multibyte characters.
      31             :  *
      32             :  * 2. In the "default" collation (which is supposed to obey LC_CTYPE):
      33             :  *
      34             :  * 2a. When working in UTF8 encoding, we use the <wctype.h> functions.
      35             :  * This assumes that every platform uses Unicode codepoints directly
      36             :  * as the wchar_t representation of Unicode.  On some platforms
      37             :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
      38             :  *
      39             :  * 2b. In all other encodings, we use the <ctype.h> functions for pg_wchar
      40             :  * values up to 255, and punt for values above that.  This is 100% correct
      41             :  * only in single-byte encodings such as LATINn.  However, non-Unicode
      42             :  * multibyte encodings are mostly Far Eastern character sets for which the
      43             :  * properties being tested here aren't very relevant for higher code values
      44             :  * anyway.  The difficulty with using the <wctype.h> functions with
      45             :  * non-Unicode multibyte encodings is that we can have no certainty that
      46             :  * the platform's wchar_t representation matches what we do in pg_wchar
      47             :  * conversions.
      48             :  *
      49             :  * 3. In other collations, we use the locale_t-extended forms of the <wctype.h>
      50             :  * and <ctype.h> functions, under exactly the same cases as #2.
      51             :  *
      52             :  * There is one notable difference between cases 2 and 3: in the "default"
      53             :  * collation we force ASCII letters to follow ASCII upcase/downcase rules,
      54             :  * while in a non-default collation we just let the library functions do what
      55             :  * they will.  The case where this matters is treatment of I/i in Turkish,
      56             :  * and the behavior is meant to match the upper()/lower() SQL functions.
      57             :  *
      58             :  * We store the active collation setting in static variables.  In principle
      59             :  * it could be passed down to here via the regex library's "struct vars" data
      60             :  * structure; but that would require somewhat invasive changes in the regex
      61             :  * library, and right now there's no real benefit to be gained from that.
      62             :  *
      63             :  * NB: the coding here assumes pg_wchar is an unsigned type.
      64             :  */
      65             : 
      66             : typedef enum
      67             : {
      68             :     PG_REGEX_LOCALE_C,          /* C locale (encoding independent) */
      69             :     PG_REGEX_BUILTIN,           /* built-in Unicode semantics */
      70             :     PG_REGEX_LOCALE_WIDE,       /* Use <wctype.h> functions */
      71             :     PG_REGEX_LOCALE_1BYTE,      /* Use <ctype.h> functions */
      72             :     PG_REGEX_LOCALE_WIDE_L,     /* Use locale_t <wctype.h> functions */
      73             :     PG_REGEX_LOCALE_1BYTE_L,    /* Use locale_t <ctype.h> functions */
      74             :     PG_REGEX_LOCALE_ICU,        /* Use ICU uchar.h functions */
      75             : } PG_Locale_Strategy;
      76             : 
      77             : static PG_Locale_Strategy pg_regex_strategy;
      78             : static pg_locale_t pg_regex_locale;
      79             : static Oid  pg_regex_collation;
      80             : 
      81             : /*
      82             :  * Hard-wired character properties for C locale
      83             :  */
      84             : #define PG_ISDIGIT  0x01
      85             : #define PG_ISALPHA  0x02
      86             : #define PG_ISALNUM  (PG_ISDIGIT | PG_ISALPHA)
      87             : #define PG_ISUPPER  0x04
      88             : #define PG_ISLOWER  0x08
      89             : #define PG_ISGRAPH  0x10
      90             : #define PG_ISPRINT  0x20
      91             : #define PG_ISPUNCT  0x40
      92             : #define PG_ISSPACE  0x80
      93             : 
      94             : static const unsigned char pg_char_properties[128] = {
      95             :      /* NUL */ 0,
      96             :      /* ^A */ 0,
      97             :      /* ^B */ 0,
      98             :      /* ^C */ 0,
      99             :      /* ^D */ 0,
     100             :      /* ^E */ 0,
     101             :      /* ^F */ 0,
     102             :      /* ^G */ 0,
     103             :      /* ^H */ 0,
     104             :      /* ^I */ PG_ISSPACE,
     105             :      /* ^J */ PG_ISSPACE,
     106             :      /* ^K */ PG_ISSPACE,
     107             :      /* ^L */ PG_ISSPACE,
     108             :      /* ^M */ PG_ISSPACE,
     109             :      /* ^N */ 0,
     110             :      /* ^O */ 0,
     111             :      /* ^P */ 0,
     112             :      /* ^Q */ 0,
     113             :      /* ^R */ 0,
     114             :      /* ^S */ 0,
     115             :      /* ^T */ 0,
     116             :      /* ^U */ 0,
     117             :      /* ^V */ 0,
     118             :      /* ^W */ 0,
     119             :      /* ^X */ 0,
     120             :      /* ^Y */ 0,
     121             :      /* ^Z */ 0,
     122             :      /* ^[ */ 0,
     123             :      /* ^\ */ 0,
     124             :      /* ^] */ 0,
     125             :      /* ^^ */ 0,
     126             :      /* ^_ */ 0,
     127             :      /* ' ' */ PG_ISPRINT | PG_ISSPACE,
     128             :      /* !  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     129             :      /* "  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     130             :      /* #  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     131             :      /* $  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     132             :      /* %  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     133             :      /* &  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     134             :      /* '  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     135             :      /* (  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     136             :      /* )  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     137             :      /* *  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     138             :      /* +  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     139             :      /* ,  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     140             :      /* -  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     141             :      /* .  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     142             :      /* /  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     143             :      /* 0  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     144             :      /* 1  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     145             :      /* 2  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     146             :      /* 3  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     147             :      /* 4  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     148             :      /* 5  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     149             :      /* 6  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     150             :      /* 7  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     151             :      /* 8  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     152             :      /* 9  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     153             :      /* :  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     154             :      /* ;  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     155             :      /* <  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     156             :      /* =  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     157             :      /* >  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     158             :      /* ?  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     159             :      /* @  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     160             :      /* A  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     161             :      /* B  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     162             :      /* C  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     163             :      /* D  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     164             :      /* E  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     165             :      /* F  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     166             :      /* G  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     167             :      /* H  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     168             :      /* I  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     169             :      /* J  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     170             :      /* K  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     171             :      /* L  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     172             :      /* M  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     173             :      /* N  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     174             :      /* O  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     175             :      /* P  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     176             :      /* Q  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     177             :      /* R  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     178             :      /* S  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     179             :      /* T  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     180             :      /* U  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     181             :      /* V  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     182             :      /* W  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     183             :      /* X  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     184             :      /* Y  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     185             :      /* Z  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     186             :      /* [  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     187             :      /* \  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     188             :      /* ]  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     189             :      /* ^  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     190             :      /* _  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     191             :      /* `  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     192             :      /* a  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     193             :      /* b  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     194             :      /* c  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     195             :      /* d  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     196             :      /* e  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     197             :      /* f  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     198             :      /* g  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     199             :      /* h  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     200             :      /* i  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     201             :      /* j  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     202             :      /* k  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     203             :      /* l  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     204             :      /* m  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     205             :      /* n  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     206             :      /* o  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     207             :      /* p  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     208             :      /* q  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     209             :      /* r  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     210             :      /* s  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     211             :      /* t  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     212             :      /* u  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     213             :      /* v  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     214             :      /* w  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     215             :      /* x  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     216             :      /* y  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     217             :      /* z  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     218             :      /* {  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     219             :      /* |  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     220             :      /* }  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     221             :      /* ~  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     222             :      /* DEL */ 0
     223             : };
     224             : 
     225             : 
     226             : /*
     227             :  * pg_set_regex_collation: set collation for these functions to obey
     228             :  *
     229             :  * This is called when beginning compilation or execution of a regexp.
     230             :  * Since there's no need for reentrancy of regexp operations, it's okay
     231             :  * to store the results in static variables.
     232             :  */
     233             : void
     234     1889728 : pg_set_regex_collation(Oid collation)
     235             : {
     236     1889728 :     if (!OidIsValid(collation))
     237             :     {
     238             :         /*
     239             :          * This typically means that the parser could not resolve a conflict
     240             :          * of implicit collations, so report it that way.
     241             :          */
     242           0 :         ereport(ERROR,
     243             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     244             :                  errmsg("could not determine which collation to use for regular expression"),
     245             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     246             :     }
     247             : 
     248     1889728 :     if (lc_ctype_is_c(collation))
     249             :     {
     250             :         /* C/POSIX collations use this path regardless of database encoding */
     251      108374 :         pg_regex_strategy = PG_REGEX_LOCALE_C;
     252      108374 :         pg_regex_locale = 0;
     253      108374 :         pg_regex_collation = C_COLLATION_OID;
     254             :     }
     255             :     else
     256             :     {
     257     1781354 :         pg_regex_locale = pg_newlocale_from_collation(collation);
     258             : 
     259     1781354 :         if (!pg_locale_deterministic(pg_regex_locale))
     260          24 :             ereport(ERROR,
     261             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     262             :                      errmsg("nondeterministic collations are not supported for regular expressions")));
     263             : 
     264             : #ifdef USE_ICU
     265     1781330 :         if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
     266         942 :             pg_regex_strategy = PG_REGEX_LOCALE_ICU;
     267             :         else
     268             : #endif
     269     1780388 :         if (GetDatabaseEncoding() == PG_UTF8)
     270             :         {
     271     1780384 :             if (pg_regex_locale)
     272             :             {
     273      187994 :                 if (pg_regex_locale->provider == COLLPROVIDER_BUILTIN)
     274      187994 :                     pg_regex_strategy = PG_REGEX_BUILTIN;
     275             :                 else
     276           0 :                     pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
     277             :             }
     278             :             else
     279     1592390 :                 pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
     280             :         }
     281             :         else
     282             :         {
     283           4 :             if (pg_regex_locale)
     284           0 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
     285             :             else
     286           4 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
     287             :         }
     288             : 
     289     1781330 :         pg_regex_collation = collation;
     290             :     }
     291     1889704 : }
     292             : 
     293             : static int
     294      145936 : pg_wc_isdigit(pg_wchar c)
     295             : {
     296      145936 :     switch (pg_regex_strategy)
     297             :     {
     298        2142 :         case PG_REGEX_LOCALE_C:
     299        4284 :             return (c <= (pg_wchar) 127 &&
     300        2142 :                     (pg_char_properties[c] & PG_ISDIGIT));
     301       45166 :         case PG_REGEX_BUILTIN:
     302       45166 :             return pg_u_isdigit(c, true);
     303       86340 :         case PG_REGEX_LOCALE_WIDE:
     304             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     305       86340 :                 return iswdigit((wint_t) c);
     306             :             /* FALL THRU */
     307             :         case PG_REGEX_LOCALE_1BYTE:
     308           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     309           0 :                     isdigit((unsigned char) c));
     310           0 :         case PG_REGEX_LOCALE_WIDE_L:
     311             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     312           0 :                 return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
     313             :             /* FALL THRU */
     314             :         case PG_REGEX_LOCALE_1BYTE_L:
     315           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     316           0 :                     isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
     317             :             break;
     318       12288 :         case PG_REGEX_LOCALE_ICU:
     319             : #ifdef USE_ICU
     320       12288 :             return u_isdigit(c);
     321             : #endif
     322             :             break;
     323             :     }
     324           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     325             : }
     326             : 
     327             : static int
     328       16660 : pg_wc_isalpha(pg_wchar c)
     329             : {
     330       16660 :     switch (pg_regex_strategy)
     331             :     {
     332           0 :         case PG_REGEX_LOCALE_C:
     333           0 :             return (c <= (pg_wchar) 127 &&
     334           0 :                     (pg_char_properties[c] & PG_ISALPHA));
     335          22 :         case PG_REGEX_BUILTIN:
     336          22 :             return pg_u_isalpha(c);
     337        4350 :         case PG_REGEX_LOCALE_WIDE:
     338             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     339        4350 :                 return iswalpha((wint_t) c);
     340             :             /* FALL THRU */
     341             :         case PG_REGEX_LOCALE_1BYTE:
     342           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     343           0 :                     isalpha((unsigned char) c));
     344           0 :         case PG_REGEX_LOCALE_WIDE_L:
     345             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     346           0 :                 return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
     347             :             /* FALL THRU */
     348             :         case PG_REGEX_LOCALE_1BYTE_L:
     349           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     350           0 :                     isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
     351             :             break;
     352       12288 :         case PG_REGEX_LOCALE_ICU:
     353             : #ifdef USE_ICU
     354       12288 :             return u_isalpha(c);
     355             : #endif
     356             :             break;
     357             :     }
     358           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     359             : }
     360             : 
     361             : static int
     362       66316 : pg_wc_isalnum(pg_wchar c)
     363             : {
     364       66316 :     switch (pg_regex_strategy)
     365             :     {
     366         762 :         case PG_REGEX_LOCALE_C:
     367        1524 :             return (c <= (pg_wchar) 127 &&
     368         762 :                     (pg_char_properties[c] & PG_ISALNUM));
     369       20476 :         case PG_REGEX_BUILTIN:
     370       20476 :             return pg_u_isalnum(c, true);
     371       32790 :         case PG_REGEX_LOCALE_WIDE:
     372             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     373       32790 :                 return iswalnum((wint_t) c);
     374             :             /* FALL THRU */
     375             :         case PG_REGEX_LOCALE_1BYTE:
     376           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     377           0 :                     isalnum((unsigned char) c));
     378           0 :         case PG_REGEX_LOCALE_WIDE_L:
     379             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     380           0 :                 return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
     381             :             /* FALL THRU */
     382             :         case PG_REGEX_LOCALE_1BYTE_L:
     383           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     384           0 :                     isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
     385             :             break;
     386       12288 :         case PG_REGEX_LOCALE_ICU:
     387             : #ifdef USE_ICU
     388       12288 :             return u_isalnum(c);
     389             : #endif
     390             :             break;
     391             :     }
     392           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     393             : }
     394             : 
     395             : static int
     396       33538 : pg_wc_isword(pg_wchar c)
     397             : {
     398             :     /* We define word characters as alnum class plus underscore */
     399       33538 :     if (c == CHR('_'))
     400          22 :         return 1;
     401       33516 :     return pg_wc_isalnum(c);
     402             : }
     403             : 
     404             : static int
     405       28688 : pg_wc_isupper(pg_wchar c)
     406             : {
     407       28688 :     switch (pg_regex_strategy)
     408             :     {
     409           0 :         case PG_REGEX_LOCALE_C:
     410           0 :             return (c <= (pg_wchar) 127 &&
     411           0 :                     (pg_char_properties[c] & PG_ISUPPER));
     412       12288 :         case PG_REGEX_BUILTIN:
     413       12288 :             return pg_u_isupper(c);
     414        4112 :         case PG_REGEX_LOCALE_WIDE:
     415             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     416        4112 :                 return iswupper((wint_t) c);
     417             :             /* FALL THRU */
     418             :         case PG_REGEX_LOCALE_1BYTE:
     419           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     420           0 :                     isupper((unsigned char) c));
     421           0 :         case PG_REGEX_LOCALE_WIDE_L:
     422             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     423           0 :                 return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
     424             :             /* FALL THRU */
     425             :         case PG_REGEX_LOCALE_1BYTE_L:
     426           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     427           0 :                     isupper_l((unsigned char) c, pg_regex_locale->info.lt));
     428             :             break;
     429       12288 :         case PG_REGEX_LOCALE_ICU:
     430             : #ifdef USE_ICU
     431       12288 :             return u_isupper(c);
     432             : #endif
     433             :             break;
     434             :     }
     435           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     436             : }
     437             : 
     438             : static int
     439       16390 : pg_wc_islower(pg_wchar c)
     440             : {
     441       16390 :     switch (pg_regex_strategy)
     442             :     {
     443           0 :         case PG_REGEX_LOCALE_C:
     444           0 :             return (c <= (pg_wchar) 127 &&
     445           0 :                     (pg_char_properties[c] & PG_ISLOWER));
     446           0 :         case PG_REGEX_BUILTIN:
     447           0 :             return pg_u_islower(c);
     448        4102 :         case PG_REGEX_LOCALE_WIDE:
     449             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     450        4102 :                 return iswlower((wint_t) c);
     451             :             /* FALL THRU */
     452             :         case PG_REGEX_LOCALE_1BYTE:
     453           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     454           0 :                     islower((unsigned char) c));
     455           0 :         case PG_REGEX_LOCALE_WIDE_L:
     456             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     457           0 :                 return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
     458             :             /* FALL THRU */
     459             :         case PG_REGEX_LOCALE_1BYTE_L:
     460           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     461           0 :                     islower_l((unsigned char) c, pg_regex_locale->info.lt));
     462             :             break;
     463       12288 :         case PG_REGEX_LOCALE_ICU:
     464             : #ifdef USE_ICU
     465       12288 :             return u_islower(c);
     466             : #endif
     467             :             break;
     468             :     }
     469           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     470             : }
     471             : 
     472             : static int
     473       16390 : pg_wc_isgraph(pg_wchar c)
     474             : {
     475       16390 :     switch (pg_regex_strategy)
     476             :     {
     477           0 :         case PG_REGEX_LOCALE_C:
     478           0 :             return (c <= (pg_wchar) 127 &&
     479           0 :                     (pg_char_properties[c] & PG_ISGRAPH));
     480           0 :         case PG_REGEX_BUILTIN:
     481           0 :             return pg_u_isgraph(c);
     482        4102 :         case PG_REGEX_LOCALE_WIDE:
     483             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     484        4102 :                 return iswgraph((wint_t) c);
     485             :             /* FALL THRU */
     486             :         case PG_REGEX_LOCALE_1BYTE:
     487           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     488           0 :                     isgraph((unsigned char) c));
     489           0 :         case PG_REGEX_LOCALE_WIDE_L:
     490             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     491           0 :                 return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
     492             :             /* FALL THRU */
     493             :         case PG_REGEX_LOCALE_1BYTE_L:
     494           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     495           0 :                     isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
     496             :             break;
     497       12288 :         case PG_REGEX_LOCALE_ICU:
     498             : #ifdef USE_ICU
     499       12288 :             return u_isgraph(c);
     500             : #endif
     501             :             break;
     502             :     }
     503           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     504             : }
     505             : 
     506             : static int
     507       16390 : pg_wc_isprint(pg_wchar c)
     508             : {
     509       16390 :     switch (pg_regex_strategy)
     510             :     {
     511           0 :         case PG_REGEX_LOCALE_C:
     512           0 :             return (c <= (pg_wchar) 127 &&
     513           0 :                     (pg_char_properties[c] & PG_ISPRINT));
     514           0 :         case PG_REGEX_BUILTIN:
     515           0 :             return pg_u_isprint(c);
     516        4102 :         case PG_REGEX_LOCALE_WIDE:
     517             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     518        4102 :                 return iswprint((wint_t) c);
     519             :             /* FALL THRU */
     520             :         case PG_REGEX_LOCALE_1BYTE:
     521           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     522           0 :                     isprint((unsigned char) c));
     523           0 :         case PG_REGEX_LOCALE_WIDE_L:
     524             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     525           0 :                 return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
     526             :             /* FALL THRU */
     527             :         case PG_REGEX_LOCALE_1BYTE_L:
     528           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     529           0 :                     isprint_l((unsigned char) c, pg_regex_locale->info.lt));
     530             :             break;
     531       12288 :         case PG_REGEX_LOCALE_ICU:
     532             : #ifdef USE_ICU
     533       12288 :             return u_isprint(c);
     534             : #endif
     535             :             break;
     536             :     }
     537           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     538             : }
     539             : 
     540             : static int
     541       28678 : pg_wc_ispunct(pg_wchar c)
     542             : {
     543       28678 :     switch (pg_regex_strategy)
     544             :     {
     545           0 :         case PG_REGEX_LOCALE_C:
     546           0 :             return (c <= (pg_wchar) 127 &&
     547           0 :                     (pg_char_properties[c] & PG_ISPUNCT));
     548       12288 :         case PG_REGEX_BUILTIN:
     549       12288 :             return pg_u_ispunct(c, true);
     550        4102 :         case PG_REGEX_LOCALE_WIDE:
     551             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     552        4102 :                 return iswpunct((wint_t) c);
     553             :             /* FALL THRU */
     554             :         case PG_REGEX_LOCALE_1BYTE:
     555           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     556           0 :                     ispunct((unsigned char) c));
     557           0 :         case PG_REGEX_LOCALE_WIDE_L:
     558             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     559           0 :                 return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
     560             :             /* FALL THRU */
     561             :         case PG_REGEX_LOCALE_1BYTE_L:
     562           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     563           0 :                     ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
     564             :             break;
     565       12288 :         case PG_REGEX_LOCALE_ICU:
     566             : #ifdef USE_ICU
     567       12288 :             return u_ispunct(c);
     568             : #endif
     569             :             break;
     570             :     }
     571           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     572             : }
     573             : 
     574             : static int
     575       76386 : pg_wc_isspace(pg_wchar c)
     576             : {
     577       76386 :     switch (pg_regex_strategy)
     578             :     {
     579           0 :         case PG_REGEX_LOCALE_C:
     580           0 :             return (c <= (pg_wchar) 127 &&
     581           0 :                     (pg_char_properties[c] & PG_ISSPACE));
     582       16398 :         case PG_REGEX_BUILTIN:
     583       16398 :             return pg_u_isspace(c);
     584       47700 :         case PG_REGEX_LOCALE_WIDE:
     585             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     586       47700 :                 return iswspace((wint_t) c);
     587             :             /* FALL THRU */
     588             :         case PG_REGEX_LOCALE_1BYTE:
     589           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     590           0 :                     isspace((unsigned char) c));
     591           0 :         case PG_REGEX_LOCALE_WIDE_L:
     592             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     593           0 :                 return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
     594             :             /* FALL THRU */
     595             :         case PG_REGEX_LOCALE_1BYTE_L:
     596           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     597           0 :                     isspace_l((unsigned char) c, pg_regex_locale->info.lt));
     598             :             break;
     599       12288 :         case PG_REGEX_LOCALE_ICU:
     600             : #ifdef USE_ICU
     601       12288 :             return u_isspace(c);
     602             : #endif
     603             :             break;
     604             :     }
     605           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     606             : }
     607             : 
     608             : static pg_wchar
     609       10546 : pg_wc_toupper(pg_wchar c)
     610             : {
     611       10546 :     switch (pg_regex_strategy)
     612             :     {
     613        1056 :         case PG_REGEX_LOCALE_C:
     614        1056 :             if (c <= (pg_wchar) 127)
     615        1056 :                 return pg_ascii_toupper((unsigned char) c);
     616           0 :             return c;
     617         372 :         case PG_REGEX_BUILTIN:
     618         372 :             return unicode_uppercase_simple(c);
     619        9010 :         case PG_REGEX_LOCALE_WIDE:
     620             :             /* force C behavior for ASCII characters, per comments above */
     621        9010 :             if (c <= (pg_wchar) 127)
     622         814 :                 return pg_ascii_toupper((unsigned char) c);
     623             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     624        8196 :                 return towupper((wint_t) c);
     625             :             /* FALL THRU */
     626             :         case PG_REGEX_LOCALE_1BYTE:
     627             :             /* force C behavior for ASCII characters, per comments above */
     628           0 :             if (c <= (pg_wchar) 127)
     629           0 :                 return pg_ascii_toupper((unsigned char) c);
     630           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     631           0 :                 return toupper((unsigned char) c);
     632           0 :             return c;
     633           0 :         case PG_REGEX_LOCALE_WIDE_L:
     634             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     635           0 :                 return towupper_l((wint_t) c, pg_regex_locale->info.lt);
     636             :             /* FALL THRU */
     637             :         case PG_REGEX_LOCALE_1BYTE_L:
     638           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     639           0 :                 return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
     640           0 :             return c;
     641         108 :         case PG_REGEX_LOCALE_ICU:
     642             : #ifdef USE_ICU
     643         108 :             return u_toupper(c);
     644             : #endif
     645             :             break;
     646             :     }
     647           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     648             : }
     649             : 
     650             : static pg_wchar
     651       10550 : pg_wc_tolower(pg_wchar c)
     652             : {
     653       10550 :     switch (pg_regex_strategy)
     654             :     {
     655        1056 :         case PG_REGEX_LOCALE_C:
     656        1056 :             if (c <= (pg_wchar) 127)
     657        1056 :                 return pg_ascii_tolower((unsigned char) c);
     658           0 :             return c;
     659         372 :         case PG_REGEX_BUILTIN:
     660         372 :             return unicode_lowercase_simple(c);
     661        9014 :         case PG_REGEX_LOCALE_WIDE:
     662             :             /* force C behavior for ASCII characters, per comments above */
     663        9014 :             if (c <= (pg_wchar) 127)
     664         818 :                 return pg_ascii_tolower((unsigned char) c);
     665             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     666        8196 :                 return towlower((wint_t) c);
     667             :             /* FALL THRU */
     668             :         case PG_REGEX_LOCALE_1BYTE:
     669             :             /* force C behavior for ASCII characters, per comments above */
     670           0 :             if (c <= (pg_wchar) 127)
     671           0 :                 return pg_ascii_tolower((unsigned char) c);
     672           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     673           0 :                 return tolower((unsigned char) c);
     674           0 :             return c;
     675           0 :         case PG_REGEX_LOCALE_WIDE_L:
     676             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     677           0 :                 return towlower_l((wint_t) c, pg_regex_locale->info.lt);
     678             :             /* FALL THRU */
     679             :         case PG_REGEX_LOCALE_1BYTE_L:
     680           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     681           0 :                 return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
     682           0 :             return c;
     683         108 :         case PG_REGEX_LOCALE_ICU:
     684             : #ifdef USE_ICU
     685         108 :             return u_tolower(c);
     686             : #endif
     687             :             break;
     688             :     }
     689           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     690             : }
     691             : 
     692             : 
     693             : /*
     694             :  * These functions cache the results of probing libc's ctype behavior for
     695             :  * all character codes of interest in a given encoding/collation.  The
     696             :  * result is provided as a "struct cvec", but notice that the representation
     697             :  * is a touch different from a cvec created by regc_cvec.c: we allocate the
     698             :  * chrs[] and ranges[] arrays separately from the struct so that we can
     699             :  * realloc them larger at need.  This is okay since the cvecs made here
     700             :  * should never be freed by freecvec().
     701             :  *
     702             :  * We use malloc not palloc since we mustn't lose control on out-of-memory;
     703             :  * the main regex code expects us to return a failure indication instead.
     704             :  */
     705             : 
     706             : typedef int (*pg_wc_probefunc) (pg_wchar c);    /* character-class test: nonzero means c is a member */
     707             : 
     708             : typedef struct pg_ctype_cache   /* one cached probe result, keyed by (probefunc, collation) */
     709             : {
     710             :     pg_wc_probefunc probefunc;  /* probe function this entry was built with (pg_wc_isalpha or a sibling) */
     711             :     Oid         collation;      /* value of pg_regex_collation when this entry was built */
     712             :     struct cvec cv;             /* chrs and ranges that satisfied probefunc; arrays are malloc'd separately */
     713             :     struct pg_ctype_cache *next;    /* next entry in pg_ctype_cache_list, or NULL at the end */
     714             : } pg_ctype_cache;
     715             : 
     716             : static pg_ctype_cache *pg_ctype_cache_list = NULL;  /* head of the cache chain; new entries are pushed at the front */
     717             : 
     718             : /*
     719             :  * Add a chr or range to pcc->cv; return false if run out of memory
     720             :  */
     721             : static bool
     722        8966 : store_match(pg_ctype_cache *pcc, pg_wchar chr1, int nchrs)
     723             : {
     724             :     chr        *newchrs;
     725             : 
     726        8966 :     if (nchrs > 1)
     727             :     {
     728        2728 :         if (pcc->cv.nranges >= pcc->cv.rangespace)
     729             :         {
     730           0 :             pcc->cv.rangespace *= 2;
     731           0 :             newchrs = (chr *) realloc(pcc->cv.ranges,
     732           0 :                                       pcc->cv.rangespace * sizeof(chr) * 2);
     733           0 :             if (newchrs == NULL)
     734           0 :                 return false;
     735           0 :             pcc->cv.ranges = newchrs;
     736             :         }
     737        2728 :         pcc->cv.ranges[pcc->cv.nranges * 2] = chr1;
     738        2728 :         pcc->cv.ranges[pcc->cv.nranges * 2 + 1] = chr1 + nchrs - 1;
     739        2728 :         pcc->cv.nranges++;
     740             :     }
     741             :     else
     742             :     {
     743             :         assert(nchrs == 1);
     744        6238 :         if (pcc->cv.nchrs >= pcc->cv.chrspace)
     745             :         {
     746          22 :             pcc->cv.chrspace *= 2;
     747          22 :             newchrs = (chr *) realloc(pcc->cv.chrs,
     748          22 :                                       pcc->cv.chrspace * sizeof(chr));
     749          22 :             if (newchrs == NULL)
     750           0 :                 return false;
     751          22 :             pcc->cv.chrs = newchrs;
     752             :         }
     753        6238 :         pcc->cv.chrs[pcc->cv.nchrs++] = chr1;
     754             :     }
     755        8966 :     return true;
     756             : }
     757             : 
     758             : /*
     759             :  * Given a probe function (e.g., pg_wc_isalpha) get a struct cvec for all
     760             :  * chrs satisfying the probe function.  The active collation is the one
     761             :  * previously set by pg_set_regex_collation.  Return NULL if out of memory.
     762             :  *
     763             :  * Note that the result must not be freed or modified by caller.
     764             :  */
     765             : static struct cvec *
     766         702 : pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
     767             : {
     768             :     pg_ctype_cache *pcc;
     769             :     pg_wchar    max_chr;
     770             :     pg_wchar    cur_chr;
     771             :     int         nmatches;
     772             :     chr        *newchrs;
     773             : 
     774             :     /*
     775             :      * Do we already have the answer cached?
     776             :      */
     777        1634 :     for (pcc = pg_ctype_cache_list; pcc != NULL; pcc = pcc->next)
     778             :     {
     779        1414 :         if (pcc->probefunc == probefunc &&
     780         530 :             pcc->collation == pg_regex_collation)
     781         482 :             return &pcc->cv;
     782             :     }
     783             : 
     784             :     /*
     785             :      * Nope, so initialize some workspace ...
     786             :      */
     787         220 :     pcc = (pg_ctype_cache *) malloc(sizeof(pg_ctype_cache));
     788         220 :     if (pcc == NULL)
     789           0 :         return NULL;
     790         220 :     pcc->probefunc = probefunc;
     791         220 :     pcc->collation = pg_regex_collation;
     792         220 :     pcc->cv.nchrs = 0;
     793         220 :     pcc->cv.chrspace = 128;
     794         220 :     pcc->cv.chrs = (chr *) malloc(pcc->cv.chrspace * sizeof(chr));
     795         220 :     pcc->cv.nranges = 0;
     796         220 :     pcc->cv.rangespace = 64;
     797         220 :     pcc->cv.ranges = (chr *) malloc(pcc->cv.rangespace * sizeof(chr) * 2);
     798         220 :     if (pcc->cv.chrs == NULL || pcc->cv.ranges == NULL)
     799           0 :         goto out_of_memory;
     800         220 :     pcc->cv.cclasscode = cclasscode;
     801             : 
     802             :     /*
     803             :      * Decide how many character codes we ought to look through.  In general
     804             :      * we don't go past MAX_SIMPLE_CHR; chr codes above that are handled at
     805             :      * runtime using the "high colormap" mechanism.  However, in C locale
     806             :      * there's no need to go further than 127, and if we only have a 1-byte
     807             :      * <ctype.h> API there's no need to go further than that can handle.
     808             :      *
     809             :      * If it's not MAX_SIMPLE_CHR that's constraining the search, mark the
     810             :      * output cvec as not having any locale-dependent behavior, since there
     811             :      * will be no need to do any run-time locale checks.  (The #if's here
     812             :      * would always be true for production values of MAX_SIMPLE_CHR, but it's
     813             :      * useful to allow it to be small for testing purposes.)
     814             :      */
     815         220 :     switch (pg_regex_strategy)
     816             :     {
     817          22 :         case PG_REGEX_LOCALE_C:
     818             : #if MAX_SIMPLE_CHR >= 127
     819          22 :             max_chr = (pg_wchar) 127;
     820          22 :             pcc->cv.cclasscode = -1;
     821             : #else
     822             :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     823             : #endif
     824          22 :             break;
     825          52 :         case PG_REGEX_BUILTIN:
     826          52 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     827          52 :             break;
     828          92 :         case PG_REGEX_LOCALE_WIDE:
     829             :         case PG_REGEX_LOCALE_WIDE_L:
     830          92 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     831          92 :             break;
     832           0 :         case PG_REGEX_LOCALE_1BYTE:
     833             :         case PG_REGEX_LOCALE_1BYTE_L:
     834             : #if MAX_SIMPLE_CHR >= UCHAR_MAX
     835           0 :             max_chr = (pg_wchar) UCHAR_MAX;
     836           0 :             pcc->cv.cclasscode = -1;
     837             : #else
     838             :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     839             : #endif
     840           0 :             break;
     841          54 :         case PG_REGEX_LOCALE_ICU:
     842          54 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     843          54 :             break;
     844           0 :         default:
     845             :             Assert(false);
     846           0 :             max_chr = 0;        /* can't get here, but keep compiler quiet */
     847           0 :             break;
     848             :     }
     849             : 
     850             :     /*
     851             :      * And scan 'em ...
     852             :      */
     853         220 :     nmatches = 0;               /* number of consecutive matches */
     854             : 
     855      408540 :     for (cur_chr = 0; cur_chr <= max_chr; cur_chr++)
     856             :     {
     857      408320 :         if ((*probefunc) (cur_chr))
     858      106678 :             nmatches++;
     859      301642 :         else if (nmatches > 0)
     860             :         {
     861        8942 :             if (!store_match(pcc, cur_chr - nmatches, nmatches))
     862           0 :                 goto out_of_memory;
     863        8942 :             nmatches = 0;
     864             :         }
     865             :     }
     866             : 
     867         220 :     if (nmatches > 0)
     868          24 :         if (!store_match(pcc, cur_chr - nmatches, nmatches))
     869           0 :             goto out_of_memory;
     870             : 
     871             :     /*
     872             :      * We might have allocated more memory than needed, if so free it
     873             :      */
     874         220 :     if (pcc->cv.nchrs == 0)
     875             :     {
     876          86 :         free(pcc->cv.chrs);
     877          86 :         pcc->cv.chrs = NULL;
     878          86 :         pcc->cv.chrspace = 0;
     879             :     }
     880         134 :     else if (pcc->cv.nchrs < pcc->cv.chrspace)
     881             :     {
     882         134 :         newchrs = (chr *) realloc(pcc->cv.chrs,
     883         134 :                                   pcc->cv.nchrs * sizeof(chr));
     884         134 :         if (newchrs == NULL)
     885           0 :             goto out_of_memory;
     886         134 :         pcc->cv.chrs = newchrs;
     887         134 :         pcc->cv.chrspace = pcc->cv.nchrs;
     888             :     }
     889         220 :     if (pcc->cv.nranges == 0)
     890             :     {
     891           0 :         free(pcc->cv.ranges);
     892           0 :         pcc->cv.ranges = NULL;
     893           0 :         pcc->cv.rangespace = 0;
     894             :     }
     895         220 :     else if (pcc->cv.nranges < pcc->cv.rangespace)
     896             :     {
     897         220 :         newchrs = (chr *) realloc(pcc->cv.ranges,
     898         220 :                                   pcc->cv.nranges * sizeof(chr) * 2);
     899         220 :         if (newchrs == NULL)
     900           0 :             goto out_of_memory;
     901         220 :         pcc->cv.ranges = newchrs;
     902         220 :         pcc->cv.rangespace = pcc->cv.nranges;
     903             :     }
     904             : 
     905             :     /*
     906             :      * Success, link it into cache chain
     907             :      */
     908         220 :     pcc->next = pg_ctype_cache_list;
     909         220 :     pg_ctype_cache_list = pcc;
     910             : 
     911         220 :     return &pcc->cv;
     912             : 
     913             :     /*
     914             :      * Failure, clean up
     915             :      */
     916           0 : out_of_memory:
     917           0 :     free(pcc->cv.chrs);
     918           0 :     free(pcc->cv.ranges);
     919           0 :     free(pcc);
     920             : 
     921           0 :     return NULL;
     922             : }

Generated by: LCOV version 1.14