LCOV - code coverage report
Current view: top level - src/backend/regex - regc_pg_locale.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 101 261 38.7 %
Date: 2019-11-13 22:07:24 Functions: 9 14 64.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * regc_pg_locale.c
       4             :  *    ctype functions adapted to work on pg_wchar (a/k/a chr),
       5             :  *    and functions to cache the results of wholesale ctype probing.
       6             :  *
       7             :  * This file is #included by regcomp.c; it's not meant to compile standalone.
       8             :  *
       9             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994, Regents of the University of California
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/regex/regc_pg_locale.c
      14             :  *
      15             :  *-------------------------------------------------------------------------
      16             :  */
      17             : 
      18             : #include "catalog/pg_collation.h"
      19             : #include "utils/pg_locale.h"
      20             : 
      21             : /*
      22             :  * To provide as much functionality as possible on a variety of platforms,
      23             :  * without going so far as to implement everything from scratch, we use
      24             :  * several implementation strategies depending on the situation:
      25             :  *
      26             :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
      27             :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
      28             :  * collations don't give a fig about multibyte characters.
      29             :  *
      30             :  * 2. In the "default" collation (which is supposed to obey LC_CTYPE):
      31             :  *
      32             :  * 2a. When working in UTF8 encoding, we use the <wctype.h> functions.
      33             :  * This assumes that every platform uses Unicode codepoints directly
      34             :  * as the wchar_t representation of Unicode.  On some platforms
      35             :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
      36             :  *
      37             :  * 2b. In all other encodings, we use the <ctype.h> functions for pg_wchar
      38             :  * values up to 255, and punt for values above that.  This is 100% correct
      39             :  * only in single-byte encodings such as LATINn.  However, non-Unicode
      40             :  * multibyte encodings are mostly Far Eastern character sets for which the
      41             :  * properties being tested here aren't very relevant for higher code values
      42             :  * anyway.  The difficulty with using the <wctype.h> functions with
      43             :  * non-Unicode multibyte encodings is that we can have no certainty that
      44             :  * the platform's wchar_t representation matches what we do in pg_wchar
      45             :  * conversions.
      46             :  *
      47             :  * 3. Other collations are only supported on platforms that HAVE_LOCALE_T.
      48             :  * Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>
      49             :  * functions, under exactly the same cases as #2 (ICU collations use uchar.h).
      50             :  *
      51             :  * There is one notable difference between cases 2 and 3: in the "default"
      52             :  * collation we force ASCII letters to follow ASCII upcase/downcase rules,
      53             :  * while in a non-default collation we just let the library functions do what
      54             :  * they will.  The case where this matters is treatment of I/i in Turkish,
      55             :  * and the behavior is meant to match the upper()/lower() SQL functions.
      56             :  *
      57             :  * We store the active collation setting in static variables.  In principle
      58             :  * it could be passed down to here via the regex library's "struct vars" data
      59             :  * structure; but that would require somewhat invasive changes in the regex
      60             :  * library, and right now there's no real benefit to be gained from that.
      61             :  *
      62             :  * NB: the coding here assumes pg_wchar is an unsigned type.
      63             :  */
      64             : 
      65             : typedef enum
      66             : {
      67             :     PG_REGEX_LOCALE_C,          /* C locale (encoding independent) */
      68             :     PG_REGEX_LOCALE_WIDE,       /* Use <wctype.h> functions */
      69             :     PG_REGEX_LOCALE_1BYTE,      /* Use <ctype.h> functions */
      70             :     PG_REGEX_LOCALE_WIDE_L,     /* Use locale_t <wctype.h> functions */
      71             :     PG_REGEX_LOCALE_1BYTE_L,    /* Use locale_t <ctype.h> functions */
      72             :     PG_REGEX_LOCALE_ICU         /* Use ICU uchar.h functions */
      73             : } PG_Locale_Strategy;
      74             : 
      75             : static PG_Locale_Strategy pg_regex_strategy;
      76             : static pg_locale_t pg_regex_locale;
      77             : static Oid  pg_regex_collation;
      78             : 
      79             : /*
      80             :  * Hard-wired character properties for C locale
      81             :  */
      82             : #define PG_ISDIGIT  0x01
      83             : #define PG_ISALPHA  0x02
      84             : #define PG_ISALNUM  (PG_ISDIGIT | PG_ISALPHA)
      85             : #define PG_ISUPPER  0x04
      86             : #define PG_ISLOWER  0x08
      87             : #define PG_ISGRAPH  0x10
      88             : #define PG_ISPRINT  0x20
      89             : #define PG_ISPUNCT  0x40
      90             : #define PG_ISSPACE  0x80
      91             : 
      92             : static const unsigned char pg_char_properties[128] = {
      93             :      /* NUL */ 0,
      94             :      /* ^A */ 0,
      95             :      /* ^B */ 0,
      96             :      /* ^C */ 0,
      97             :      /* ^D */ 0,
      98             :      /* ^E */ 0,
      99             :      /* ^F */ 0,
     100             :      /* ^G */ 0,
     101             :      /* ^H */ 0,
     102             :      /* ^I */ PG_ISSPACE,
     103             :      /* ^J */ PG_ISSPACE,
     104             :      /* ^K */ PG_ISSPACE,
     105             :      /* ^L */ PG_ISSPACE,
     106             :      /* ^M */ PG_ISSPACE,
     107             :      /* ^N */ 0,
     108             :      /* ^O */ 0,
     109             :      /* ^P */ 0,
     110             :      /* ^Q */ 0,
     111             :      /* ^R */ 0,
     112             :      /* ^S */ 0,
     113             :      /* ^T */ 0,
     114             :      /* ^U */ 0,
     115             :      /* ^V */ 0,
     116             :      /* ^W */ 0,
     117             :      /* ^X */ 0,
     118             :      /* ^Y */ 0,
     119             :      /* ^Z */ 0,
     120             :      /* ^[ */ 0,
     121             :      /* ^\ */ 0,
     122             :      /* ^] */ 0,
     123             :      /* ^^ */ 0,
     124             :      /* ^_ */ 0,
     125             :      /* SP */ PG_ISPRINT | PG_ISSPACE,
     126             :      /* !  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     127             :      /* "  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     128             :      /* #  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     129             :      /* $  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     130             :      /* %  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     131             :      /* &  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     132             :      /* '  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     133             :      /* (  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     134             :      /* )  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     135             :      /* *  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     136             :      /* +  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     137             :      /* ,  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     138             :      /* -  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     139             :      /* .  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     140             :      /* /  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     141             :      /* 0  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     142             :      /* 1  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     143             :      /* 2  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     144             :      /* 3  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     145             :      /* 4  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     146             :      /* 5  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     147             :      /* 6  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     148             :      /* 7  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     149             :      /* 8  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     150             :      /* 9  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     151             :      /* :  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     152             :      /* ;  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     153             :      /* <  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     154             :      /* =  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     155             :      /* >  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     156             :      /* ?  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     157             :      /* @  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     158             :      /* A  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     159             :      /* B  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     160             :      /* C  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     161             :      /* D  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     162             :      /* E  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     163             :      /* F  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     164             :      /* G  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     165             :      /* H  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     166             :      /* I  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     167             :      /* J  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     168             :      /* K  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     169             :      /* L  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     170             :      /* M  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     171             :      /* N  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     172             :      /* O  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     173             :      /* P  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     174             :      /* Q  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     175             :      /* R  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     176             :      /* S  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     177             :      /* T  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     178             :      /* U  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     179             :      /* V  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     180             :      /* W  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     181             :      /* X  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     182             :      /* Y  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     183             :      /* Z  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     184             :      /* [  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     185             :      /* \  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     186             :      /* ]  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     187             :      /* ^  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     188             :      /* _  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     189             :      /* `  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     190             :      /* a  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     191             :      /* b  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     192             :      /* c  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     193             :      /* d  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     194             :      /* e  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     195             :      /* f  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     196             :      /* g  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     197             :      /* h  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     198             :      /* i  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     199             :      /* j  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     200             :      /* k  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     201             :      /* l  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     202             :      /* m  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     203             :      /* n  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     204             :      /* o  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     205             :      /* p  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     206             :      /* q  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     207             :      /* r  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     208             :      /* s  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     209             :      /* t  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     210             :      /* u  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     211             :      /* v  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     212             :      /* w  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     213             :      /* x  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     214             :      /* y  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     215             :      /* z  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     216             :      /* {  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     217             :      /* |  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     218             :      /* }  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     219             :      /* ~  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     220             :      /* DEL */ 0
     221             : };
     222             : 
     223             : 
     224             : /*
     225             :  * pg_set_regex_collation: set collation for these functions to obey
     226             :  *
     227             :  * This is called when beginning compilation or execution of a regexp.
     228             :  * Since there's no need for reentrancy of regexp operations, it's okay
     229             :  * to store the results in static variables.
     230             :  */
     231             : void
     232     1417390 : pg_set_regex_collation(Oid collation)
     233             : {
     234     1417390 :     if (lc_ctype_is_c(collation))
     235             :     {
     236             :         /* C/POSIX collations use this path regardless of database encoding */
     237       57642 :         pg_regex_strategy = PG_REGEX_LOCALE_C;
     238       57642 :         pg_regex_locale = 0;
     239       57642 :         pg_regex_collation = C_COLLATION_OID;
     240             :     }
     241             :     else
     242             :     {
     243     1359748 :         if (collation == DEFAULT_COLLATION_OID)
     244     1359748 :             pg_regex_locale = 0;
     245           0 :         else if (OidIsValid(collation))
     246             :         {
     247             :             /*
     248             :              * NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T;
     249             :              * the case of pg_regex_locale != 0 but not HAVE_LOCALE_T does not
     250             :              * have to be considered below.
     251             :              */
     252           0 :             pg_regex_locale = pg_newlocale_from_collation(collation);
     253             :         }
     254             :         else
     255             :         {
     256             :             /*
     257             :              * This typically means that the parser could not resolve a
     258             :              * conflict of implicit collations, so report it that way.
     259             :              */
     260           0 :             ereport(ERROR,
     261             :                     (errcode(ERRCODE_INDETERMINATE_COLLATION),
     262             :                      errmsg("could not determine which collation to use for regular expression"),
     263             :                      errhint("Use the COLLATE clause to set the collation explicitly.")));
     264             :         }
     265             : 
     266     1359748 :         if (pg_regex_locale && !pg_regex_locale->deterministic)
     267           0 :             ereport(ERROR,
     268             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     269             :                      errmsg("nondeterministic collations are not supported for regular expressions")));
     270             : 
     271             : #ifdef USE_ICU
     272             :         if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
     273             :             pg_regex_strategy = PG_REGEX_LOCALE_ICU;
     274             :         else
     275             : #endif
     276     1359748 :         if (GetDatabaseEncoding() == PG_UTF8)
     277             :         {
     278     1359748 :             if (pg_regex_locale)
     279           0 :                 pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
     280             :             else
     281     1359748 :                 pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
     282             :         }
     283             :         else
     284             :         {
     285           0 :             if (pg_regex_locale)
     286           0 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
     287             :             else
     288           0 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
     289             :         }
     290             : 
     291     1359748 :         pg_regex_collation = collation;
     292             :     }
     293     1417390 : }
     294             : 
     295             : static int
     296       30230 : pg_wc_isdigit(pg_wchar c)
     297             : {
     298       30230 :     switch (pg_regex_strategy)
     299             :     {
     300             :         case PG_REGEX_LOCALE_C:
     301        2740 :             return (c <= (pg_wchar) 127 &&
     302        1370 :                     (pg_char_properties[c] & PG_ISDIGIT));
     303             :         case PG_REGEX_LOCALE_WIDE:
     304             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     305       28860 :                 return iswdigit((wint_t) c);
     306             :             /* FALL THRU */
     307             :         case PG_REGEX_LOCALE_1BYTE:
     308           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     309           0 :                     isdigit((unsigned char) c));
     310             :         case PG_REGEX_LOCALE_WIDE_L:
     311             : #ifdef HAVE_LOCALE_T
     312             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     313           0 :                 return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
     314             : #endif
     315             :             /* FALL THRU */
     316             :         case PG_REGEX_LOCALE_1BYTE_L:
     317             : #ifdef HAVE_LOCALE_T
     318           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     319           0 :                     isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
     320             : #endif
     321             :             break;
     322             :         case PG_REGEX_LOCALE_ICU:
     323             : #ifdef USE_ICU
     324             :             return u_isdigit(c);
     325             : #endif
     326           0 :             break;
     327             :     }
     328           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     329             : }
     330             : 
     331             : static int
     332          36 : pg_wc_isalpha(pg_wchar c)
     333             : {
     334          36 :     switch (pg_regex_strategy)
     335             :     {
     336             :         case PG_REGEX_LOCALE_C:
     337           0 :             return (c <= (pg_wchar) 127 &&
     338           0 :                     (pg_char_properties[c] & PG_ISALPHA));
     339             :         case PG_REGEX_LOCALE_WIDE:
     340             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     341          36 :                 return iswalpha((wint_t) c);
     342             :             /* FALL THRU */
     343             :         case PG_REGEX_LOCALE_1BYTE:
     344           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     345           0 :                     isalpha((unsigned char) c));
     346             :         case PG_REGEX_LOCALE_WIDE_L:
     347             : #ifdef HAVE_LOCALE_T
     348             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     349           0 :                 return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
     350             : #endif
     351             :             /* FALL THRU */
     352             :         case PG_REGEX_LOCALE_1BYTE_L:
     353             : #ifdef HAVE_LOCALE_T
     354           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     355           0 :                     isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
     356             : #endif
     357             :             break;
     358             :         case PG_REGEX_LOCALE_ICU:
     359             : #ifdef USE_ICU
     360             :             return u_isalpha(c);
     361             : #endif
     362           0 :             break;
     363             :     }
     364           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     365             : }
     366             : 
     367             : static int
     368        9394 : pg_wc_isalnum(pg_wchar c)
     369             : {
     370        9394 :     switch (pg_regex_strategy)
     371             :     {
     372             :         case PG_REGEX_LOCALE_C:
     373        2040 :             return (c <= (pg_wchar) 127 &&
     374        1020 :                     (pg_char_properties[c] & PG_ISALNUM));
     375             :         case PG_REGEX_LOCALE_WIDE:
     376             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     377        8374 :                 return iswalnum((wint_t) c);
     378             :             /* FALL THRU */
     379             :         case PG_REGEX_LOCALE_1BYTE:
     380           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     381           0 :                     isalnum((unsigned char) c));
     382             :         case PG_REGEX_LOCALE_WIDE_L:
     383             : #ifdef HAVE_LOCALE_T
     384             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     385           0 :                 return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
     386             : #endif
     387             :             /* FALL THRU */
     388             :         case PG_REGEX_LOCALE_1BYTE_L:
     389             : #ifdef HAVE_LOCALE_T
     390           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     391           0 :                     isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
     392             : #endif
     393             :             break;
     394             :         case PG_REGEX_LOCALE_ICU:
     395             : #ifdef USE_ICU
     396             :             return u_isalnum(c);
     397             : #endif
     398           0 :             break;
     399             :     }
     400           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     401             : }
     402             : 
     403             : static int
     404           0 : pg_wc_isupper(pg_wchar c)
     405             : {
     406           0 :     switch (pg_regex_strategy)
     407             :     {
     408             :         case PG_REGEX_LOCALE_C:
     409           0 :             return (c <= (pg_wchar) 127 &&
     410           0 :                     (pg_char_properties[c] & PG_ISUPPER));
     411             :         case PG_REGEX_LOCALE_WIDE:
     412             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     413           0 :                 return iswupper((wint_t) c);
     414             :             /* FALL THRU */
     415             :         case PG_REGEX_LOCALE_1BYTE:
     416           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     417           0 :                     isupper((unsigned char) c));
     418             :         case PG_REGEX_LOCALE_WIDE_L:
     419             : #ifdef HAVE_LOCALE_T
     420             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     421           0 :                 return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
     422             : #endif
     423             :             /* FALL THRU */
     424             :         case PG_REGEX_LOCALE_1BYTE_L:
     425             : #ifdef HAVE_LOCALE_T
     426           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     427           0 :                     isupper_l((unsigned char) c, pg_regex_locale->info.lt));
     428             : #endif
     429             :             break;
     430             :         case PG_REGEX_LOCALE_ICU:
     431             : #ifdef USE_ICU
     432             :             return u_isupper(c);
     433             : #endif
     434           0 :             break;
     435             :     }
     436           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     437             : }
     438             : 
     439             : static int
     440           0 : pg_wc_islower(pg_wchar c)
     441             : {
     442           0 :     switch (pg_regex_strategy)
     443             :     {
     444             :         case PG_REGEX_LOCALE_C:
     445           0 :             return (c <= (pg_wchar) 127 &&
     446           0 :                     (pg_char_properties[c] & PG_ISLOWER));
     447             :         case PG_REGEX_LOCALE_WIDE:
     448             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     449           0 :                 return iswlower((wint_t) c);
     450             :             /* FALL THRU */
     451             :         case PG_REGEX_LOCALE_1BYTE:
     452           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     453           0 :                     islower((unsigned char) c));
     454             :         case PG_REGEX_LOCALE_WIDE_L:
     455             : #ifdef HAVE_LOCALE_T
     456             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     457           0 :                 return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
     458             : #endif
     459             :             /* FALL THRU */
     460             :         case PG_REGEX_LOCALE_1BYTE_L:
     461             : #ifdef HAVE_LOCALE_T
     462           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     463           0 :                     islower_l((unsigned char) c, pg_regex_locale->info.lt));
     464             : #endif
     465             :             break;
     466             :         case PG_REGEX_LOCALE_ICU:
     467             : #ifdef USE_ICU
     468             :             return u_islower(c);
     469             : #endif
     470           0 :             break;
     471             :     }
     472           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     473             : }
     474             : 
     475             : static int
     476           0 : pg_wc_isgraph(pg_wchar c)
     477             : {
     478           0 :     switch (pg_regex_strategy)
     479             :     {
     480             :         case PG_REGEX_LOCALE_C:
     481           0 :             return (c <= (pg_wchar) 127 &&
     482           0 :                     (pg_char_properties[c] & PG_ISGRAPH));
     483             :         case PG_REGEX_LOCALE_WIDE:
     484             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     485           0 :                 return iswgraph((wint_t) c);
     486             :             /* FALL THRU */
     487             :         case PG_REGEX_LOCALE_1BYTE:
     488           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     489           0 :                     isgraph((unsigned char) c));
     490             :         case PG_REGEX_LOCALE_WIDE_L:
     491             : #ifdef HAVE_LOCALE_T
     492             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     493           0 :                 return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
     494             : #endif
     495             :             /* FALL THRU */
     496             :         case PG_REGEX_LOCALE_1BYTE_L:
     497             : #ifdef HAVE_LOCALE_T
     498           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     499           0 :                     isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
     500             : #endif
     501             :             break;
     502             :         case PG_REGEX_LOCALE_ICU:
     503             : #ifdef USE_ICU
     504             :             return u_isgraph(c);
     505             : #endif
     506           0 :             break;
     507             :     }
     508           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     509             : }
     510             : 
     511             : static int /* nonzero if c is a printing character per the active pg_regex_strategy, else 0 */
     512           0 : pg_wc_isprint(pg_wchar c)
     513             : {
     514           0 :     switch (pg_regex_strategy)
     515             :     {
     516             :         case PG_REGEX_LOCALE_C:
     517           0 :             return (c <= (pg_wchar) 127 && /* range check first: keeps the table index within 0..127 */
     518           0 :                     (pg_char_properties[c] & PG_ISPRINT));
     519             :         case PG_REGEX_LOCALE_WIDE:
     520             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) /* wide API only when wchar_t is >= 4 bytes or c fits in 16 bits */
     521           0 :                 return iswprint((wint_t) c);
     522             :             /* FALL THRU: narrow wchar_t and c > 0xFFFF, so apply the 1-byte test */
     523             :         case PG_REGEX_LOCALE_1BYTE:
     524           0 :             return (c <= (pg_wchar) UCHAR_MAX && /* codes above UCHAR_MAX never match here */
     525           0 :                     isprint((unsigned char) c));
     526             :         case PG_REGEX_LOCALE_WIDE_L:
     527             : #ifdef HAVE_LOCALE_T
     528             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) /* same width test, using the locale_t variant */
     529           0 :                 return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
     530             : #endif /* HAVE_LOCALE_T */
     531             :             /* FALL THRU */
     532             :         case PG_REGEX_LOCALE_1BYTE_L:
     533             : #ifdef HAVE_LOCALE_T
     534           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     535           0 :                     isprint_l((unsigned char) c, pg_regex_locale->info.lt));
     536             : #endif /* HAVE_LOCALE_T */
     537             :             break; /* reached only when HAVE_LOCALE_T is not defined */
     538             :         case PG_REGEX_LOCALE_ICU:
     539             : #ifdef USE_ICU
     540             :             return u_isprint(c);
     541             : #endif /* USE_ICU */
     542           0 :             break; /* reached only when USE_ICU is not defined */
     543             :     }
     544           0 :     return 0;                   /* not expected to be reached (only via the breaks above, or an unlisted strategy value); keeps compiler quiet */
     545             : }
     546             : 
     547             : static int /* nonzero if c is a punctuation character per the active pg_regex_strategy, else 0 */
     548           0 : pg_wc_ispunct(pg_wchar c)
     549             : {
     550           0 :     switch (pg_regex_strategy)
     551             :     {
     552             :         case PG_REGEX_LOCALE_C:
     553           0 :             return (c <= (pg_wchar) 127 && /* range check first: keeps the table index within 0..127 */
     554           0 :                     (pg_char_properties[c] & PG_ISPUNCT));
     555             :         case PG_REGEX_LOCALE_WIDE:
     556             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) /* wide API only when wchar_t is >= 4 bytes or c fits in 16 bits */
     557           0 :                 return iswpunct((wint_t) c);
     558             :             /* FALL THRU: narrow wchar_t and c > 0xFFFF, so apply the 1-byte test */
     559             :         case PG_REGEX_LOCALE_1BYTE:
     560           0 :             return (c <= (pg_wchar) UCHAR_MAX && /* codes above UCHAR_MAX never match here */
     561           0 :                     ispunct((unsigned char) c));
     562             :         case PG_REGEX_LOCALE_WIDE_L:
     563             : #ifdef HAVE_LOCALE_T
     564             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) /* same width test, using the locale_t variant */
     565           0 :                 return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
     566             : #endif /* HAVE_LOCALE_T */
     567             :             /* FALL THRU */
     568             :         case PG_REGEX_LOCALE_1BYTE_L:
     569             : #ifdef HAVE_LOCALE_T
     570           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     571           0 :                     ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
     572             : #endif /* HAVE_LOCALE_T */
     573             :             break; /* reached only when HAVE_LOCALE_T is not defined */
     574             :         case PG_REGEX_LOCALE_ICU:
     575             : #ifdef USE_ICU
     576             :             return u_ispunct(c);
     577             : #endif /* USE_ICU */
     578           0 :             break; /* reached only when USE_ICU is not defined */
     579             :     }
     580           0 :     return 0;                   /* not expected to be reached (only via the breaks above, or an unlisted strategy value); keeps compiler quiet */
     581             : }
     582             : 
     583             : static int /* nonzero if c is a whitespace character per the active pg_regex_strategy, else 0 */
     584       28672 : pg_wc_isspace(pg_wchar c)
     585             : {
     586       28672 :     switch (pg_regex_strategy)
     587             :     {
     588             :         case PG_REGEX_LOCALE_C:
     589           0 :             return (c <= (pg_wchar) 127 && /* range check first: keeps the table index within 0..127 */
     590           0 :                     (pg_char_properties[c] & PG_ISSPACE));
     591             :         case PG_REGEX_LOCALE_WIDE:
     592             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) /* wide API only when wchar_t is >= 4 bytes or c fits in 16 bits */
     593       28672 :                 return iswspace((wint_t) c);
     594             :             /* FALL THRU: narrow wchar_t and c > 0xFFFF, so apply the 1-byte test */
     595             :         case PG_REGEX_LOCALE_1BYTE:
     596           0 :             return (c <= (pg_wchar) UCHAR_MAX && /* codes above UCHAR_MAX never match here */
     597           0 :                     isspace((unsigned char) c));
     598             :         case PG_REGEX_LOCALE_WIDE_L:
     599             : #ifdef HAVE_LOCALE_T
     600             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) /* same width test, using the locale_t variant */
     601           0 :                 return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
     602             : #endif /* HAVE_LOCALE_T */
     603             :             /* FALL THRU */
     604             :         case PG_REGEX_LOCALE_1BYTE_L:
     605             : #ifdef HAVE_LOCALE_T
     606           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     607           0 :                     isspace_l((unsigned char) c, pg_regex_locale->info.lt));
     608             : #endif /* HAVE_LOCALE_T */
     609             :             break; /* reached only when HAVE_LOCALE_T is not defined */
     610             :         case PG_REGEX_LOCALE_ICU:
     611             : #ifdef USE_ICU
     612             :             return u_isspace(c);
     613             : #endif /* USE_ICU */
     614           0 :             break; /* reached only when USE_ICU is not defined */
     615             :     }
     616           0 :     return 0;                   /* not expected to be reached (only via the breaks above, or an unlisted strategy value); keeps compiler quiet */
     617             : }
     618             : 
     619             : static pg_wchar /* uppercase mapping of c per the active pg_regex_strategy; c itself where no mapping is attempted */
     620        1280 : pg_wc_toupper(pg_wchar c)
     621             : {
     622        1280 :     switch (pg_regex_strategy)
     623             :     {
     624             :         case PG_REGEX_LOCALE_C:
     625        1000 :             if (c <= (pg_wchar) 127)
     626        1000 :                 return pg_ascii_toupper((unsigned char) c);
     627           0 :             return c; /* codes above 127 are returned unchanged */
     628             :         case PG_REGEX_LOCALE_WIDE:
     629             :             /* force C behavior for ASCII characters, per comments above */
     630         280 :             if (c <= (pg_wchar) 127)
     631         280 :                 return pg_ascii_toupper((unsigned char) c);
     632             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) /* wide API only when wchar_t is >= 4 bytes or c fits in 16 bits */
     633           0 :                 return towupper((wint_t) c);
     634             :             /* FALL THRU: narrow wchar_t and c > 0xFFFF, so apply the 1-byte logic */
     635             :         case PG_REGEX_LOCALE_1BYTE:
     636             :             /* force C behavior for ASCII characters, per comments above */
     637           0 :             if (c <= (pg_wchar) 127)
     638           0 :                 return pg_ascii_toupper((unsigned char) c);
     639           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     640           0 :                 return toupper((unsigned char) c);
     641           0 :             return c; /* above UCHAR_MAX: returned unchanged */
     642             :         case PG_REGEX_LOCALE_WIDE_L: /* note: the _L branches have no ASCII special case */
     643             : #ifdef HAVE_LOCALE_T
     644             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     645           0 :                 return towupper_l((wint_t) c, pg_regex_locale->info.lt);
     646             : #endif /* HAVE_LOCALE_T */
     647             :             /* FALL THRU */
     648             :         case PG_REGEX_LOCALE_1BYTE_L:
     649             : #ifdef HAVE_LOCALE_T
     650           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     651           0 :                 return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
     652             : #endif /* HAVE_LOCALE_T */
     653           0 :             return c; /* c > UCHAR_MAX, or HAVE_LOCALE_T not defined: returned unchanged */
     654             :         case PG_REGEX_LOCALE_ICU:
     655             : #ifdef USE_ICU
     656             :             return u_toupper(c);
     657             : #endif /* USE_ICU */
     658           0 :             break; /* reached only when USE_ICU is not defined */
     659             :     }
     660           0 :     return 0;                   /* not expected to be reached (only via the ICU break above, or an unlisted strategy value); keeps compiler quiet */
     661             : }
     662             : 
     663             : static pg_wchar /* lowercase mapping of c per the active pg_regex_strategy; c itself where no mapping is attempted */
     664        1280 : pg_wc_tolower(pg_wchar c)
     665             : {
     666        1280 :     switch (pg_regex_strategy)
     667             :     {
     668             :         case PG_REGEX_LOCALE_C:
     669        1000 :             if (c <= (pg_wchar) 127)
     670        1000 :                 return pg_ascii_tolower((unsigned char) c);
     671           0 :             return c; /* codes above 127 are returned unchanged */
     672             :         case PG_REGEX_LOCALE_WIDE:
     673             :             /* force C behavior for ASCII characters, per comments above */
     674         280 :             if (c <= (pg_wchar) 127)
     675         280 :                 return pg_ascii_tolower((unsigned char) c);
     676             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) /* wide API only when wchar_t is >= 4 bytes or c fits in 16 bits */
     677           0 :                 return towlower((wint_t) c);
     678             :             /* FALL THRU: narrow wchar_t and c > 0xFFFF, so apply the 1-byte logic */
     679             :         case PG_REGEX_LOCALE_1BYTE:
     680             :             /* force C behavior for ASCII characters, per comments above */
     681           0 :             if (c <= (pg_wchar) 127)
     682           0 :                 return pg_ascii_tolower((unsigned char) c);
     683           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     684           0 :                 return tolower((unsigned char) c);
     685           0 :             return c; /* above UCHAR_MAX: returned unchanged */
     686             :         case PG_REGEX_LOCALE_WIDE_L: /* note: the _L branches have no ASCII special case */
     687             : #ifdef HAVE_LOCALE_T
     688             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     689           0 :                 return towlower_l((wint_t) c, pg_regex_locale->info.lt);
     690             : #endif /* HAVE_LOCALE_T */
     691             :             /* FALL THRU */
     692             :         case PG_REGEX_LOCALE_1BYTE_L:
     693             : #ifdef HAVE_LOCALE_T
     694           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     695           0 :                 return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
     696             : #endif /* HAVE_LOCALE_T */
     697           0 :             return c; /* c > UCHAR_MAX, or HAVE_LOCALE_T not defined: returned unchanged */
     698             :         case PG_REGEX_LOCALE_ICU:
     699             : #ifdef USE_ICU
     700             :             return u_tolower(c);
     701             : #endif /* USE_ICU */
     702           0 :             break; /* reached only when USE_ICU is not defined */
     703             :     }
     704           0 :     return 0;                   /* not expected to be reached (only via the ICU break above, or an unlisted strategy value); keeps compiler quiet */
     705             : }
     706             : 
     707             : 
     708             : /*
     709             :  * These functions cache the results of probing libc's ctype behavior for
     710             :  * all character codes of interest in a given encoding/collation.  The
     711             :  * result is provided as a "struct cvec", but notice that the representation
     712             :  * is a touch different from a cvec created by regc_cvec.c: we allocate the
     713             :  * chrs[] and ranges[] arrays separately from the struct so that we can
     714             :  * realloc them larger at need.  This is okay since the cvecs made here
     715             :  * should never be freed by freecvec().
     716             :  *
     717             :  * We use malloc not palloc since we mustn't lose control on out-of-memory;
     718             :  * the main regex code expects us to return a failure indication instead.
     719             :  */
     720             : 
     721             : typedef int (*pg_wc_probefunc) (pg_wchar c); /* character-class test: nonzero means c belongs to the class */
     722             : 
     723             : typedef struct pg_ctype_cache /* one cached probe result; entries are looked up by (probefunc, collation) */
     724             : {
     725             :     pg_wc_probefunc probefunc;  /* pg_wc_isalpha or a sibling */
     726             :     Oid         collation;      /* collation this entry is for */
     727             :     struct cvec cv;             /* cache entry contents */
     728             :     struct pg_ctype_cache *next;    /* chain link */
     729             : } pg_ctype_cache;
     730             : 
     731             : static pg_ctype_cache *pg_ctype_cache_list = NULL; /* head of the singly linked list of cache entries; NULL while empty */
     732             : 
     733             : /*
     734             :  * Add a chr or range to pcc->cv; return false if run out of memory.  chr1 is the first chr of a run of nchrs consecutive chrs: a run longer than 1 is stored as a (first, last) pair in cv.ranges, a single chr goes into cv.chrs.
     735             :  */
     736             : static bool
     737         256 : store_match(pg_ctype_cache *pcc, pg_wchar chr1, int nchrs)
     738             : {
     739             :     chr        *newchrs;
     740             : 
     741         256 :     if (nchrs > 1)
     742             :     {
     743         186 :         if (pcc->cv.nranges >= pcc->cv.rangespace) /* ranges[] is full: double its capacity */
     744             :         {
     745           0 :             pcc->cv.rangespace *= 2;
     746           0 :             newchrs = (chr *) realloc(pcc->cv.ranges,
     747           0 :                                       pcc->cv.rangespace * sizeof(chr) * 2); /* 2 chrs per range */
     748           0 :             if (newchrs == NULL)
     749           0 :                 return false; /* cv.ranges still holds the old array; rangespace stays doubled */
     750           0 :             pcc->cv.ranges = newchrs;
     751             :         }
     752         186 :         pcc->cv.ranges[pcc->cv.nranges * 2] = chr1; /* first chr of the range */
     753         186 :         pcc->cv.ranges[pcc->cv.nranges * 2 + 1] = chr1 + nchrs - 1; /* last chr of the range (inclusive) */
     754         186 :         pcc->cv.nranges++;
     755             :     }
     756             :     else
     757             :     {
     758             :         assert(nchrs == 1); /* callers must not pass nchrs <= 0 */
     759          70 :         if (pcc->cv.nchrs >= pcc->cv.chrspace) /* chrs[] is full: double its capacity */
     760             :         {
     761           0 :             pcc->cv.chrspace *= 2;
     762           0 :             newchrs = (chr *) realloc(pcc->cv.chrs,
     763           0 :                                       pcc->cv.chrspace * sizeof(chr));
     764           0 :             if (newchrs == NULL)
     765           0 :                 return false; /* cv.chrs still holds the old array; chrspace stays doubled */
     766           0 :             pcc->cv.chrs = newchrs;
     767             :         }
     768          70 :         pcc->cv.chrs[pcc->cv.nchrs++] = chr1;
     769             :     }
     770         256 :     return true;
     771             : }
     772             : 
     773             : /*
     774             :  * Given a probe function (e.g., pg_wc_isalpha) get a struct cvec for all
     775             :  * chrs satisfying the probe function.  The active collation is the one
     776             :  * previously set by pg_set_regex_collation.  Return NULL if out of memory.
     777             :  *
     778             :  * Note that the result must not be freed or modified by caller: it points into a cache entry that stays linked on pg_ctype_cache_list.
     779             :  */
     780             : static struct cvec *
     781         120 : pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
     782             : {
     783             :     pg_ctype_cache *pcc;
     784             :     pg_wchar    max_chr;
     785             :     pg_wchar    cur_chr;
     786             :     int         nmatches;
     787             :     chr        *newchrs;
     788             : 
     789             :     /*
     790             :      * Do we already have the answer cached?
     791             :      */
     792         156 :     for (pcc = pg_ctype_cache_list; pcc != NULL; pcc = pcc->next) /* linear search of the cache list */
     793             :     {
     794         216 :         if (pcc->probefunc == probefunc &&
     795         106 :             pcc->collation == pg_regex_collation) /* cclasscode is not part of the lookup key */
     796          74 :             return &pcc->cv;
     797             :     }
     798             : 
     799             :     /*
     800             :      * Nope, so initialize some workspace ...
     801             :      */
     802          46 :     pcc = (pg_ctype_cache *) malloc(sizeof(pg_ctype_cache));
     803          46 :     if (pcc == NULL)
     804           0 :         return NULL;
     805          46 :     pcc->probefunc = probefunc;
     806          46 :     pcc->collation = pg_regex_collation;
     807          46 :     pcc->cv.nchrs = 0;
     808          46 :     pcc->cv.chrspace = 128; /* initial capacity of chrs[]; store_match doubles it when full */
     809          46 :     pcc->cv.chrs = (chr *) malloc(pcc->cv.chrspace * sizeof(chr));
     810          46 :     pcc->cv.nranges = 0;
     811          46 :     pcc->cv.rangespace = 64; /* initial capacity of ranges[], counted in ranges (2 chrs each) */
     812          46 :     pcc->cv.ranges = (chr *) malloc(pcc->cv.rangespace * sizeof(chr) * 2);
     813          46 :     if (pcc->cv.chrs == NULL || pcc->cv.ranges == NULL) /* both mallocs are checked together */
     814             :         goto out_of_memory; /* cleanup frees whichever array was obtained, plus pcc */
     815          46 :     pcc->cv.cclasscode = cclasscode; /* may be overridden with -1 in the switch below */
     816             : 
     817             :     /*
     818             :      * Decide how many character codes we ought to look through.  In general
     819             :      * we don't go past MAX_SIMPLE_CHR; chr codes above that are handled at
     820             :      * runtime using the "high colormap" mechanism.  However, in C locale
     821             :      * there's no need to go further than 127, and if we only have a 1-byte
     822             :      * <ctype.h> API there's no need to go further than that can handle.
     823             :      *
     824             :      * If it's not MAX_SIMPLE_CHR that's constraining the search, mark the
     825             :      * output cvec as not having any locale-dependent behavior, since there
     826             :      * will be no need to do any run-time locale checks.  (The #if's here
     827             :      * would always be true for production values of MAX_SIMPLE_CHR, but it's
     828             :      * useful to allow it to be small for testing purposes.)
     829             :      */
     830          46 :     switch (pg_regex_strategy)
     831             :     {
     832             :         case PG_REGEX_LOCALE_C:
     833             : #if MAX_SIMPLE_CHR >= 127
     834          14 :             max_chr = (pg_wchar) 127;
     835          14 :             pcc->cv.cclasscode = -1; /* -1 marks "no locale-dependent behavior", per the comment above */
     836             : #else
     837             :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     838             : #endif /* MAX_SIMPLE_CHR >= 127 */
     839          14 :             break;
     840             :         case PG_REGEX_LOCALE_WIDE:
     841             :         case PG_REGEX_LOCALE_WIDE_L:
     842          32 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     843          32 :             break;
     844             :         case PG_REGEX_LOCALE_1BYTE:
     845             :         case PG_REGEX_LOCALE_1BYTE_L:
     846             : #if MAX_SIMPLE_CHR >= UCHAR_MAX
     847           0 :             max_chr = (pg_wchar) UCHAR_MAX;
     848           0 :             pcc->cv.cclasscode = -1; /* -1 marks "no locale-dependent behavior", per the comment above */
     849             : #else
     850             :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     851             : #endif /* MAX_SIMPLE_CHR >= UCHAR_MAX */
     852           0 :             break;
     853             :         case PG_REGEX_LOCALE_ICU:
     854           0 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     855           0 :             break;
     856             :         default:
     857           0 :             max_chr = 0;        /* can't get here, but keep compiler quiet */
     858           0 :             break;
     859             :     }
     860             : 
     861             :     /*
     862             :      * And scan 'em ...
     863             :      */
     864          46 :     nmatches = 0;               /* number of consecutive matches */
     865             : 
     866       67374 :     for (cur_chr = 0; cur_chr <= max_chr; cur_chr++) /* probes every code 0..max_chr inclusive */
     867             :     {
     868       67328 :         if ((*probefunc) (cur_chr))
     869        6828 :             nmatches++; /* extend the current run of matching chrs */
     870       60500 :         else if (nmatches > 0)
     871             :         {
     872         256 :             if (!store_match(pcc, cur_chr - nmatches, nmatches)) /* run covered cur_chr - nmatches .. cur_chr - 1 */
     873           0 :                 goto out_of_memory;
     874         256 :             nmatches = 0;
     875             :         }
     876             :     }
     877             : 
     878          46 :     if (nmatches > 0) /* a run was still open at loop exit, i.e. it extended through max_chr */
     879           0 :         if (!store_match(pcc, cur_chr - nmatches, nmatches)) /* cur_chr is max_chr + 1 here */
     880           0 :             goto out_of_memory;
     881             : 
     882             :     /*
     883             :      * We might have allocated more memory than needed, if so free it
     884             :      */
     885          46 :     if (pcc->cv.nchrs == 0)
     886             :     {
     887          28 :         free(pcc->cv.chrs); /* no single chrs recorded: drop the array entirely */
     888          28 :         pcc->cv.chrs = NULL;
     889          28 :         pcc->cv.chrspace = 0;
     890             :     }
     891          18 :     else if (pcc->cv.nchrs < pcc->cv.chrspace)
     892             :     {
     893          18 :         newchrs = (chr *) realloc(pcc->cv.chrs,
     894          18 :                                   pcc->cv.nchrs * sizeof(chr)); /* shrink to exactly nchrs entries */
     895          18 :         if (newchrs == NULL)
     896           0 :             goto out_of_memory; /* cv.chrs still holds the old array, which cleanup frees */
     897          18 :         pcc->cv.chrs = newchrs;
     898          18 :         pcc->cv.chrspace = pcc->cv.nchrs;
     899             :     }
     900          46 :     if (pcc->cv.nranges == 0)
     901             :     {
     902           0 :         free(pcc->cv.ranges); /* no ranges recorded: drop the array entirely */
     903           0 :         pcc->cv.ranges = NULL;
     904           0 :         pcc->cv.rangespace = 0;
     905             :     }
     906          46 :     else if (pcc->cv.nranges < pcc->cv.rangespace)
     907             :     {
     908          46 :         newchrs = (chr *) realloc(pcc->cv.ranges,
     909          46 :                                   pcc->cv.nranges * sizeof(chr) * 2); /* shrink to exactly nranges pairs */
     910          46 :         if (newchrs == NULL)
     911           0 :             goto out_of_memory; /* cv.ranges still holds the old array, which cleanup frees */
     912          46 :         pcc->cv.ranges = newchrs;
     913          46 :         pcc->cv.rangespace = pcc->cv.nranges;
     914             :     }
     915             : 
     916             :     /*
     917             :      * Success, link it into cache chain
     918             :      */
     919          46 :     pcc->next = pg_ctype_cache_list; /* push the new entry at the head of the list */
     920          46 :     pg_ctype_cache_list = pcc;
     921             : 
     922          46 :     return &pcc->cv;
     923             : 
     924             :     /*
     925             :      * Failure, clean up
     926             :      */
     927             : out_of_memory: /* pcc has not been linked into the list on any path that gets here */
     928           0 :     if (pcc->cv.chrs)
     929           0 :         free(pcc->cv.chrs);
     930           0 :     if (pcc->cv.ranges)
     931           0 :         free(pcc->cv.ranges);
     932           0 :     free(pcc);
     933             : 
     934           0 :     return NULL;
     935             : }

Generated by: LCOV version 1.13