LCOV - code coverage report
Current view: top level - src/backend/regex - regc_locale.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 156 172 90.7 %
Date: 2021-12-09 04:09:06 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * regc_locale.c --
       3             :  *
       4             :  *  This file contains locale-specific regexp routines.
       5             :  *  This file is #included by regcomp.c.
       6             :  *
       7             :  * Copyright (c) 1998 by Scriptics Corporation.
       8             :  *
       9             :  * This software is copyrighted by the Regents of the University of
      10             :  * California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
      11             :  * Corporation and other parties.  The following terms apply to all files
      12             :  * associated with the software unless explicitly disclaimed in
      13             :  * individual files.
      14             :  *
      15             :  * The authors hereby grant permission to use, copy, modify, distribute,
      16             :  * and license this software and its documentation for any purpose, provided
      17             :  * that existing copyright notices are retained in all copies and that this
      18             :  * notice is included verbatim in any distributions. No written agreement,
      19             :  * license, or royalty fee is required for any of the authorized uses.
      20             :  * Modifications to this software may be copyrighted by their authors
      21             :  * and need not follow the licensing terms described here, provided that
      22             :  * the new terms are clearly indicated on the first page of each file where
      23             :  * they apply.
      24             :  *
      25             :  * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
      26             :  * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
      27             :  * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
      28             :  * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
      29             :  * POSSIBILITY OF SUCH DAMAGE.
      30             :  *
      31             :  * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
      32             :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
      33             :  * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
      34             :  * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
      35             :  * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
      36             :  * MODIFICATIONS.
      37             :  *
      38             :  * GOVERNMENT USE: If you are acquiring this software on behalf of the
      39             :  * U.S. government, the Government shall have only "Restricted Rights"
      40             :  * in the software and related documentation as defined in the Federal
      41             :  * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
      42             :  * are acquiring the software on behalf of the Department of Defense, the
      43             :  * software shall be classified as "Commercial Computer Software" and the
      44             :  * Government shall have only "Restricted Rights" as defined in Clause
      45             :  * 252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
      46             :  * authors grant the U.S. Government and others acting in its behalf
      47             :  * permission to use and distribute the software in accordance with the
      48             :  * terms specified in this license.
      49             :  *
      50             :  * src/backend/regex/regc_locale.c
      51             :  */
      52             : 
      53             : /* ASCII character-name table */
      54             : 
      55             : static const struct cname
      56             : {
      57             :     const char *name;
      58             :     const char  code;
      59             : }           cnames[] =
      60             : 
      61             : {
      62             :     {
      63             :         "NUL", '\0'
      64             :     },
      65             :     {
      66             :         "SOH", '\001'
      67             :     },
      68             :     {
      69             :         "STX", '\002'
      70             :     },
      71             :     {
      72             :         "ETX", '\003'
      73             :     },
      74             :     {
      75             :         "EOT", '\004'
      76             :     },
      77             :     {
      78             :         "ENQ", '\005'
      79             :     },
      80             :     {
      81             :         "ACK", '\006'
      82             :     },
      83             :     {
      84             :         "BEL", '\007'
      85             :     },
      86             :     {
      87             :         "alert", '\007'
      88             :     },
      89             :     {
      90             :         "BS", '\010'
      91             :     },
      92             :     {
      93             :         "backspace", '\b'
      94             :     },
      95             :     {
      96             :         "HT", '\011'
      97             :     },
      98             :     {
      99             :         "tab", '\t'
     100             :     },
     101             :     {
     102             :         "LF", '\012'
     103             :     },
     104             :     {
     105             :         "newline", '\n'
     106             :     },
     107             :     {
     108             :         "VT", '\013'
     109             :     },
     110             :     {
     111             :         "vertical-tab", '\v'
     112             :     },
     113             :     {
     114             :         "FF", '\014'
     115             :     },
     116             :     {
     117             :         "form-feed", '\f'
     118             :     },
     119             :     {
     120             :         "CR", '\015'
     121             :     },
     122             :     {
     123             :         "carriage-return", '\r'
     124             :     },
     125             :     {
     126             :         "SO", '\016'
     127             :     },
     128             :     {
     129             :         "SI", '\017'
     130             :     },
     131             :     {
     132             :         "DLE", '\020'
     133             :     },
     134             :     {
     135             :         "DC1", '\021'
     136             :     },
     137             :     {
     138             :         "DC2", '\022'
     139             :     },
     140             :     {
     141             :         "DC3", '\023'
     142             :     },
     143             :     {
     144             :         "DC4", '\024'
     145             :     },
     146             :     {
     147             :         "NAK", '\025'
     148             :     },
     149             :     {
     150             :         "SYN", '\026'
     151             :     },
     152             :     {
     153             :         "ETB", '\027'
     154             :     },
     155             :     {
     156             :         "CAN", '\030'
     157             :     },
     158             :     {
     159             :         "EM", '\031'
     160             :     },
     161             :     {
     162             :         "SUB", '\032'
     163             :     },
     164             :     {
     165             :         "ESC", '\033'
     166             :     },
     167             :     {
     168             :         "IS4", '\034'
     169             :     },
     170             :     {
     171             :         "FS", '\034'
     172             :     },
     173             :     {
     174             :         "IS3", '\035'
     175             :     },
     176             :     {
     177             :         "GS", '\035'
     178             :     },
     179             :     {
     180             :         "IS2", '\036'
     181             :     },
     182             :     {
     183             :         "RS", '\036'
     184             :     },
     185             :     {
     186             :         "IS1", '\037'
     187             :     },
     188             :     {
     189             :         "US", '\037'
     190             :     },
     191             :     {
     192             :         "space", ' '
     193             :     },
     194             :     {
     195             :         "exclamation-mark", '!'
     196             :     },
     197             :     {
     198             :         "quotation-mark", '"'
     199             :     },
     200             :     {
     201             :         "number-sign", '#'
     202             :     },
     203             :     {
     204             :         "dollar-sign", '$'
     205             :     },
     206             :     {
     207             :         "percent-sign", '%'
     208             :     },
     209             :     {
     210             :         "ampersand", '&'
     211             :     },
     212             :     {
     213             :         "apostrophe", '\''
     214             :     },
     215             :     {
     216             :         "left-parenthesis", '('
     217             :     },
     218             :     {
     219             :         "right-parenthesis", ')'
     220             :     },
     221             :     {
     222             :         "asterisk", '*'
     223             :     },
     224             :     {
     225             :         "plus-sign", '+'
     226             :     },
     227             :     {
     228             :         "comma", ','
     229             :     },
     230             :     {
     231             :         "hyphen", '-'
     232             :     },
     233             :     {
     234             :         "hyphen-minus", '-'
     235             :     },
     236             :     {
     237             :         "period", '.'
     238             :     },
     239             :     {
     240             :         "full-stop", '.'
     241             :     },
     242             :     {
     243             :         "slash", '/'
     244             :     },
     245             :     {
     246             :         "solidus", '/'
     247             :     },
     248             :     {
     249             :         "zero", '0'
     250             :     },
     251             :     {
     252             :         "one", '1'
     253             :     },
     254             :     {
     255             :         "two", '2'
     256             :     },
     257             :     {
     258             :         "three", '3'
     259             :     },
     260             :     {
     261             :         "four", '4'
     262             :     },
     263             :     {
     264             :         "five", '5'
     265             :     },
     266             :     {
     267             :         "six", '6'
     268             :     },
     269             :     {
     270             :         "seven", '7'
     271             :     },
     272             :     {
     273             :         "eight", '8'
     274             :     },
     275             :     {
     276             :         "nine", '9'
     277             :     },
     278             :     {
     279             :         "colon", ':'
     280             :     },
     281             :     {
     282             :         "semicolon", ';'
     283             :     },
     284             :     {
     285             :         "less-than-sign", '<'
     286             :     },
     287             :     {
     288             :         "equals-sign", '='
     289             :     },
     290             :     {
     291             :         "greater-than-sign", '>'
     292             :     },
     293             :     {
     294             :         "question-mark", '?'
     295             :     },
     296             :     {
     297             :         "commercial-at", '@'
     298             :     },
     299             :     {
     300             :         "left-square-bracket", '['
     301             :     },
     302             :     {
     303             :         "backslash", '\\'
     304             :     },
     305             :     {
     306             :         "reverse-solidus", '\\'
     307             :     },
     308             :     {
     309             :         "right-square-bracket", ']'
     310             :     },
     311             :     {
     312             :         "circumflex", '^'
     313             :     },
     314             :     {
     315             :         "circumflex-accent", '^'
     316             :     },
     317             :     {
     318             :         "underscore", '_'
     319             :     },
     320             :     {
     321             :         "low-line", '_'
     322             :     },
     323             :     {
     324             :         "grave-accent", '`'
     325             :     },
     326             :     {
     327             :         "left-brace", '{'
     328             :     },
     329             :     {
     330             :         "left-curly-bracket", '{'
     331             :     },
     332             :     {
     333             :         "vertical-line", '|'
     334             :     },
     335             :     {
     336             :         "right-brace", '}'
     337             :     },
     338             :     {
     339             :         "right-curly-bracket", '}'
     340             :     },
     341             :     {
     342             :         "tilde", '~'
     343             :     },
     344             :     {
     345             :         "DEL", '\177'
     346             :     },
     347             :     {
     348             :         NULL, 0
     349             :     }
     350             : };
     351             : 
     352             : /*
     353             :  * The following array defines the valid character class names.
     354             :  * The entries must match enum char_classes in regguts.h.
     355             :  */
     356             : static const char *const classNames[NUM_CCLASSES + 1] = {
     357             :     "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
     358             :     "lower", "print", "punct", "space", "upper", "xdigit", "word",
     359             :     NULL
     360             : };
     361             : 
     362             : /*
     363             :  * We do not use the hard-wired Unicode classification tables that Tcl does.
     364             :  * This is because (a) we need to deal with other encodings besides Unicode,
     365             :  * and (b) we want to track the behavior of the libc locale routines as
     366             :  * closely as possible.  For example, it wouldn't be unreasonable for a
     367             :  * locale to not consider every Unicode letter as a letter.  So we build
     368             :  * character classification cvecs by asking libc, even for Unicode.
     369             :  */
     370             : 
     371             : 
     372             : /*
     373             :  * element - map collating-element name to chr
     374             :  */
     375             : static chr
     376          48 : element(struct vars *v,         /* context */
     377             :         const chr *startp,      /* points to start of name */
     378             :         const chr *endp)        /* points just past end of name */
     379             : {
     380             :     const struct cname *cn;
     381             :     size_t      len;
     382             : 
     383             :     /* generic:  one-chr names stand for themselves */
     384             :     assert(startp < endp);
     385          48 :     len = endp - startp;
     386          48 :     if (len == 1)
     387          28 :         return *startp;
     388             : 
     389          20 :     NOTE(REG_ULOCALE);
     390             : 
     391             :     /* search table */
     392        1360 :     for (cn = cnames; cn->name != NULL; cn++)
     393             :     {
     394        1506 :         if (strlen(cn->name) == len &&
     395         154 :             pg_char_and_wchar_strncmp(cn->name, startp, len) == 0)
     396             :         {
     397          12 :             break;              /* NOTE BREAK OUT */
     398             :         }
     399             :     }
     400          20 :     if (cn->name != NULL)
     401          12 :         return CHR(cn->code);
     402             : 
     403             :     /* couldn't find it */
     404           8 :     ERR(REG_ECOLLATE);
     405           8 :     return 0;
     406             : }
     407             : 
     408             : /*
     409             :  * range - supply cvec for a range, including legality check
     410             :  */
     411             : static struct cvec *
     412         910 : range(struct vars *v,           /* context */
     413             :       chr a,                    /* range start */
     414             :       chr b,                    /* range end, might equal a */
     415             :       int cases)                /* case-independent? */
     416             : {
     417             :     int         nchrs;
     418             :     struct cvec *cv;
     419             :     chr         c,
     420             :                 cc;
     421             : 
     422         910 :     if (a != b && !before(a, b))
     423             :     {
     424           4 :         ERR(REG_ERANGE);
     425           4 :         return NULL;
     426             :     }
     427             : 
     428         906 :     if (!cases)
     429             :     {                           /* easy version */
     430         874 :         cv = getcvec(v, 0, 1);
     431         874 :         NOERRN();
     432         874 :         addrange(cv, a, b);
     433         874 :         return cv;
     434             :     }
     435             : 
     436             :     /*
     437             :      * When case-independent, it's hard to decide when cvec ranges are usable,
     438             :      * so for now at least, we won't try.  We use a range for the originally
     439             :      * specified chrs and then add on any case-equivalents that are outside
     440             :      * that range as individual chrs.
     441             :      *
     442             :      * To ensure sane behavior if someone specifies a very large range, limit
     443             :      * the allocation size to 100000 chrs (arbitrary) and check for overrun
     444             :      * inside the loop below.
     445             :      */
     446          32 :     nchrs = b - a + 1;
     447          32 :     if (nchrs <= 0 || nchrs > 100000)
     448           0 :         nchrs = 100000;
     449             : 
     450          32 :     cv = getcvec(v, nchrs, 1);
     451          32 :     NOERRN();
     452          32 :     addrange(cv, a, b);
     453             : 
     454        8814 :     for (c = a; c <= b; c++)
     455             :     {
     456        8782 :         cc = pg_wc_tolower(c);
     457        9766 :         if (cc != c &&
     458        1966 :             (before(cc, a) || before(b, cc)))
     459             :         {
     460         442 :             if (cv->nchrs >= cv->chrspace)
     461             :             {
     462           0 :                 ERR(REG_ETOOBIG);
     463           0 :                 return NULL;
     464             :             }
     465         442 :             addchr(cv, cc);
     466             :         }
     467        8782 :         cc = pg_wc_toupper(c);
     468        9740 :         if (cc != c &&
     469        1510 :             (before(cc, a) || before(b, cc)))
     470             :         {
     471         414 :             if (cv->nchrs >= cv->chrspace)
     472             :             {
     473           0 :                 ERR(REG_ETOOBIG);
     474           0 :                 return NULL;
     475             :             }
     476         414 :             addchr(cv, cc);
     477             :         }
     478        8782 :         if (CANCEL_REQUESTED(v->re))
     479             :         {
     480           0 :             ERR(REG_CANCEL);
     481           0 :             return NULL;
     482             :         }
     483             :     }
     484             : 
     485          32 :     return cv;
     486             : }
     487             : 
     488             : /*
     489             :  * before - is chr x before chr y, for purposes of range legality?
     490             :  */
     491             : static int                      /* predicate */
     492        4366 : before(chr x, chr y)
     493             : {
     494        4366 :     if (x < y)
     495        1742 :         return 1;
     496        2624 :     return 0;
     497             : }
     498             : 
     499             : /*
     500             :  * eclass - supply cvec for an equivalence class
     501             :  * Must include case counterparts on request.
     502             :  */
     503             : static struct cvec *
     504          20 : eclass(struct vars *v,          /* context */
     505             :        chr c,                   /* Collating element representing the
     506             :                                  * equivalence class. */
     507             :        int cases)               /* all cases? */
     508             : {
     509             :     struct cvec *cv;
     510             : 
     511             :     /* crude fake equivalence class for testing */
     512          20 :     if ((v->cflags & REG_FAKE) && c == 'x')
     513             :     {
     514          12 :         cv = getcvec(v, 4, 0);
     515          12 :         addchr(cv, CHR('x'));
     516          12 :         addchr(cv, CHR('y'));
     517          12 :         if (cases)
     518             :         {
     519           0 :             addchr(cv, CHR('X'));
     520           0 :             addchr(cv, CHR('Y'));
     521             :         }
     522          12 :         return cv;
     523             :     }
     524             : 
     525             :     /* otherwise, none */
     526           8 :     if (cases)
     527           4 :         return allcases(v, c);
     528           4 :     cv = getcvec(v, 1, 0);
     529             :     assert(cv != NULL);
     530           4 :     addchr(cv, c);
     531           4 :     return cv;
     532             : }
     533             : 
     534             : /*
     535             :  * lookupcclass - lookup a character class identified by name
     536             :  *
     537             :  * On failure, sets an error code in *v; the result is then garbage.
     538             :  */
     539             : static enum char_classes
     540         154 : lookupcclass(struct vars *v,    /* context (for returning errors) */
     541             :              const chr *startp, /* where the name starts */
     542             :              const chr *endp)   /* just past the end of the name */
     543             : {
     544             :     size_t      len;
     545             :     const char *const *namePtr;
     546             :     int         i;
     547             : 
     548             :     /*
     549             :      * Map the name to the corresponding enumerated value.
     550             :      */
     551         154 :     len = endp - startp;
     552         922 :     for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++)
     553             :     {
     554        1642 :         if (strlen(*namePtr) == len &&
     555         728 :             pg_char_and_wchar_strncmp(*namePtr, startp, len) == 0)
     556         146 :             return (enum char_classes) i;
     557             :     }
     558             : 
     559           8 :     ERR(REG_ECTYPE);
     560           8 :     return (enum char_classes) 0;
     561             : }
     562             : 
     563             : /*
     564             :  * cclasscvec - supply cvec for a character class
     565             :  *
     566             :  * Must include case counterparts if "cases" is true.
     567             :  *
     568             :  * The returned cvec might be either a transient cvec gotten from getcvec(),
     569             :  * or a permanently cached one from pg_ctype_get_cache().  This is okay
     570             :  * because callers are not supposed to explicitly free the result either way.
     571             :  */
     572             : static struct cvec *
     573         596 : cclasscvec(struct vars *v,      /* context */
     574             :            enum char_classes cclasscode,    /* class to build a cvec for */
     575             :            int cases)           /* case-independent? */
     576             : {
     577         596 :     struct cvec *cv = NULL;
     578             : 
     579             :     /*
     580             :      * Remap lower and upper to alpha if the match is case insensitive.
     581             :      */
     582             : 
     583         596 :     if (cases &&
     584          18 :         (cclasscode == CC_LOWER ||
     585             :          cclasscode == CC_UPPER))
     586           2 :         cclasscode = CC_ALPHA;
     587             : 
     588             :     /*
     589             :      * Now compute the character class contents.  For classes that are based
     590             :      * on the behavior of a <wctype.h> or <ctype.h> function, we use
     591             :      * pg_ctype_get_cache so that we can cache the results.  Other classes
     592             :      * have definitions that are hard-wired here, and for those we just
     593             :      * construct a transient cvec on the fly.
     594             :      *
     595             :      * NB: keep this code in sync with cclass_column_index(), below.
     596             :      */
     597             : 
     598         596 :     switch (cclasscode)
     599             :     {
     600           2 :         case CC_PRINT:
     601           2 :             cv = pg_ctype_get_cache(pg_wc_isprint, cclasscode);
     602           2 :             break;
     603          10 :         case CC_ALNUM:
     604          10 :             cv = pg_ctype_get_cache(pg_wc_isalnum, cclasscode);
     605          10 :             break;
     606           4 :         case CC_ALPHA:
     607           4 :             cv = pg_ctype_get_cache(pg_wc_isalpha, cclasscode);
     608           4 :             break;
     609         230 :         case CC_WORD:
     610         230 :             cv = pg_ctype_get_cache(pg_wc_isword, cclasscode);
     611         230 :             break;
     612           2 :         case CC_ASCII:
     613             :             /* hard-wired meaning */
     614           2 :             cv = getcvec(v, 0, 1);
     615           2 :             if (cv)
     616           2 :                 addrange(cv, 0, 0x7f);
     617           2 :             break;
     618          70 :         case CC_BLANK:
     619             :             /* hard-wired meaning */
     620          70 :             cv = getcvec(v, 2, 0);
     621          70 :             addchr(cv, '\t');
     622          70 :             addchr(cv, ' ');
     623          70 :             break;
     624           2 :         case CC_CNTRL:
     625             :             /* hard-wired meaning */
     626           2 :             cv = getcvec(v, 0, 2);
     627           2 :             addrange(cv, 0x0, 0x1f);
     628           2 :             addrange(cv, 0x7f, 0x9f);
     629           2 :             break;
     630         182 :         case CC_DIGIT:
     631         182 :             cv = pg_ctype_get_cache(pg_wc_isdigit, cclasscode);
     632         182 :             break;
     633           2 :         case CC_PUNCT:
     634           2 :             cv = pg_ctype_get_cache(pg_wc_ispunct, cclasscode);
     635           2 :             break;
     636           4 :         case CC_XDIGIT:
     637             : 
     638             :             /*
     639             :              * It's not clear how to define this in non-western locales, and
     640             :              * even less clear that there's any particular use in trying. So
     641             :              * just hard-wire the meaning.
     642             :              */
     643           4 :             cv = getcvec(v, 0, 3);
     644           4 :             if (cv)
     645             :             {
     646           4 :                 addrange(cv, '0', '9');
     647           4 :                 addrange(cv, 'a', 'f');
     648           4 :                 addrange(cv, 'A', 'F');
     649             :             }
     650           4 :             break;
     651          78 :         case CC_SPACE:
     652          78 :             cv = pg_ctype_get_cache(pg_wc_isspace, cclasscode);
     653          78 :             break;
     654           2 :         case CC_LOWER:
     655           2 :             cv = pg_ctype_get_cache(pg_wc_islower, cclasscode);
     656           2 :             break;
     657           6 :         case CC_UPPER:
     658           6 :             cv = pg_ctype_get_cache(pg_wc_isupper, cclasscode);
     659           6 :             break;
     660           2 :         case CC_GRAPH:
     661           2 :             cv = pg_ctype_get_cache(pg_wc_isgraph, cclasscode);
     662           2 :             break;
     663             :     }
     664             : 
     665             :     /* If cv is NULL now, the reason must be "out of memory" */
     666         596 :     if (cv == NULL)
     667           0 :         ERR(REG_ESPACE);
     668         596 :     return cv;
     669             : }
     670             : 
     671             : /*
     672             :  * cclass_column_index - get appropriate high colormap column index for chr
     673             :  */
     674             : static int
     675          64 : cclass_column_index(struct colormap *cm, chr c)
     676             : {
     677          64 :     int         colnum = 0;
     678             : 
     679             :     /* Shouldn't go through all these pushups for simple chrs */
     680             :     assert(c > MAX_SIMPLE_CHR);
     681             : 
     682             :     /*
     683             :      * Note: we should not see requests to consider cclasses that are not
     684             :      * treated as locale-specific by cclasscvec(), above.
     685             :      */
     686          64 :     if (cm->classbits[CC_PRINT] && pg_wc_isprint(c))
     687           6 :         colnum |= cm->classbits[CC_PRINT];
     688          64 :     if (cm->classbits[CC_ALNUM] && pg_wc_isalnum(c))
     689          20 :         colnum |= cm->classbits[CC_ALNUM];
     690          64 :     if (cm->classbits[CC_ALPHA] && pg_wc_isalpha(c))
     691          10 :         colnum |= cm->classbits[CC_ALPHA];
     692          64 :     if (cm->classbits[CC_WORD] && pg_wc_isword(c))
     693           2 :         colnum |= cm->classbits[CC_WORD];
     694             :     assert(cm->classbits[CC_ASCII] == 0);
     695             :     assert(cm->classbits[CC_BLANK] == 0);
     696             :     assert(cm->classbits[CC_CNTRL] == 0);
     697          64 :     if (cm->classbits[CC_DIGIT] && pg_wc_isdigit(c))
     698           0 :         colnum |= cm->classbits[CC_DIGIT];
     699          64 :     if (cm->classbits[CC_PUNCT] && pg_wc_ispunct(c))
     700           0 :         colnum |= cm->classbits[CC_PUNCT];
     701             :     assert(cm->classbits[CC_XDIGIT] == 0);
     702          64 :     if (cm->classbits[CC_SPACE] && pg_wc_isspace(c))
     703           0 :         colnum |= cm->classbits[CC_SPACE];
     704          64 :     if (cm->classbits[CC_LOWER] && pg_wc_islower(c))
     705           0 :         colnum |= cm->classbits[CC_LOWER];
     706          64 :     if (cm->classbits[CC_UPPER] && pg_wc_isupper(c))
     707           0 :         colnum |= cm->classbits[CC_UPPER];
     708          64 :     if (cm->classbits[CC_GRAPH] && pg_wc_isgraph(c))
     709           6 :         colnum |= cm->classbits[CC_GRAPH];
     710             : 
     711          64 :     return colnum;
     712             : }
     713             : 
     714             : /*
     715             :  * allcases - supply cvec for all case counterparts of a chr (including itself)
     716             :  *
     717             :  * This is a shortcut, preferably an efficient one, for simple characters;
     718             :  * messy cases are done via range().
     719             :  */
     720             : static struct cvec *
     721        1138 : allcases(struct vars *v,        /* context */
     722             :          chr c)                 /* character to get case equivs of */
     723             : {
     724             :     struct cvec *cv;
     725             :     chr         lc,
     726             :                 uc;
     727             : 
     728        1138 :     lc = pg_wc_tolower(c);
     729        1138 :     uc = pg_wc_toupper(c);
     730             : 
     731        1138 :     cv = getcvec(v, 2, 0);
     732        1138 :     addchr(cv, lc);
     733        1138 :     if (lc != uc)
     734         936 :         addchr(cv, uc);
     735        1138 :     return cv;
     736             : }
     737             : 
     738             : /*
     739             :  * cmp - chr-substring compare
     740             :  *
     741             :  * Backrefs need this.  It should preferably be efficient.
     742             :  * Note that it does not need to report anything except equal/unequal.
     743             :  * Note also that the length is exact, and the comparison should not
     744             :  * stop at embedded NULs!
     745             :  */
     746             : static int                      /* 0 for equal, nonzero for unequal */
     747         944 : cmp(const chr *x, const chr *y, /* strings to compare */
     748             :     size_t len)                 /* exact length of comparison */
     749             : {
     750         944 :     return memcmp(VS(x), VS(y), len * sizeof(chr));
     751             : }
     752             : 
     753             : /*
     754             :  * casecmp - case-independent chr-substring compare
     755             :  *
     756             :  * REG_ICASE backrefs need this.  It should preferably be efficient.
     757             :  * Note that it does not need to report anything except equal/unequal.
     758             :  * Note also that the length is exact, and the comparison should not
     759             :  * stop at embedded NULs!
     760             :  */
     761             : static int                      /* 0 for equal, nonzero for unequal */
     762           2 : casecmp(const chr *x, const chr *y, /* strings to compare */
     763             :         size_t len)             /* exact length of comparison */
     764             : {
     765           4 :     for (; len > 0; len--, x++, y++)
     766             :     {
     767           2 :         if ((*x != *y) && (pg_wc_tolower(*x) != pg_wc_tolower(*y)))
     768           0 :             return 1;
     769             :     }
     770           2 :     return 0;
     771             : }

Generated by: LCOV version 1.14