LCOV - code coverage report
Current view: top level - src/backend/regex - regc_locale.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 92.4 % 170 157
Test Date: 2026-03-02 08:16:13 Functions: 100.0 % 10 10
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*
       2              :  * regc_locale.c --
       3              :  *
       4              :  *  This file contains locale-specific regexp routines.
       5              :  *  This file is #included by regcomp.c.
       6              :  *
       7              :  * Copyright (c) 1998 by Scriptics Corporation.
       8              :  *
       9              :  * This software is copyrighted by the Regents of the University of
      10              :  * California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
      11              :  * Corporation and other parties.  The following terms apply to all files
      12              :  * associated with the software unless explicitly disclaimed in
      13              :  * individual files.
      14              :  *
      15              :  * The authors hereby grant permission to use, copy, modify, distribute,
      16              :  * and license this software and its documentation for any purpose, provided
      17              :  * that existing copyright notices are retained in all copies and that this
      18              :  * notice is included verbatim in any distributions. No written agreement,
      19              :  * license, or royalty fee is required for any of the authorized uses.
      20              :  * Modifications to this software may be copyrighted by their authors
      21              :  * and need not follow the licensing terms described here, provided that
      22              :  * the new terms are clearly indicated on the first page of each file where
      23              :  * they apply.
      24              :  *
      25              :  * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
      26              :  * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
      27              :  * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
      28              :  * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
      29              :  * POSSIBILITY OF SUCH DAMAGE.
      30              :  *
      31              :  * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
      32              :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
      33              :  * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
      34              :  * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
      35              :  * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
      36              :  * MODIFICATIONS.
      37              :  *
      38              :  * GOVERNMENT USE: If you are acquiring this software on behalf of the
      39              :  * U.S. government, the Government shall have only "Restricted Rights"
      40              :  * in the software and related documentation as defined in the Federal
      41              :  * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
      42              :  * are acquiring the software on behalf of the Department of Defense, the
      43              :  * software shall be classified as "Commercial Computer Software" and the
      44              :  * Government shall have only "Restricted Rights" as defined in Clause
      45              :  * 252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
      46              :  * authors grant the U.S. Government and others acting in its behalf
      47              :  * permission to use and distribute the software in accordance with the
      48              :  * terms specified in this license.
      49              :  *
      50              :  * src/backend/regex/regc_locale.c
      51              :  */
      52              : 
      53              : /* ASCII character-name table
                      :  *
                      :  * Each entry pairs a collating-element name with the single character it
                      :  * stands for.  Several names may map to the same code (for example BEL and
                      :  * alert, or period and full-stop).  element() scans the table linearly and
                      :  * relies on the final { NULL, 0 } entry to stop the scan, so that entry
                      :  * must remain last.
                      :  */
      54              : 
      55              : static const struct cname
      56              : {
      57              :     const char *name;           /* collating-element name; NULL in the last entry */
      58              :     const char  code;           /* character the name maps to */
      59              : }           cnames[] =
      60              : 
      61              : {
      62              :     {
      63              :         "NUL", '\0'
      64              :     },
      65              :     {
      66              :         "SOH", '\001'
      67              :     },
      68              :     {
      69              :         "STX", '\002'
      70              :     },
      71              :     {
      72              :         "ETX", '\003'
      73              :     },
      74              :     {
      75              :         "EOT", '\004'
      76              :     },
      77              :     {
      78              :         "ENQ", '\005'
      79              :     },
      80              :     {
      81              :         "ACK", '\006'
      82              :     },
      83              :     {
      84              :         "BEL", '\007'
      85              :     },
      86              :     {
      87              :         "alert", '\007'
      88              :     },
      89              :     {
      90              :         "BS", '\010'
      91              :     },
      92              :     {
      93              :         "backspace", '\b'
      94              :     },
      95              :     {
      96              :         "HT", '\011'
      97              :     },
      98              :     {
      99              :         "tab", '\t'
     100              :     },
     101              :     {
     102              :         "LF", '\012'
     103              :     },
     104              :     {
     105              :         "newline", '\n'
     106              :     },
     107              :     {
     108              :         "VT", '\013'
     109              :     },
     110              :     {
     111              :         "vertical-tab", '\v'
     112              :     },
     113              :     {
     114              :         "FF", '\014'
     115              :     },
     116              :     {
     117              :         "form-feed", '\f'
     118              :     },
     119              :     {
     120              :         "CR", '\015'
     121              :     },
     122              :     {
     123              :         "carriage-return", '\r'
     124              :     },
     125              :     {
     126              :         "SO", '\016'
     127              :     },
     128              :     {
     129              :         "SI", '\017'
     130              :     },
     131              :     {
     132              :         "DLE", '\020'
     133              :     },
     134              :     {
     135              :         "DC1", '\021'
     136              :     },
     137              :     {
     138              :         "DC2", '\022'
     139              :     },
     140              :     {
     141              :         "DC3", '\023'
     142              :     },
     143              :     {
     144              :         "DC4", '\024'
     145              :     },
     146              :     {
     147              :         "NAK", '\025'
     148              :     },
     149              :     {
     150              :         "SYN", '\026'
     151              :     },
     152              :     {
     153              :         "ETB", '\027'
     154              :     },
     155              :     {
     156              :         "CAN", '\030'
     157              :     },
     158              :     {
     159              :         "EM", '\031'
     160              :     },
     161              :     {
     162              :         "SUB", '\032'
     163              :     },
     164              :     {
     165              :         "ESC", '\033'
     166              :     },
     167              :     {
     168              :         "IS4", '\034'
     169              :     },
     170              :     {
     171              :         "FS", '\034'
     172              :     },
     173              :     {
     174              :         "IS3", '\035'
     175              :     },
     176              :     {
     177              :         "GS", '\035'
     178              :     },
     179              :     {
     180              :         "IS2", '\036'
     181              :     },
     182              :     {
     183              :         "RS", '\036'
     184              :     },
     185              :     {
     186              :         "IS1", '\037'
     187              :     },
     188              :     {
     189              :         "US", '\037'
     190              :     },
     191              :     {
     192              :         "space", ' '
     193              :     },
     194              :     {
     195              :         "exclamation-mark", '!'
     196              :     },
     197              :     {
     198              :         "quotation-mark", '"'
     199              :     },
     200              :     {
     201              :         "number-sign", '#'
     202              :     },
     203              :     {
     204              :         "dollar-sign", '$'
     205              :     },
     206              :     {
     207              :         "percent-sign", '%'
     208              :     },
     209              :     {
     210              :         "ampersand", '&'
     211              :     },
     212              :     {
     213              :         "apostrophe", '\''
     214              :     },
     215              :     {
     216              :         "left-parenthesis", '('
     217              :     },
     218              :     {
     219              :         "right-parenthesis", ')'
     220              :     },
     221              :     {
     222              :         "asterisk", '*'
     223              :     },
     224              :     {
     225              :         "plus-sign", '+'
     226              :     },
     227              :     {
     228              :         "comma", ','
     229              :     },
     230              :     {
     231              :         "hyphen", '-'
     232              :     },
     233              :     {
     234              :         "hyphen-minus", '-'
     235              :     },
     236              :     {
     237              :         "period", '.'
     238              :     },
     239              :     {
     240              :         "full-stop", '.'
     241              :     },
     242              :     {
     243              :         "slash", '/'
     244              :     },
     245              :     {
     246              :         "solidus", '/'
     247              :     },
     248              :     {
     249              :         "zero", '0'
     250              :     },
     251              :     {
     252              :         "one", '1'
     253              :     },
     254              :     {
     255              :         "two", '2'
     256              :     },
     257              :     {
     258              :         "three", '3'
     259              :     },
     260              :     {
     261              :         "four", '4'
     262              :     },
     263              :     {
     264              :         "five", '5'
     265              :     },
     266              :     {
     267              :         "six", '6'
     268              :     },
     269              :     {
     270              :         "seven", '7'
     271              :     },
     272              :     {
     273              :         "eight", '8'
     274              :     },
     275              :     {
     276              :         "nine", '9'
     277              :     },
     278              :     {
     279              :         "colon", ':'
     280              :     },
     281              :     {
     282              :         "semicolon", ';'
     283              :     },
     284              :     {
     285              :         "less-than-sign", '<'
     286              :     },
     287              :     {
     288              :         "equals-sign", '='
     289              :     },
     290              :     {
     291              :         "greater-than-sign", '>'
     292              :     },
     293              :     {
     294              :         "question-mark", '?'
     295              :     },
     296              :     {
     297              :         "commercial-at", '@'
     298              :     },
     299              :     {
     300              :         "left-square-bracket", '['
     301              :     },
     302              :     {
     303              :         "backslash", '\\'
     304              :     },
     305              :     {
     306              :         "reverse-solidus", '\\'
     307              :     },
     308              :     {
     309              :         "right-square-bracket", ']'
     310              :     },
     311              :     {
     312              :         "circumflex", '^'
     313              :     },
     314              :     {
     315              :         "circumflex-accent", '^'
     316              :     },
     317              :     {
     318              :         "underscore", '_'
     319              :     },
     320              :     {
     321              :         "low-line", '_'
     322              :     },
     323              :     {
     324              :         "grave-accent", '`'
     325              :     },
     326              :     {
     327              :         "left-brace", '{'
     328              :     },
     329              :     {
     330              :         "left-curly-bracket", '{'
     331              :     },
     332              :     {
     333              :         "vertical-line", '|'
     334              :     },
     335              :     {
     336              :         "right-brace", '}'
     337              :     },
     338              :     {
     339              :         "right-curly-bracket", '}'
     340              :     },
     341              :     {
     342              :         "tilde", '~'
     343              :     },
     344              :     {
     345              :         "DEL", '\177'
     346              :     },
     347              :     {
     348              :         NULL, 0
     349              :     }
     350              : };
     351              : 
     352              : /*
     353              :  * The following array defines the valid character class names.
     354              :  * The entries must match enum char_classes in regguts.h.
                      :  *
                      :  * Order matters: lookupcclass() returns the array index of the matching
                      :  * name, cast to enum char_classes.  The trailing NULL ends that scan; the
                      :  * array is sized NUM_CCLASSES + 1, presumably to leave room for it
                      :  * (NUM_CCLASSES is defined outside this listing - confirm).
     355              :  */
     356              : static const char *const classNames[NUM_CCLASSES + 1] = {
     357              :     "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
     358              :     "lower", "print", "punct", "space", "upper", "xdigit", "word",
     359              :     NULL
     360              : };
     361              : 
     362              : /*
     363              :  * We do not use the hard-wired Unicode classification tables that Tcl does.
     364              :  * This is because (a) we need to deal with other encodings besides Unicode,
     365              :  * and (b) we want to track the behavior of the libc locale routines as
     366              :  * closely as possible.  For example, it wouldn't be unreasonable for a
     367              :  * locale to not consider every Unicode letter as a letter.  So we build
     368              :  * character classification cvecs by asking libc, even for Unicode.
     369              :  */
     370              : 
     371              : 
     372              : /*
     373              :  * element - map collating-element name to chr
                      :  *
                      :  * The name is the chr sequence [startp, endp) and must be non-empty.
                      :  * A name exactly one chr long stands for itself and is returned unchanged.
                      :  * Longer names are looked up in cnames[]: an entry matches when its length
                      :  * equals the name length and pg_char_and_wchar_strncmp() reports equality.
                      :  * On a match the entry code is returned through CHR().
                      :  *
                      :  * If no entry matches, REG_ECOLLATE is reported through ERR() and 0 is
                      :  * returned.  Any multi-chr name also triggers NOTE(REG_ULOCALE) before the
                      :  * lookup, whether or not the lookup succeeds.
     374              :  */
     375              : static chr
     376           27 : element(struct vars *v,         /* context */
     377              :         const chr *startp,      /* points to start of name */
     378              :         const chr *endp)        /* points just past end of name */
     379              : {
     380              :     const struct cname *cn;
     381              :     size_t      len;
     382              : 
     383              :     /* generic:  one-chr names stand for themselves */
     384              :     assert(startp < endp);
     385           27 :     len = endp - startp;
     386           27 :     if (len == 1)
     387           14 :         return *startp;
     388              : 
     389           13 :     NOTE(REG_ULOCALE);
     390              : 
     391              :     /* search table; cn is left on the match, or on the NULL-name terminator */
     392          707 :     for (cn = cnames; cn->name != NULL; cn++)
     393              :     {
     394          783 :         if (strlen(cn->name) == len &&
     395           80 :             pg_char_and_wchar_strncmp(cn->name, startp, len) == 0)
     396              :         {
     397            9 :             break;              /* NOTE BREAK OUT */
     398              :         }
     399              :     }
     400           13 :     if (cn->name != NULL)
     401            9 :         return CHR(cn->code);
     402              : 
     403              :     /* couldn't find it */
     404            4 :     ERR(REG_ECOLLATE);
     405            4 :     return 0;
     406              : }
     407              : 
     408              : /*
     409              :  * range - supply cvec for a range, including legality check
                      :  *
                      :  * Outcomes:
                      :  *  - a != b and a is not before b: REG_ERANGE is reported, NULL returned.
                      :  *  - cases == 0: a cvec holding just the range a..b.
                      :  *  - cases != 0: the range a..b, plus every lower-case and upper-case
                      :  *    counterpart of a chr in the range that itself lies outside a..b,
                      :  *    added as an individual chr.  REG_ETOOBIG is reported and NULL
                      :  *    returned if the cvec runs out of chr slots.
                      :  *
                      :  * NOERRN() follows each getcvec() call; it is defined outside this
                      :  * listing and presumably returns NULL when an error is already recorded.
     410              :  */
     411              : static struct cvec *
     412          314 : range(struct vars *v,           /* context */
     413              :       chr a,                    /* range start */
     414              :       chr b,                    /* range end, might equal a */
     415              :       int cases)                /* case-independent? */
     416              : {
     417              :     int         nchrs;
     418              :     struct cvec *cv;
     419              :     chr         c,
     420              :                 cc;
     421              : 
     422          314 :     if (a != b && !before(a, b))
     423              :     {
     424            2 :         ERR(REG_ERANGE);
     425            2 :         return NULL;
     426              :     }
     427              : 
     428          312 :     if (!cases)
     429              :     {                           /* easy version */
     430          272 :         cv = getcvec(v, 0, 1);
     431          272 :         NOERRN();
     432          272 :         addrange(cv, a, b);
     433          272 :         return cv;
     434              :     }
     435              : 
     436              :     /*
     437              :      * When case-independent, it's hard to decide when cvec ranges are usable,
     438              :      * so for now at least, we won't try.  We use a range for the originally
     439              :      * specified chrs and then add on any case-equivalents that are outside
     440              :      * that range as individual chrs.
     441              :      *
     442              :      * To ensure sane behavior if someone specifies a very large range, limit
     443              :      * the allocation size to 100000 chrs (arbitrary) and check for overrun
     444              :      * inside the loop below.
                      :      *
                      :      * The nchrs <= 0 test presumably catches b - a + 1 not fitting in an
                      :      * int; such a value is clamped to the same 100000 limit.
     445              :      */
     446           40 :     nchrs = b - a + 1;
     447           40 :     if (nchrs <= 0 || nchrs > 100000)
     448            0 :         nchrs = 100000;
     449              : 
     450           40 :     cv = getcvec(v, nchrs, 1);
     451           40 :     NOERRN();
     452           40 :     addrange(cv, a, b);
     453              : 
     454         4569 :     for (c = a; c <= b; c++)
     455              :     {
     456         4529 :         cc = regc_wc_tolower(c);
     457         5094 :         if (cc != c &&
     458         1129 :             (before(cc, a) || before(b, cc)))
     459              :         {
     460          293 :             if (cv->nchrs >= cv->chrspace)
     461              :             {
     462            0 :                 ERR(REG_ETOOBIG);
     463            0 :                 return NULL;
     464              :             }
     465          293 :             addchr(cv, cc);
     466              :         }
     467         4529 :         cc = regc_wc_toupper(c);
     468         5075 :         if (cc != c &&
     469          823 :             (before(cc, a) || before(b, cc)))
     470              :         {
     471          273 :             if (cv->nchrs >= cv->chrspace)
     472              :             {
     473            0 :                 ERR(REG_ETOOBIG);
     474            0 :                 return NULL;
     475              :             }
     476          273 :             addchr(cv, cc);
     477              :         }
     478         4529 :         INTERRUPT(v->re);
     479              :     }
     480              : 
     481           40 :     return cv;
     482              : }
     483              : 
     484              : /*
     485              :  * before - is chr x before chr y, for purposes of range legality?
                      :  *
                      :  * Returns 1 when x < y and 0 otherwise (including x == y).  The test is a
                      :  * plain comparison of the two chr values; no locale collation is consulted.
     486              :  */
     487              : static int                      /* predicate */
     488         2253 : before(chr x, chr y)
     489              : {
     490         2253 :     if (x < y)
     491          865 :         return 1;
     492         1388 :     return 0;
     493              : }
     494              : 
     495              : /*
     496              :  * eclass - supply cvec for an equivalence class
     497              :  * Must include case counterparts on request.
                      :  *
                      :  * Apart from a fake class used for testing (REG_FAKE set in v->cflags and
                      :  * c equal to x, giving the chrs x and y, plus X and Y when cases is set),
                      :  * every chr is treated as the only member of its class: the result is a
                      :  * cvec holding c alone, or whatever allcases() returns when cases is set.
                      :  *
                      :  * NOTE(review): the REG_FAKE path passes the getcvec() result straight to
                      :  * addchr() with no NULL check, while the final path asserts cv != NULL.
                      :  * Confirm whether getcvec() can fail here.
     498              :  */
     499              : static struct cvec *
     500           10 : eclass(struct vars *v,          /* context */
     501              :        chr c,                   /* Collating element representing the
     502              :                                  * equivalence class. */
     503              :        int cases)               /* all cases? */
     504              : {
     505              :     struct cvec *cv;
     506              : 
     507              :     /* crude fake equivalence class for testing */
     508           10 :     if ((v->cflags & REG_FAKE) && c == 'x')
     509              :     {
     510            6 :         cv = getcvec(v, 4, 0);
     511            6 :         addchr(cv, CHR('x'));
     512            6 :         addchr(cv, CHR('y'));
     513            6 :         if (cases)
     514              :         {
     515            0 :             addchr(cv, CHR('X'));
     516            0 :             addchr(cv, CHR('Y'));
     517              :         }
     518            6 :         return cv;
     519              :     }
     520              : 
     521              :     /* otherwise, none: the class is just c itself, plus case variants on request */
     522            4 :     if (cases)
     523            2 :         return allcases(v, c);
     524            2 :     cv = getcvec(v, 1, 0);
     525              :     assert(cv != NULL);
     526            2 :     addchr(cv, c);
     527            2 :     return cv;
     528              : }
     529              : 
     530              : /*
     531              :  * lookupcclass - lookup a character class identified by name
     532              :  *
     533              :  * On failure, sets an error code in *v; the result is then garbage.
                      :  *
                      :  * The name is the chr sequence [startp, endp).  It matches a classNames[]
                      :  * entry when the lengths are equal and pg_char_and_wchar_strncmp() reports
                      :  * equality; the index of that entry is returned as the enum value.
                      :  *
                      :  * On failure the error is REG_ECTYPE and the returned value is 0, which is
                      :  * also the index of the first classNames[] entry - so callers must check
                      :  * the error state rather than the return value.
     534              :  */
     535              : static enum char_classes
     536          149 : lookupcclass(struct vars *v,    /* context (for returning errors) */
     537              :              const chr *startp, /* where the name starts */
     538              :              const chr *endp)   /* just past the end of the name */
     539              : {
     540              :     size_t      len;
     541              :     const char *const *namePtr;
     542              :     int         i;
     543              : 
     544              :     /*
     545              :      * Map the name to the corresponding enumerated value.
                      :      * namePtr walks classNames[] while i tracks the matching index.
     546              :      */
     547          149 :     len = endp - startp;
     548          860 :     for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++)
     549              :     {
     550         1619 :         if (strlen(*namePtr) == len &&
     551          763 :             pg_char_and_wchar_strncmp(*namePtr, startp, len) == 0)
     552          145 :             return (enum char_classes) i;
     553              :     }
     554              : 
     555            4 :     ERR(REG_ECTYPE);
     556            4 :     return (enum char_classes) 0;
     557              : }
     558              : 
     559              : /*
     560              :  * cclasscvec - supply cvec for a character class
     561              :  *
     562              :  * Must include case counterparts if "cases" is true.
     563              :  *
     564              :  * The returned cvec might be either a transient cvec gotten from getcvec(),
     565              :  * or a permanently cached one from regc_ctype_get_cache().  This is okay
     566              :  * because callers are not supposed to explicitly free the result either way.
                      :  *
                      :  * Case counterparts are handled by substituting CC_ALPHA for CC_LOWER or
                      :  * CC_UPPER when cases is set; no other class is altered by the flag.
                      :  *
                      :  * Returns NULL after reporting REG_ESPACE if no cvec was obtained.  The
                      :  * switch has no default, so a cclasscode outside the listed values also
                      :  * leaves cv NULL and takes that path.
     567              :  */
     568              : static struct cvec *
     569          478 : cclasscvec(struct vars *v,      /* context */
     570              :            enum char_classes cclasscode,    /* class to build a cvec for */
     571              :            int cases)           /* case-independent? */
     572              : {
     573          478 :     struct cvec *cv = NULL;
     574              : 
     575              :     /*
     576              :      * Remap lower and upper to alpha if the match is case insensitive.
     577              :      */
     578              : 
     579          478 :     if (cases &&
     580            9 :         (cclasscode == CC_LOWER ||
     581              :          cclasscode == CC_UPPER))
     582            1 :         cclasscode = CC_ALPHA;
     583              : 
     584              :     /*
     585              :      * Now compute the character class contents.  For classes that are based
     586              :      * on the behavior of a <wctype.h> or <ctype.h> function, we use
     587              :      * regc_ctype_get_cache so that we can cache the results.  Other classes
     588              :      * have definitions that are hard-wired here, and for those we just
     589              :      * construct a transient cvec on the fly.
     590              :      *
     591              :      * NB: keep this code in sync with cclass_column_index(), below.
                      :      *
                      :      * NOTE(review): the hard-wired cases are inconsistent about a NULL
                      :      * result from getcvec().  CC_ASCII and CC_XDIGIT test cv before
                      :      * filling it; CC_BLANK and CC_CNTRL do not.  Confirm whether getcvec()
                      :      * can return NULL here and, if so, guard those two cases as well.
     592              :      */
     593              : 
     594          478 :     switch (cclasscode)
     595              :     {
     596            4 :         case CC_PRINT:
     597            4 :             cv = regc_ctype_get_cache(regc_wc_isprint, cclasscode);
     598            4 :             break;
     599           29 :         case CC_ALNUM:
     600           29 :             cv = regc_ctype_get_cache(regc_wc_isalnum, cclasscode);
     601           29 :             break;
     602           11 :         case CC_ALPHA:
     603           11 :             cv = regc_ctype_get_cache(regc_wc_isalpha, cclasscode);
     604           11 :             break;
     605          130 :         case CC_WORD:
     606          130 :             cv = regc_ctype_get_cache(regc_wc_isword, cclasscode);
     607          130 :             break;
     608            1 :         case CC_ASCII:
     609              :             /* hard-wired meaning */
     610            1 :             cv = getcvec(v, 0, 1);
     611            1 :             if (cv)
     612            1 :                 addrange(cv, 0, 0x7f);
     613            1 :             break;
     614           35 :         case CC_BLANK:
     615              :             /* hard-wired meaning: tab and space only; cv is used unchecked here */
     616           35 :             cv = getcvec(v, 2, 0);
     617           35 :             addchr(cv, '\t');
     618           35 :             addchr(cv, ' ');
     619           35 :             break;
     620            1 :         case CC_CNTRL:
     621              :             /* hard-wired meaning: 0x00-0x1f and 0x7f-0x9f; cv is used unchecked here */
     622            1 :             cv = getcvec(v, 0, 2);
     623            1 :             addrange(cv, 0x0, 0x1f);
     624            1 :             addrange(cv, 0x7f, 0x9f);
     625            1 :             break;
     626          185 :         case CC_DIGIT:
     627          185 :             cv = regc_ctype_get_cache(regc_wc_isdigit, cclasscode);
     628          185 :             break;
     629           10 :         case CC_PUNCT:
     630           10 :             cv = regc_ctype_get_cache(regc_wc_ispunct, cclasscode);
     631           10 :             break;
     632            2 :         case CC_XDIGIT:
     633              : 
     634              :             /*
     635              :              * It's not clear how to define this in non-western locales, and
     636              :              * even less clear that there's any particular use in trying. So
     637              :              * just hard-wire the meaning.
     638              :              */
     639            2 :             cv = getcvec(v, 0, 3);
     640            2 :             if (cv)
     641              :             {
     642            2 :                 addrange(cv, '0', '9');
     643            2 :                 addrange(cv, 'a', 'f');
     644            2 :                 addrange(cv, 'A', 'F');
     645              :             }
     646            2 :             break;
     647           50 :         case CC_SPACE:
     648           50 :             cv = regc_ctype_get_cache(regc_wc_isspace, cclasscode);
     649           50 :             break;
     650            4 :         case CC_LOWER:
     651            4 :             cv = regc_ctype_get_cache(regc_wc_islower, cclasscode);
     652            4 :             break;
     653           12 :         case CC_UPPER:
     654           12 :             cv = regc_ctype_get_cache(regc_wc_isupper, cclasscode);
     655           12 :             break;
     656            4 :         case CC_GRAPH:
     657            4 :             cv = regc_ctype_get_cache(regc_wc_isgraph, cclasscode);
     658            4 :             break;
     659              :     }
     660              : 
     661              :     /* If cv is NULL now, the reason must be "out of memory" */
     662          478 :     if (cv == NULL)
     663            0 :         ERR(REG_ESPACE);
     664          478 :     return cv;
     665              : }
     666              : 
     667              : /*
     668              :  * cclass_column_index - get appropriate high colormap column index for chr
                      :  *
                      :  * The index is built by OR-ing together cm->classbits[] for every character
                      :  * class that both has a nonzero classbits entry and accepts c according to
                      :  * the matching regc_wc_is*() test.  The result is 0 when no class applies.
                      :  *
                      :  * c must be greater than MAX_SIMPLE_CHR (asserted).  The ASCII, BLANK, CNTRL
                      :  * and XDIGIT classes are asserted to have no classbits assigned.
     669              :  */
     670              : static int
     671           38 : cclass_column_index(struct colormap *cm, chr c)
     672              : {
     673           38 :     int         colnum = 0;
     674              : 
     675              :     /* Shouldn't go through all these pushups for simple chrs */
     676              :     assert(c > MAX_SIMPLE_CHR);
     677              : 
     678              :     /*
     679              :      * Note: we should not see requests to consider cclasses that are not
     680              :      * treated as locale-specific by cclasscvec(), above.
                      :      *
                      :      * Each test below is skipped outright when the class has no classbits,
                      :      * so the regc_wc_is*() call is made only for classes actually in use.
     681              :      */
     682           38 :     if (cm->classbits[CC_PRINT] && regc_wc_isprint(c))
     683            3 :         colnum |= cm->classbits[CC_PRINT];
     684           38 :     if (cm->classbits[CC_ALNUM] && regc_wc_isalnum(c))
     685           10 :         colnum |= cm->classbits[CC_ALNUM];
     686           38 :     if (cm->classbits[CC_ALPHA] && regc_wc_isalpha(c))
     687            5 :         colnum |= cm->classbits[CC_ALPHA];
     688           38 :     if (cm->classbits[CC_WORD] && regc_wc_isword(c))
     689            1 :         colnum |= cm->classbits[CC_WORD];
     690              :     assert(cm->classbits[CC_ASCII] == 0);   /* per the note above, never requested */
     691              :     assert(cm->classbits[CC_BLANK] == 0);
     692              :     assert(cm->classbits[CC_CNTRL] == 0);
     693           38 :     if (cm->classbits[CC_DIGIT] && regc_wc_isdigit(c))
     694            3 :         colnum |= cm->classbits[CC_DIGIT];
     695           38 :     if (cm->classbits[CC_PUNCT] && regc_wc_ispunct(c))
     696            0 :         colnum |= cm->classbits[CC_PUNCT];
     697              :     assert(cm->classbits[CC_XDIGIT] == 0);  /* per the note above, never requested */
     698           38 :     if (cm->classbits[CC_SPACE] && regc_wc_isspace(c))
     699            0 :         colnum |= cm->classbits[CC_SPACE];
     700           38 :     if (cm->classbits[CC_LOWER] && regc_wc_islower(c))
     701            0 :         colnum |= cm->classbits[CC_LOWER];
     702           38 :     if (cm->classbits[CC_UPPER] && regc_wc_isupper(c))
     703            0 :         colnum |= cm->classbits[CC_UPPER];
     704           38 :     if (cm->classbits[CC_GRAPH] && regc_wc_isgraph(c))
     705            3 :         colnum |= cm->classbits[CC_GRAPH];
     706              : 
     707           38 :     return colnum;          /* still 0 if no class matched */
     708              : }
     709              : 
     710              : /*
     711              :  * allcases - supply cvec for all case counterparts of a chr (including itself)
     712              :  *
     713              :  * This is a shortcut, preferably an efficient one, for simple characters;
     714              :  * messy cases are done via range().
                      :  *
                      :  * The returned cvec comes from getcvec() and holds regc_wc_tolower(c) plus,
                      :  * when it differs, regc_wc_toupper(c).
                      :  *
                      :  * NOTE(review): c itself is added only if it equals one of those two forms;
                      :  * confirm that no supported chr has both a different lower-case and a
                      :  * different upper-case form.  The getcvec() result is also used without a
                      :  * NULL check here -- presumably a request this small cannot fail; verify.
     715              :  */
     716              : static struct cvec *
     717          822 : allcases(struct vars *v,        /* context */
     718              :          chr c)                 /* character to get case equivs of */
     719              : {
     720              :     struct cvec *cv;
     721              :     chr         lc,
     722              :                 uc;
     723              : 
     724          822 :     lc = regc_wc_tolower(c);
     725          822 :     uc = regc_wc_toupper(c);
     726              : 
     727          822 :     cv = getcvec(v, 2, 0);      /* at most two chrs are added below, no ranges */
     728          822 :     addchr(cv, lc);
     729          822 :     if (lc != uc)               /* avoid adding the same chr twice */
     730          692 :         addchr(cv, uc);
     731          822 :     return cv;
     732              : }
     733              : 
     734              : /*
     735              :  * cmp - chr-substring compare
     736              :  *
     737              :  * Backrefs need this.  It should preferably be efficient.
     738              :  * Note that it does not need to report anything except equal/unequal.
     739              :  * Note also that the length is exact, and the comparison should not
     740              :  * stop at embedded NULs!
                      :  *
                      :  * Implemented as a single memcmp() over len * sizeof(chr) bytes, so the
                      :  * value returned is memcmp()'s; callers should test it only against zero.
     741              :  */
     742              : static int                      /* 0 for equal, nonzero for unequal */
     743          642 : cmp(const chr *x, const chr *y, /* strings to compare */
     744              :     size_t len)                 /* exact length of comparison */
     745              : {
     746          642 :     return memcmp(VS(x), VS(y), len * sizeof(chr));    /* len counts chrs, not bytes */
     747              : }
     748              : 
     749              : /*
     750              :  * casecmp - case-independent chr-substring compare
     751              :  *
     752              :  * REG_ICASE backrefs need this.  It should preferably be efficient.
     753              :  * Note that it does not need to report anything except equal/unequal.
     754              :  * Note also that the length is exact, and the comparison should not
     755              :  * stop at embedded NULs!
                      :  *
                      :  * Two chrs are treated as equal when they are identical or when their
                      :  * regc_wc_tolower() results are identical.  Returns 1 at the first position
                      :  * where neither holds, else 0 (which is also the result when len is 0).
     756              :  */
     757              : static int                      /* 0 for equal, nonzero for unequal */
     758            1 : casecmp(const chr *x, const chr *y, /* strings to compare */
     759              :         size_t len)             /* exact length of comparison */
     760              : {
     761            2 :     for (; len > 0; len--, x++, y++)
     762              :     {
                      :         /* identical chrs short-circuit, skipping the case-folding calls */
     763            1 :         if ((*x != *y) && (regc_wc_tolower(*x) != regc_wc_tolower(*y)))
     764            0 :             return 1;
     765              :     }
     766            1 :     return 0;
     767              : }
        

Generated by: LCOV version 2.0-1