LCOV - code coverage report
Current view: top level - src/common - unicode_norm.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19beta1 Lines: 90.6 % 202 183
Test Date: 2026-06-10 08:16:38 Functions: 100.0 % 11 11
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  * unicode_norm.c
       3              :  *      Normalize a Unicode string
       4              :  *
       5              :  * This implements Unicode normalization, per the documentation at
       6              :  * https://www.unicode.org/reports/tr15/.
       7              :  *
       8              :  * Portions Copyright (c) 2017-2026, PostgreSQL Global Development Group
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/common/unicode_norm.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #ifndef FRONTEND
      16              : #include "postgres.h"
      17              : #else
      18              : #include "postgres_fe.h"
      19              : #endif
      20              : 
      21              : #include "common/unicode_norm.h"
      22              : #ifndef FRONTEND
      23              : #include "common/unicode_norm_hashfunc.h"
      24              : #include "common/unicode_normprops_table.h"
      25              : #include "port/pg_bswap.h"
      26              : #include "utils/memutils.h"
      27              : #else
      28              : #include "common/unicode_norm_table.h"
      29              : #endif
      30              : 
      31              : #ifndef FRONTEND
      32              : #define ALLOC(size) palloc(size)
      33              : #define FREE(size) pfree(size)
      34              : #else
      35              : #define ALLOC(size) malloc(size)
      36              : #define FREE(size) free(size)
      37              : #endif
      38              : 
      39              : /* Constants for calculations with Hangul characters */
      40              : #define SBASE       0xAC00      /* U+AC00 */
      41              : #define LBASE       0x1100      /* U+1100 */
      42              : #define VBASE       0x1161      /* U+1161 */
      43              : #define TBASE       0x11A7      /* U+11A7 */
      44              : #define LCOUNT      19
      45              : #define VCOUNT      21
      46              : #define TCOUNT      28
      47              : #define NCOUNT      VCOUNT * TCOUNT
      48              : #define SCOUNT      LCOUNT * NCOUNT
      49              : 
      50              : #ifdef FRONTEND
      51              : /* comparison routine for bsearch() of decomposition lookup table. */
      52              : static int
      53        14699 : conv_compare(const void *p1, const void *p2)
      54              : {
      55              :     uint32      v1,
      56              :                 v2;
      57              : 
      58        14699 :     v1 = *(const uint32 *) p1;
      59        14699 :     v2 = ((const pg_unicode_decomposition *) p2)->codepoint;
      60        14699 :     return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
      61              : }
      62              : 
      63              : #endif
      64              : 
      65              : /*
      66              :  * get_code_entry
      67              :  *
      68              :  * Get the entry corresponding to code in the decomposition lookup table.
      69              :  * The backend version of this code uses a perfect hash function for the
      70              :  * lookup, while the frontend version uses a binary search.
      71              :  */
      72              : static const pg_unicode_decomposition *
      73        14871 : get_code_entry(char32_t code)
      74              : {
      75              : #ifndef FRONTEND
      76              :     int         h;
      77              :     uint32      hashkey;
      78        13740 :     pg_unicode_decompinfo decompinfo = UnicodeDecompInfo;
      79              : 
      80              :     /*
      81              :      * Compute the hash function. The hash key is the codepoint with the bytes
      82              :      * in network order.
      83              :      */
      84        13740 :     hashkey = pg_hton32(code);
      85        13740 :     h = decompinfo.hash(&hashkey);
      86              : 
      87              :     /* An out-of-range result implies no match */
      88        13740 :     if (h < 0 || h >= decompinfo.num_decomps)
      89         3505 :         return NULL;
      90              : 
      91              :     /*
      92              :      * Since it's a perfect hash, we need only match to the specific codepoint
      93              :      * it identifies.
      94              :      */
      95        10235 :     if (code != decompinfo.decomps[h].codepoint)
      96         9529 :         return NULL;
      97              : 
      98              :     /* Success! */
      99          706 :     return &decompinfo.decomps[h];
     100              : #else
     101         1131 :     return bsearch(&(code),
     102              :                    UnicodeDecompMain,
     103              :                    lengthof(UnicodeDecompMain),
     104              :                    sizeof(pg_unicode_decomposition),
     105              :                    conv_compare);
     106              : #endif
     107              : }
     108              : 
     109              : /*
     110              :  * Get the combining class of the given codepoint.
     111              :  */
     112              : static uint8
     113         8403 : get_canonical_class(char32_t code)
     114              : {
     115         8403 :     const pg_unicode_decomposition *entry = get_code_entry(code);
     116              : 
     117              :     /*
     118              :      * If no entries are found, the character used is either a Hangul
     119              :      * character or a character with a class of 0 and no decompositions.
     120              :      */
     121         8403 :     if (!entry)
     122         8063 :         return 0;
     123              :     else
     124          340 :         return entry->comb_class;
     125              : }
     126              : 
     127              : /*
     128              :  * Given a decomposition entry looked up earlier, get the decomposed
     129              :  * characters.
     130              :  *
     131              :  * Note: the returned pointer can point to statically allocated buffer, and
     132              :  * is only valid until next call to this function!
     133              :  */
     134              : static const char32_t *
     135          138 : get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)
     136              : {
     137              :     static char32_t x;
     138              : 
     139          138 :     if (DECOMPOSITION_IS_INLINE(entry))
     140              :     {
     141              :         Assert(DECOMPOSITION_SIZE(entry) == 1);
     142           42 :         x = (char32_t) entry->dec_index;
     143           42 :         *dec_size = 1;
     144           42 :         return &x;
     145              :     }
     146              :     else
     147              :     {
     148           96 :         *dec_size = DECOMPOSITION_SIZE(entry);
     149           96 :         return &UnicodeDecomp_codepoints[entry->dec_index];
     150              :     }
     151              : }
     152              : 
     153              : /*
     154              :  * Calculate how many characters a given character will decompose to.
     155              :  *
     156              :  * This needs to recurse, if the character decomposes into characters that
     157              :  * are, in turn, decomposable.
     158              :  */
     159              : static int
     160         3269 : get_decomposed_size(char32_t code, bool compat)
     161              : {
     162              :     const pg_unicode_decomposition *entry;
     163         3269 :     int         size = 0;
     164              :     int         i;
     165              :     const uint32 *decomp;
     166              :     int         dec_size;
     167              : 
     168              :     /*
     169              :      * Fast path for Hangul characters not stored in tables to save memory as
     170              :      * decomposition is algorithmic. See
     171              :      * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
     172              :      * on the matter.
     173              :      */
     174         3269 :     if (code >= SBASE && code < SBASE + SCOUNT)
     175              :     {
     176              :         uint32      tindex,
     177              :                     sindex;
     178              : 
     179           35 :         sindex = code - SBASE;
     180           35 :         tindex = sindex % TCOUNT;
     181              : 
     182           35 :         if (tindex != 0)
     183           10 :             return 3;
     184           25 :         return 2;
     185              :     }
     186              : 
     187         3234 :     entry = get_code_entry(code);
     188              : 
     189              :     /*
     190              :      * Just count current code if no other decompositions.  A NULL entry is
     191              :      * equivalent to a character with class 0 and no decompositions.
     192              :      */
     193         3234 :     if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
     194           96 :         (!compat && DECOMPOSITION_IS_COMPAT(entry)))
     195         3165 :         return 1;
     196              : 
     197              :     /*
     198              :      * If this entry has other decomposition codes look at them as well. First
     199              :      * get its decomposition in the list of tables available.
     200              :      */
     201           69 :     decomp = get_code_decomposition(entry, &dec_size);
     202          186 :     for (i = 0; i < dec_size; i++)
     203              :     {
     204          117 :         uint32      lcode = decomp[i];
     205              : 
     206          117 :         size += get_decomposed_size(lcode, compat);
     207              :     }
     208              : 
     209           69 :     return size;
     210              : }
     211              : 
     212              : /*
     213              :  * Recompose a set of characters. For hangul characters, the calculation
     214              :  * is algorithmic. For others, an inverse lookup at the decomposition
     215              :  * table is necessary. Returns true if a recomposition can be done, and
     216              :  * false otherwise.
     217              :  */
     218              : static bool
     219         2613 : recompose_code(uint32 start, uint32 code, uint32 *result)
     220              : {
     221              :     /*
     222              :      * Handle Hangul characters algorithmically, per the Unicode spec.
     223              :      *
     224              :      * Check if two current characters are L and V.
     225              :      */
     226         2613 :     if (start >= LBASE && start < LBASE + LCOUNT &&
     227           45 :         code >= VBASE && code < VBASE + VCOUNT)
     228              :     {
     229              :         /* make syllable of form LV */
     230           45 :         uint32      lindex = start - LBASE;
     231           45 :         uint32      vindex = code - VBASE;
     232              : 
     233           45 :         *result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
     234           45 :         return true;
     235              :     }
     236              :     /* Check if two current characters are LV and T */
     237         2568 :     else if (start >= SBASE && start < (SBASE + SCOUNT) &&
     238           35 :              ((start - SBASE) % TCOUNT) == 0 &&
     239           25 :              code > TBASE && code < (TBASE + TCOUNT))
     240              :     {
     241              :         /* make syllable of form LVT */
     242           25 :         uint32      tindex = code - TBASE;
     243              : 
     244           25 :         *result = start + tindex;
     245           25 :         return true;
     246              :     }
     247              :     else
     248              :     {
     249              :         const pg_unicode_decomposition *entry;
     250              : 
     251              :         /*
     252              :          * Do an inverse lookup of the decomposition tables to see if anything
     253              :          * matches. The comparison just needs to be a perfect match on the
     254              :          * sub-table of size two, because the start character has already been
     255              :          * recomposed partially.  This lookup uses a perfect hash function for
     256              :          * the backend code.
     257              :          */
     258              : #ifndef FRONTEND
     259              : 
     260              :         int         h,
     261              :                     inv_lookup_index;
     262              :         uint64      hashkey;
     263         2344 :         pg_unicode_recompinfo recompinfo = UnicodeRecompInfo;
     264              : 
     265              :         /*
     266              :          * Compute the hash function. The hash key is formed by concatenating
     267              :          * bytes of the two codepoints in network order. See also
     268              :          * src/common/unicode/generate-unicode_norm_table.pl.
     269              :          */
     270         2344 :         hashkey = pg_hton64(((uint64) start << 32) | (uint64) code);
     271         2344 :         h = recompinfo.hash(&hashkey);
     272              : 
     273              :         /* An out-of-range result implies no match */
     274         2344 :         if (h < 0 || h >= recompinfo.num_recomps)
     275         1937 :             return false;
     276              : 
     277          439 :         inv_lookup_index = recompinfo.inverse_lookup[h];
     278          439 :         entry = &UnicodeDecompMain[inv_lookup_index];
     279              : 
     280          439 :         if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
     281           36 :             code == UnicodeDecomp_codepoints[entry->dec_index + 1])
     282              :         {
     283           32 :             *result = entry->codepoint;
     284           32 :             return true;
     285              :         }
     286              : 
     287              : #else
     288              : 
     289              :         int         i;
     290              : 
     291      1368921 :         for (i = 0; i < lengthof(UnicodeDecompMain); i++)
     292              :         {
     293      1368722 :             entry = &UnicodeDecompMain[i];
     294              : 
     295      1368722 :             if (DECOMPOSITION_SIZE(entry) != 2)
     296      1031616 :                 continue;
     297              : 
     298       337106 :             if (DECOMPOSITION_NO_COMPOSE(entry))
     299       145867 :                 continue;
     300              : 
     301       191239 :             if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
     302         1718 :                 code == UnicodeDecomp_codepoints[entry->dec_index + 1])
     303              :             {
     304            0 :                 *result = entry->codepoint;
     305            0 :                 return true;
     306              :             }
     307              :         }
     308              : #endif                          /* !FRONTEND */
     309              :     }
     310              : 
     311          606 :     return false;
     312              : }
     313              : 
     314              : /*
     315              :  * Decompose the given code into the array given by caller. The
     316              :  * decomposition begins at the position given by caller, saving one
     317              :  * lookup on the decomposition table. The current position needs to be
     318              :  * updated here to let the caller know from where to continue filling
     319              :  * in the array result.
     320              :  */
     321              : static void
     322         3269 : decompose_code(char32_t code, bool compat, char32_t **result, int *current)
     323              : {
     324              :     const pg_unicode_decomposition *entry;
     325              :     int         i;
     326              :     const uint32 *decomp;
     327              :     int         dec_size;
     328              : 
     329              :     /*
     330              :      * Fast path for Hangul characters not stored in tables to save memory as
     331              :      * decomposition is algorithmic. See
     332              :      * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
     333              :      * on the matter.
     334              :      */
     335         3269 :     if (code >= SBASE && code < SBASE + SCOUNT)
     336              :     {
     337              :         uint32      l,
     338              :                     v,
     339              :                     tindex,
     340              :                     sindex;
     341           35 :         char32_t   *res = *result;
     342              : 
     343           35 :         sindex = code - SBASE;
     344           35 :         l = LBASE + sindex / (VCOUNT * TCOUNT);
     345           35 :         v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT;
     346           35 :         tindex = sindex % TCOUNT;
     347              : 
     348           35 :         res[*current] = l;
     349           35 :         (*current)++;
     350           35 :         res[*current] = v;
     351           35 :         (*current)++;
     352              : 
     353           35 :         if (tindex != 0)
     354              :         {
     355           10 :             res[*current] = TBASE + tindex;
     356           10 :             (*current)++;
     357              :         }
     358              : 
     359         3200 :         return;
     360              :     }
     361              : 
     362         3234 :     entry = get_code_entry(code);
     363              : 
     364              :     /*
     365              :      * Just fill in with the current decomposition if there are no
     366              :      * decomposition codes to recurse to.  A NULL entry is equivalent to a
     367              :      * character with class 0 and no decompositions, so just leave also in
     368              :      * this case.
     369              :      */
     370         3234 :     if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
     371           96 :         (!compat && DECOMPOSITION_IS_COMPAT(entry)))
     372              :     {
     373         3165 :         char32_t   *res = *result;
     374              : 
     375         3165 :         res[*current] = code;
     376         3165 :         (*current)++;
     377         3165 :         return;
     378              :     }
     379              : 
     380              :     /*
     381              :      * If this entry has other decomposition codes look at them as well.
     382              :      */
     383           69 :     decomp = get_code_decomposition(entry, &dec_size);
     384          186 :     for (i = 0; i < dec_size; i++)
     385              :     {
     386          117 :         char32_t    lcode = (char32_t) decomp[i];
     387              : 
     388              :         /* Leave if no more decompositions */
     389          117 :         decompose_code(lcode, compat, result, current);
     390              :     }
     391              : }
     392              : 
     393              : /*
     394              :  * unicode_normalize - Normalize a Unicode string to the specified form.
     395              :  *
     396              :  * The input is a 0-terminated array of codepoints.
     397              :  *
     398              :  * In frontend, returns a 0-terminated array of codepoints, allocated with
     399              :  * malloc. Or NULL if we run out of memory. In backend, the returned
     400              :  * string is palloc'd instead, and OOM is reported with ereport().
     401              :  */
     402              : char32_t *
     403          430 : unicode_normalize(UnicodeNormalizationForm form, const char32_t *input)
     404              : {
     405          430 :     bool        compat = (form == UNICODE_NFKC || form == UNICODE_NFKD);
     406          430 :     bool        recompose = (form == UNICODE_NFC || form == UNICODE_NFKC);
     407              :     char32_t   *decomp_chars;
     408              :     char32_t   *recomp_chars;
     409              :     int         decomp_size,
     410              :                 current_size;
     411              :     int         count;
     412              :     const char32_t *p;
     413              : 
     414              :     /* variables for recomposition */
     415              :     int         last_class;
     416              :     int         starter_pos;
     417              :     int         target_pos;
     418              :     uint32      starter_ch;
     419              : 
     420              :     /* First, do character decomposition */
     421              : 
     422              :     /*
     423              :      * Calculate how many characters long the decomposed version will be.
     424              :      *
     425              :      * Some characters decompose to quite a few code points, so that the
     426              :      * decomposed version's size could overrun MaxAllocSize, and even 32-bit
     427              :      * size_t, even though the input string presumably fits in that.  In
     428              :      * frontend we want to just return NULL in that case, so monitor the sum
     429              :      * and exit early once we'd need more than MaxAllocSize bytes.
     430              :      */
     431          430 :     decomp_size = 0;
     432         3582 :     for (p = input; *p; p++)
     433              :     {
     434         3152 :         decomp_size += get_decomposed_size(*p, compat);
     435         3152 :         if (unlikely(decomp_size > MaxAllocSize / sizeof(char32_t)))
     436              :         {
     437              : #ifndef FRONTEND
     438              :             /* Exit loop and let palloc() throw error below */
     439            0 :             break;
     440              : #else
     441              :             /* Just return NULL with no explicit error */
     442            0 :             return NULL;
     443              : #endif
     444              :         }
     445              :     }
     446              : 
     447          430 :     decomp_chars = (char32_t *) ALLOC((decomp_size + 1) * sizeof(char32_t));
     448          430 :     if (decomp_chars == NULL)
     449            0 :         return NULL;
     450              : 
     451              :     /*
     452              :      * Now fill in each entry recursively. This needs a second pass on the
     453              :      * decomposition table.
     454              :      */
     455          430 :     current_size = 0;
     456         3582 :     for (p = input; *p; p++)
     457         3152 :         decompose_code(*p, compat, &decomp_chars, &current_size);
     458          430 :     decomp_chars[decomp_size] = '\0';
     459              :     Assert(decomp_size == current_size);
     460              : 
     461              :     /* Leave if there is nothing to decompose */
     462          430 :     if (decomp_size == 0)
     463           13 :         return decomp_chars;
     464              : 
     465              :     /*
     466              :      * Now apply canonical ordering.
     467              :      */
     468         3245 :     for (count = 1; count < decomp_size; count++)
     469              :     {
     470         2828 :         char32_t    prev = decomp_chars[count - 1];
     471         2828 :         char32_t    next = decomp_chars[count];
     472              :         char32_t    tmp;
     473         2828 :         const uint8 prevClass = get_canonical_class(prev);
     474         2828 :         const uint8 nextClass = get_canonical_class(next);
     475              : 
     476              :         /*
     477              :          * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html)
     478              :          * annex 4, a sequence of two adjacent characters in a string is an
     479              :          * exchangeable pair if the combining class (from the Unicode
     480              :          * Character Database) for the first character is greater than the
     481              :          * combining class for the second, and the second is not a starter.  A
     482              :          * character is a starter if its combining class is 0.
     483              :          */
     484         2828 :         if (prevClass == 0 || nextClass == 0)
     485         2828 :             continue;
     486              : 
     487            0 :         if (prevClass <= nextClass)
     488            0 :             continue;
     489              : 
     490              :         /* exchange can happen */
     491            0 :         tmp = decomp_chars[count - 1];
     492            0 :         decomp_chars[count - 1] = decomp_chars[count];
     493            0 :         decomp_chars[count] = tmp;
     494              : 
     495              :         /* backtrack to check again */
     496            0 :         if (count > 1)
     497            0 :             count -= 2;
     498              :     }
     499              : 
     500          417 :     if (!recompose)
     501           73 :         return decomp_chars;
     502              : 
     503              :     /*
     504              :      * The last phase of NFC and NFKC is the recomposition of the reordered
     505              :      * Unicode string using combining classes. The recomposed string cannot be
     506              :      * longer than the decomposed one, so make the allocation of the output
     507              :      * string based on that assumption.
     508              :      */
     509          344 :     recomp_chars = (char32_t *) ALLOC((decomp_size + 1) * sizeof(char32_t));
     510          344 :     if (!recomp_chars)
     511              :     {
     512            0 :         FREE(decomp_chars);
     513            0 :         return NULL;
     514              :     }
     515              : 
     516          344 :     last_class = -1;            /* this eliminates a special check */
     517          344 :     starter_pos = 0;
     518          344 :     target_pos = 1;
     519          344 :     starter_ch = recomp_chars[0] = decomp_chars[0];
     520              : 
     521         2957 :     for (count = 1; count < decomp_size; count++)
     522              :     {
     523         2613 :         char32_t    ch = decomp_chars[count];
     524         2613 :         int         ch_class = get_canonical_class(ch);
     525              :         char32_t    composite;
     526              : 
     527         5226 :         if (last_class < ch_class &&
     528         2613 :             recompose_code(starter_ch, ch, &composite))
     529              :         {
     530          102 :             recomp_chars[starter_pos] = composite;
     531          102 :             starter_ch = composite;
     532              :         }
     533         2511 :         else if (ch_class == 0)
     534              :         {
     535         2511 :             starter_pos = target_pos;
     536         2511 :             starter_ch = ch;
     537         2511 :             last_class = -1;
     538         2511 :             recomp_chars[target_pos++] = ch;
     539              :         }
     540              :         else
     541              :         {
     542            0 :             last_class = ch_class;
     543            0 :             recomp_chars[target_pos++] = ch;
     544              :         }
     545              :     }
     546          344 :     recomp_chars[target_pos] = (char32_t) '\0';
     547              : 
     548          344 :     FREE(decomp_chars);
     549              : 
     550          344 :     return recomp_chars;
     551              : }
     552              : 
     553              : /*
     554              :  * Normalization "quick check" algorithm; see
     555              :  * <http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms>
     556              :  */
     557              : 
     558              : /* We only need this in the backend. */
     559              : #ifndef FRONTEND
     560              : 
     561              : static const pg_unicode_normprops *
     562          134 : qc_hash_lookup(char32_t ch, const pg_unicode_norminfo *norminfo)
     563              : {
     564              :     int         h;
     565              :     uint32      hashkey;
     566              : 
     567              :     /*
     568              :      * Compute the hash function. The hash key is the codepoint with the bytes
     569              :      * in network order.
     570              :      */
     571          134 :     hashkey = pg_hton32(ch);
     572          134 :     h = norminfo->hash(&hashkey);
     573              : 
     574              :     /* An out-of-range result implies no match */
     575          134 :     if (h < 0 || h >= norminfo->num_normprops)
     576           92 :         return NULL;
     577              : 
     578              :     /*
     579              :      * Since it's a perfect hash, we need only match to the specific codepoint
     580              :      * it identifies.
     581              :      */
     582           42 :     if (ch != norminfo->normprops[h].codepoint)
     583           18 :         return NULL;
     584              : 
     585              :     /* Success! */
     586           24 :     return &norminfo->normprops[h];
     587              : }
     588              : 
     589              : /*
     590              :  * Look up the normalization quick check character property
     591              :  */
     592              : static UnicodeNormalizationQC
     593          134 : qc_is_allowed(UnicodeNormalizationForm form, char32_t ch)
     594              : {
     595          134 :     const pg_unicode_normprops *found = NULL;
     596              : 
     597          134 :     switch (form)
     598              :     {
     599           86 :         case UNICODE_NFC:
     600           86 :             found = qc_hash_lookup(ch, &UnicodeNormInfo_NFC_QC);
     601           86 :             break;
     602           48 :         case UNICODE_NFKC:
     603           48 :             found = qc_hash_lookup(ch, &UnicodeNormInfo_NFKC_QC);
     604           48 :             break;
     605            0 :         default:
     606              :             Assert(false);
     607            0 :             break;
     608              :     }
     609              : 
     610          134 :     if (found)
     611           24 :         return found->quickcheck;
     612              :     else
     613          110 :         return UNICODE_NORM_QC_YES;
     614              : }
     615              : 
     616              : UnicodeNormalizationQC
     617           90 : unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const char32_t *input)
     618              : {
     619           90 :     uint8       lastCanonicalClass = 0;
     620           90 :     UnicodeNormalizationQC result = UNICODE_NORM_QC_YES;
     621              : 
     622              :     /*
     623              :      * For the "D" forms, we don't run the quickcheck.  We don't include the
     624              :      * lookup tables for those because they are huge, checking for these
     625              :      * particular forms is less common, and running the slow path is faster
     626              :      * for the "D" forms than the "C" forms because you don't need to
     627              :      * recompose, which is slow.
     628              :      */
     629           90 :     if (form == UNICODE_NFD || form == UNICODE_NFKD)
     630           40 :         return UNICODE_NORM_QC_MAYBE;
     631              : 
     632          176 :     for (const char32_t *p = input; *p; p++)
     633              :     {
     634          134 :         char32_t    ch = *p;
     635              :         uint8       canonicalClass;
     636              :         UnicodeNormalizationQC check;
     637              : 
     638          134 :         canonicalClass = get_canonical_class(ch);
     639          134 :         if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
     640            0 :             return UNICODE_NORM_QC_NO;
     641              : 
     642          134 :         check = qc_is_allowed(form, ch);
     643          134 :         if (check == UNICODE_NORM_QC_NO)
     644            8 :             return UNICODE_NORM_QC_NO;
     645          126 :         else if (check == UNICODE_NORM_QC_MAYBE)
     646           16 :             result = UNICODE_NORM_QC_MAYBE;
     647              : 
     648          126 :         lastCanonicalClass = canonicalClass;
     649              :     }
     650           42 :     return result;
     651              : }
     652              : 
     653              : #endif                          /* !FRONTEND */
        

Generated by: LCOV version 2.0-1