LCOV - code coverage report
Current view: top level - src/common - unicode_norm.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 156 200 78.0 %
Date: 2020-11-27 11:06:40 Functions: 11 11 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  * unicode_norm.c
       3             :  *      Normalize a Unicode string
       4             :  *
       5             :  * This implements Unicode normalization, per the documentation at
       6             :  * https://www.unicode.org/reports/tr15/.
       7             :  *
       8             :  * Portions Copyright (c) 2017-2020, PostgreSQL Global Development Group
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/common/unicode_norm.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #ifndef FRONTEND
      16             : #include "postgres.h"
      17             : #else
      18             : #include "postgres_fe.h"
      19             : #endif
      20             : 
      21             : #include "common/unicode_norm.h"
      22             : #ifndef FRONTEND
      23             : #include "common/unicode_norm_hashfunc.h"
      24             : #include "common/unicode_normprops_table.h"
      25             : #include "port/pg_bswap.h"
      26             : #else
      27             : #include "common/unicode_norm_table.h"
      28             : #endif
      29             : 
      30             : #ifndef FRONTEND
      31             : #define ALLOC(size) palloc(size)
      32             : #define FREE(size) pfree(size)
      33             : #else
      34             : #define ALLOC(size) malloc(size)
      35             : #define FREE(size) free(size)
      36             : #endif
      37             : 
      38             : /* Constants for calculations with Hangul characters */
      39             : #define SBASE       0xAC00      /* U+AC00 */
      40             : #define LBASE       0x1100      /* U+1100 */
      41             : #define VBASE       0x1161      /* U+1161 */
      42             : #define TBASE       0x11A7      /* U+11A7 */
      43             : #define LCOUNT      19
      44             : #define VCOUNT      21
      45             : #define TCOUNT      28
      46             : #define NCOUNT      VCOUNT * TCOUNT
      47             : #define SCOUNT      LCOUNT * NCOUNT
      48             : 
      49             : #ifdef FRONTEND
      50             : /* comparison routine for bsearch() of decomposition lookup table. */
      51             : static int
      52        2548 : conv_compare(const void *p1, const void *p2)
      53             : {
      54             :     uint32      v1,
      55             :                 v2;
      56             : 
      57        2548 :     v1 = *(const uint32 *) p1;
      58        2548 :     v2 = ((const pg_unicode_decomposition *) p2)->codepoint;
      59        2548 :     return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
      60             : }
      61             : 
      62             : #endif
      63             : 
      64             : /*
      65             :  * get_code_entry
      66             :  *
      67             :  * Get the entry corresponding to code in the decomposition lookup table.
      68             :  * The backend version of this code uses a perfect hash function for the
      69             :  * lookup, while the frontend version uses a binary search.
      70             :  */
      71             : static const pg_unicode_decomposition *
      72        1530 : get_code_entry(pg_wchar code)
      73             : {
      74             : #ifndef FRONTEND
      75             :     int         h;
      76             :     uint32      hashkey;
      77        1334 :     pg_unicode_decompinfo decompinfo = UnicodeDecompInfo;
      78             : 
      79             :     /*
      80             :      * Compute the hash function. The hash key is the codepoint with the bytes
      81             :      * in network order.
      82             :      */
      83        1334 :     hashkey = pg_hton32(code);
      84        1334 :     h = decompinfo.hash(&hashkey);
      85             : 
      86             :     /* An out-of-range result implies no match */
      87        1334 :     if (h < 0 || h >= decompinfo.num_decomps)
      88         492 :         return NULL;
      89             : 
      90             :     /*
      91             :      * Since it's a perfect hash, we need only match to the specific codepoint
      92             :      * it identifies.
      93             :      */
      94         842 :     if (code != decompinfo.decomps[h].codepoint)
      95         278 :         return NULL;
      96             : 
      97             :     /* Success! */
      98         564 :     return &decompinfo.decomps[h];
      99             : #else
     100         196 :     return bsearch(&(code),
     101             :                    UnicodeDecompMain,
     102             :                    lengthof(UnicodeDecompMain),
     103             :                    sizeof(pg_unicode_decomposition),
     104             :                    conv_compare);
     105             : #endif
     106             : }
     107             : 
     108             : 
     109             : /*
     110             :  * Given a decomposition entry looked up earlier, get the decomposed
     111             :  * characters.
     112             :  *
     113             :  * Note: the returned pointer can point to statically allocated buffer, and
     114             :  * is only valid until next call to this function!
     115             :  */
     116             : static const pg_wchar *
     117         104 : get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)
     118             : {
     119             :     static pg_wchar x;
     120             : 
     121         104 :     if (DECOMPOSITION_IS_INLINE(entry))
     122             :     {
     123             :         Assert(DECOMPOSITION_SIZE(entry) == 1);
     124          44 :         x = (pg_wchar) entry->dec_index;
     125          44 :         *dec_size = 1;
     126          44 :         return &x;
     127             :     }
     128             :     else
     129             :     {
     130          60 :         *dec_size = DECOMPOSITION_SIZE(entry);
     131          60 :         return &UnicodeDecomp_codepoints[entry->dec_index];
     132             :     }
     133             : }
     134             : 
     135             : /*
     136             :  * Calculate how many characters a given character will decompose to.
     137             :  *
     138             :  * This needs to recurse, if the character decomposes into characters that
     139             :  * are, in turn, decomposable.
     140             :  */
     141             : static int
     142         386 : get_decomposed_size(pg_wchar code, bool compat)
     143             : {
     144             :     const pg_unicode_decomposition *entry;
     145         386 :     int         size = 0;
     146             :     int         i;
     147             :     const uint32 *decomp;
     148             :     int         dec_size;
     149             : 
     150             :     /*
     151             :      * Fast path for Hangul characters not stored in tables to save memory as
     152             :      * decomposition is algorithmic. See
     153             :      * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
     154             :      * on the matter.
     155             :      */
     156         386 :     if (code >= SBASE && code < SBASE + SCOUNT)
     157             :     {
     158             :         uint32      tindex,
     159             :                     sindex;
     160             : 
     161           0 :         sindex = code - SBASE;
     162           0 :         tindex = sindex % TCOUNT;
     163             : 
     164           0 :         if (tindex != 0)
     165           0 :             return 3;
     166           0 :         return 2;
     167             :     }
     168             : 
     169         386 :     entry = get_code_entry(code);
     170             : 
     171             :     /*
     172             :      * Just count current code if no other decompositions.  A NULL entry is
     173             :      * equivalent to a character with class 0 and no decompositions.
     174             :      */
     175         386 :     if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
     176          76 :         (!compat && DECOMPOSITION_IS_COMPAT(entry)))
     177         334 :         return 1;
     178             : 
     179             :     /*
     180             :      * If this entry has other decomposition codes look at them as well. First
     181             :      * get its decomposition in the list of tables available.
     182             :      */
     183          52 :     decomp = get_code_decomposition(entry, &dec_size);
     184         134 :     for (i = 0; i < dec_size; i++)
     185             :     {
     186          82 :         uint32      lcode = decomp[i];
     187             : 
     188          82 :         size += get_decomposed_size(lcode, compat);
     189             :     }
     190             : 
     191          52 :     return size;
     192             : }
     193             : 
     194             : /*
     195             :  * Recompose a set of characters. For hangul characters, the calculation
     196             :  * is algorithmic. For others, an inverse lookup at the decomposition
     197             :  * table is necessary. Returns true if a recomposition can be done, and
     198             :  * false otherwise.
     199             :  */
     200             : static bool
     201         130 : recompose_code(uint32 start, uint32 code, uint32 *result)
     202             : {
     203             :     /*
     204             :      * Handle Hangul characters algorithmically, per the Unicode spec.
     205             :      *
     206             :      * Check if two current characters are L and V.
     207             :      */
     208         130 :     if (start >= LBASE && start < LBASE + LCOUNT &&
     209           0 :         code >= VBASE && code < VBASE + VCOUNT)
     210             :     {
     211             :         /* make syllable of form LV */
     212           0 :         uint32      lindex = start - LBASE;
     213           0 :         uint32      vindex = code - VBASE;
     214             : 
     215           0 :         *result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
     216           0 :         return true;
     217             :     }
     218             :     /* Check if two current characters are LV and T */
     219         130 :     else if (start >= SBASE && start < (SBASE + SCOUNT) &&
     220           0 :              ((start - SBASE) % TCOUNT) == 0 &&
     221           0 :              code >= TBASE && code < (TBASE + TCOUNT))
     222             :     {
     223             :         /* make syllable of form LVT */
     224           0 :         uint32      tindex = code - TBASE;
     225             : 
     226           0 :         *result = start + tindex;
     227           0 :         return true;
     228             :     }
     229             :     else
     230             :     {
     231             :         const pg_unicode_decomposition *entry;
     232             : 
     233             :         /*
     234             :          * Do an inverse lookup of the decomposition tables to see if anything
     235             :          * matches. The comparison just needs to be a perfect match on the
     236             :          * sub-table of size two, because the start character has already been
     237             :          * recomposed partially.  This lookup uses a perfect hash function for
     238             :          * the backend code.
     239             :          */
     240             : #ifndef FRONTEND
     241             : 
     242             :         int         h,
     243             :                     inv_lookup_index;
     244             :         uint64      hashkey;
     245          98 :         pg_unicode_recompinfo recompinfo = UnicodeRecompInfo;
     246             : 
     247             :         /*
     248             :          * Compute the hash function. The hash key is formed by concatenating
     249             :          * bytes of the two codepoints in network order. See also
     250             :          * src/common/unicode/generate-unicode_norm_table.pl.
     251             :          */
     252          98 :         hashkey = pg_hton64(((uint64) start << 32) | (uint64) code);
     253          98 :         h = recompinfo.hash(&hashkey);
     254             : 
     255             :         /* An out-of-range result implies no match */
     256          98 :         if (h < 0 || h >= recompinfo.num_recomps)
     257          94 :             return false;
     258             : 
     259          32 :         inv_lookup_index = recompinfo.inverse_lookup[h];
     260          32 :         entry = &UnicodeDecompMain[inv_lookup_index];
     261             : 
     262          32 :         if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
     263          28 :             code == UnicodeDecomp_codepoints[entry->dec_index + 1])
     264             :         {
     265          28 :             *result = entry->codepoint;
     266          28 :             return true;
     267             :         }
     268             : 
     269             : #else
     270             : 
     271             :         int         i;
     272             : 
     273      211360 :         for (i = 0; i < lengthof(UnicodeDecompMain); i++)
     274             :         {
     275      211328 :             entry = &UnicodeDecompMain[i];
     276             : 
     277      211328 :             if (DECOMPOSITION_SIZE(entry) != 2)
     278      157760 :                 continue;
     279             : 
     280       53568 :             if (DECOMPOSITION_NO_COMPOSE(entry))
     281       23456 :                 continue;
     282             : 
     283       30112 :             if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
     284         280 :                 code == UnicodeDecomp_codepoints[entry->dec_index + 1])
     285             :             {
     286           0 :                 *result = entry->codepoint;
     287           0 :                 return true;
     288             :             }
     289             :         }
     290             : #endif                          /* !FRONTEND */
     291             :     }
     292             : 
     293          36 :     return false;
     294             : }
     295             : 
     296             : /*
     297             :  * Decompose the given code into the array given by caller. The
     298             :  * decomposition begins at the position given by caller, saving one
     299             :  * lookup on the decomposition table. The current position needs to be
     300             :  * updated here to let the caller know from where to continue filling
     301             :  * in the array result.
     302             :  */
     303             : static void
     304         386 : decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
     305             : {
     306             :     const pg_unicode_decomposition *entry;
     307             :     int         i;
     308             :     const uint32 *decomp;
     309             :     int         dec_size;
     310             : 
     311             :     /*
     312             :      * Fast path for Hangul characters not stored in tables to save memory as
     313             :      * decomposition is algorithmic. See
     314             :      * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
     315             :      * on the matter.
     316             :      */
     317         386 :     if (code >= SBASE && code < SBASE + SCOUNT)
     318             :     {
     319             :         uint32      l,
     320             :                     v,
     321             :                     tindex,
     322             :                     sindex;
     323           0 :         pg_wchar   *res = *result;
     324             : 
     325           0 :         sindex = code - SBASE;
     326           0 :         l = LBASE + sindex / (VCOUNT * TCOUNT);
     327           0 :         v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT;
     328           0 :         tindex = sindex % TCOUNT;
     329             : 
     330           0 :         res[*current] = l;
     331           0 :         (*current)++;
     332           0 :         res[*current] = v;
     333           0 :         (*current)++;
     334             : 
     335           0 :         if (tindex != 0)
     336             :         {
     337           0 :             res[*current] = TBASE + tindex;
     338           0 :             (*current)++;
     339             :         }
     340             : 
     341         334 :         return;
     342             :     }
     343             : 
     344         386 :     entry = get_code_entry(code);
     345             : 
     346             :     /*
     347             :      * Just fill in with the current decomposition if there are no
     348             :      * decomposition codes to recurse to.  A NULL entry is equivalent to a
     349             :      * character with class 0 and no decompositions, so just leave also in
     350             :      * this case.
     351             :      */
     352         386 :     if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
     353          76 :         (!compat && DECOMPOSITION_IS_COMPAT(entry)))
     354             :     {
     355         334 :         pg_wchar   *res = *result;
     356             : 
     357         334 :         res[*current] = code;
     358         334 :         (*current)++;
     359         334 :         return;
     360             :     }
     361             : 
     362             :     /*
     363             :      * If this entry has other decomposition codes look at them as well.
     364             :      */
     365          52 :     decomp = get_code_decomposition(entry, &dec_size);
     366         134 :     for (i = 0; i < dec_size; i++)
     367             :     {
     368          82 :         pg_wchar    lcode = (pg_wchar) decomp[i];
     369             : 
     370             :         /* Leave if no more decompositions */
     371          82 :         decompose_code(lcode, compat, result, current);
     372             :     }
     373             : }
     374             : 
     375             : /*
     376             :  * unicode_normalize - Normalize a Unicode string to the specified form.
     377             :  *
     378             :  * The input is a 0-terminated array of codepoints.
     379             :  *
     380             :  * In frontend, returns a 0-terminated array of codepoints, allocated with
     381             :  * malloc. Or NULL if we run out of memory. In backend, the returned
     382             :  * string is palloc'd instead, and OOM is reported with ereport().
     383             :  */
     384             : pg_wchar *
     385          84 : unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
     386             : {
     387          84 :     bool        compat = (form == UNICODE_NFKC || form == UNICODE_NFKD);
     388          84 :     bool        recompose = (form == UNICODE_NFC || form == UNICODE_NFKC);
     389             :     pg_wchar   *decomp_chars;
     390             :     pg_wchar   *recomp_chars;
     391             :     int         decomp_size,
     392             :                 current_size;
     393             :     int         count;
     394             :     const pg_wchar *p;
     395             : 
     396             :     /* variables for recomposition */
     397             :     int         last_class;
     398             :     int         starter_pos;
     399             :     int         target_pos;
     400             :     uint32      starter_ch;
     401             : 
     402             :     /* First, do character decomposition */
     403             : 
     404             :     /*
     405             :      * Calculate how many characters long the decomposed version will be.
     406             :      */
     407          84 :     decomp_size = 0;
     408         388 :     for (p = input; *p; p++)
     409         304 :         decomp_size += get_decomposed_size(*p, compat);
     410             : 
     411          84 :     decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
     412          84 :     if (decomp_chars == NULL)
     413           0 :         return NULL;
     414             : 
     415             :     /*
     416             :      * Now fill in each entry recursively. This needs a second pass on the
     417             :      * decomposition table.
     418             :      */
     419          84 :     current_size = 0;
     420         388 :     for (p = input; *p; p++)
     421         304 :         decompose_code(*p, compat, &decomp_chars, &current_size);
     422          84 :     decomp_chars[decomp_size] = '\0';
     423             :     Assert(decomp_size == current_size);
     424             : 
     425             :     /*
     426             :      * Now apply canonical ordering.
     427             :      */
     428         334 :     for (count = 1; count < decomp_size; count++)
     429             :     {
     430         250 :         pg_wchar    prev = decomp_chars[count - 1];
     431         250 :         pg_wchar    next = decomp_chars[count];
     432             :         pg_wchar    tmp;
     433         250 :         const pg_unicode_decomposition *prevEntry = get_code_entry(prev);
     434         250 :         const pg_unicode_decomposition *nextEntry = get_code_entry(next);
     435             : 
     436             :         /*
     437             :          * If no entries are found, the character used is either an Hangul
     438             :          * character or a character with a class of 0 and no decompositions,
     439             :          * so move to next result.
     440             :          */
     441         250 :         if (prevEntry == NULL || nextEntry == NULL)
     442         226 :             continue;
     443             : 
     444             :         /*
     445             :          * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html)
     446             :          * annex 4, a sequence of two adjacent characters in a string is an
     447             :          * exchangeable pair if the combining class (from the Unicode
     448             :          * Character Database) for the first character is greater than the
     449             :          * combining class for the second, and the second is not a starter.  A
     450             :          * character is a starter if its combining class is 0.
     451             :          */
     452          24 :         if (nextEntry->comb_class == 0x0 || prevEntry->comb_class == 0x0)
     453          24 :             continue;
     454             : 
     455           0 :         if (prevEntry->comb_class <= nextEntry->comb_class)
     456           0 :             continue;
     457             : 
     458             :         /* exchange can happen */
     459           0 :         tmp = decomp_chars[count - 1];
     460           0 :         decomp_chars[count - 1] = decomp_chars[count];
     461           0 :         decomp_chars[count] = tmp;
     462             : 
     463             :         /* backtrack to check again */
     464           0 :         if (count > 1)
     465           0 :             count -= 2;
     466             :     }
     467             : 
     468          84 :     if (!recompose)
     469          40 :         return decomp_chars;
     470             : 
     471             :     /*
     472             :      * The last phase of NFC and NFKC is the recomposition of the reordered
     473             :      * Unicode string using combining classes. The recomposed string cannot be
     474             :      * longer than the decomposed one, so make the allocation of the output
     475             :      * string based on that assumption.
     476             :      */
     477          44 :     recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
     478          44 :     if (!recomp_chars)
     479             :     {
     480           0 :         FREE(decomp_chars);
     481           0 :         return NULL;
     482             :     }
     483             : 
     484          44 :     last_class = -1;            /* this eliminates a special check */
     485          44 :     starter_pos = 0;
     486          44 :     target_pos = 1;
     487          44 :     starter_ch = recomp_chars[0] = decomp_chars[0];
     488             : 
     489         174 :     for (count = 1; count < decomp_size; count++)
     490             :     {
     491         130 :         pg_wchar    ch = decomp_chars[count];
     492         130 :         const pg_unicode_decomposition *ch_entry = get_code_entry(ch);
     493         130 :         int         ch_class = (ch_entry == NULL) ? 0 : ch_entry->comb_class;
     494             :         pg_wchar    composite;
     495             : 
     496         260 :         if (last_class < ch_class &&
     497         130 :             recompose_code(starter_ch, ch, &composite))
     498             :         {
     499          28 :             recomp_chars[starter_pos] = composite;
     500          28 :             starter_ch = composite;
     501             :         }
     502         102 :         else if (ch_class == 0)
     503             :         {
     504         102 :             starter_pos = target_pos;
     505         102 :             starter_ch = ch;
     506         102 :             last_class = -1;
     507         102 :             recomp_chars[target_pos++] = ch;
     508             :         }
     509             :         else
     510             :         {
     511           0 :             last_class = ch_class;
     512           0 :             recomp_chars[target_pos++] = ch;
     513             :         }
     514             :     }
     515          44 :     recomp_chars[target_pos] = (pg_wchar) '\0';
     516             : 
     517          44 :     FREE(decomp_chars);
     518             : 
     519          44 :     return recomp_chars;
     520             : }
     521             : 
     522             : /*
     523             :  * Normalization "quick check" algorithm; see
     524             :  * <http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms>
     525             :  */
     526             : 
     527             : /* We only need this in the backend. */
     528             : #ifndef FRONTEND
     529             : 
     530             : static uint8
     531         128 : get_canonical_class(pg_wchar ch)
     532             : {
     533         128 :     const pg_unicode_decomposition *entry = get_code_entry(ch);
     534             : 
     535         128 :     if (!entry)
     536          64 :         return 0;
     537             :     else
     538          64 :         return entry->comb_class;
     539             : }
     540             : 
     541             : static const pg_unicode_normprops *
     542         128 : qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)
     543             : {
     544             :     int         h;
     545             :     uint32      hashkey;
     546             : 
     547             :     /*
     548             :      * Compute the hash function. The hash key is the codepoint with the bytes
     549             :      * in network order.
     550             :      */
     551         128 :     hashkey = pg_hton32(ch);
     552         128 :     h = norminfo->hash(&hashkey);
     553             : 
     554             :     /* An out-of-range result implies no match */
     555         128 :     if (h < 0 || h >= norminfo->num_normprops)
     556          56 :         return NULL;
     557             : 
     558             :     /*
     559             :      * Since it's a perfect hash, we need only match to the specific codepoint
     560             :      * it identifies.
     561             :      */
     562          72 :     if (ch != norminfo->normprops[h].codepoint)
     563          48 :         return NULL;
     564             : 
     565             :     /* Success! */
     566          24 :     return &norminfo->normprops[h];
     567             : }
     568             : 
     569             : /*
     570             :  * Look up the normalization quick check character property
     571             :  */
     572             : static UnicodeNormalizationQC
     573         128 : qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)
     574             : {
     575         128 :     const pg_unicode_normprops *found = NULL;
     576             : 
     577         128 :     switch (form)
     578             :     {
     579          80 :         case UNICODE_NFC:
     580          80 :             found = qc_hash_lookup(ch, &UnicodeNormInfo_NFC_QC);
     581          80 :             break;
     582          48 :         case UNICODE_NFKC:
     583          48 :             found = qc_hash_lookup(ch, &UnicodeNormInfo_NFKC_QC);
     584          48 :             break;
     585           0 :         default:
     586             :             Assert(false);
     587           0 :             break;
     588             :     }
     589             : 
     590         128 :     if (found)
     591          24 :         return found->quickcheck;
     592             :     else
     593         104 :         return UNICODE_NORM_QC_YES;
     594             : }
     595             : 
     596             : UnicodeNormalizationQC
     597          72 : unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input)
     598             : {
     599          72 :     uint8       lastCanonicalClass = 0;
     600          72 :     UnicodeNormalizationQC result = UNICODE_NORM_QC_YES;
     601             : 
     602             :     /*
     603             :      * For the "D" forms, we don't run the quickcheck.  We don't include the
     604             :      * lookup tables for those because they are huge, checking for these
     605             :      * particular forms is less common, and running the slow path is faster
     606             :      * for the "D" forms than the "C" forms because you don't need to
     607             :      * recompose, which is slow.
     608             :      */
     609          72 :     if (form == UNICODE_NFD || form == UNICODE_NFKD)
     610          32 :         return UNICODE_NORM_QC_MAYBE;
     611             : 
     612         160 :     for (const pg_wchar *p = input; *p; p++)
     613             :     {
     614         128 :         pg_wchar    ch = *p;
     615             :         uint8       canonicalClass;
     616             :         UnicodeNormalizationQC check;
     617             : 
     618         128 :         canonicalClass = get_canonical_class(ch);
     619         128 :         if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
     620           0 :             return UNICODE_NORM_QC_NO;
     621             : 
     622         128 :         check = qc_is_allowed(form, ch);
     623         128 :         if (check == UNICODE_NORM_QC_NO)
     624           8 :             return UNICODE_NORM_QC_NO;
     625         120 :         else if (check == UNICODE_NORM_QC_MAYBE)
     626          16 :             result = UNICODE_NORM_QC_MAYBE;
     627             : 
     628         120 :         lastCanonicalClass = canonicalClass;
     629             :     }
     630          32 :     return result;
     631             : }
     632             : 
     633             : #endif                          /* !FRONTEND */

Generated by: LCOV version 1.13