LCOV - PostgreSQL 18devel - src/common/unicode

LCOV - code coverage report

Current view:	top level - src/common - unicode_norm.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL 18devel	Lines:	154	199	77.4 %
Date:	2024-11-21 08:14:44	Functions:	11	11	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  * unicode_norm.c
       3             :  *      Normalize a Unicode string
       4             :  *
       5             :  * This implements Unicode normalization, per the documentation at
       6             :  * https://www.unicode.org/reports/tr15/.
       7             :  *
       8             :  * Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/common/unicode_norm.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #ifndef FRONTEND
      16             : #include "postgres.h"
      17             : #else
      18             : #include "postgres_fe.h"
      19             : #endif
      20             : 
      21             : #include "common/unicode_norm.h"
      22             : #ifndef FRONTEND
      23             : #include "common/unicode_norm_hashfunc.h"
      24             : #include "common/unicode_normprops_table.h"
      25             : #include "port/pg_bswap.h"
      26             : #else
      27             : #include "common/unicode_norm_table.h"
      28             : #endif
      29             : 
      30             : #ifndef FRONTEND
      31             : #define ALLOC(size) palloc(size)
      32             : #define FREE(size) pfree(size)
      33             : #else
      34             : #define ALLOC(size) malloc(size)
      35             : #define FREE(size) free(size)
      36             : #endif
      37             : 
      38             : /* Constants for calculations with Hangul characters */
      39             : #define SBASE       0xAC00      /* U+AC00 */
      40             : #define LBASE       0x1100      /* U+1100 */
      41             : #define VBASE       0x1161      /* U+1161 */
      42             : #define TBASE       0x11A7      /* U+11A7 */
      43             : #define LCOUNT      19
      44             : #define VCOUNT      21
      45             : #define TCOUNT      28
      46             : #define NCOUNT      VCOUNT * TCOUNT
      47             : #define SCOUNT      LCOUNT * NCOUNT
      48             : 
      49             : #ifdef FRONTEND
      50             : /* comparison routine for bsearch() of decomposition lookup table. */
      51             : static int
      52        2520 : conv_compare(const void *p1, const void *p2)
      53             : {
      54             :     uint32      v1,
      55             :                 v2;
      56             : 
      57        2520 :     v1 = *(const uint32 *) p1;
      58        2520 :     v2 = ((const pg_unicode_decomposition *) p2)->codepoint;
      59        2520 :     return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
      60             : }
      61             : 
      62             : #endif
      63             : 
      64             : /*
      65             :  * get_code_entry
      66             :  *
      67             :  * Get the entry corresponding to code in the decomposition lookup table.
      68             :  * The backend version of this code uses a perfect hash function for the
      69             :  * lookup, while the frontend version uses a binary search.
      70             :  */
      71             : static const pg_unicode_decomposition *
      72        2156 : get_code_entry(pg_wchar code)
      73             : {
      74             : #ifndef FRONTEND
      75             :     int         h;
      76             :     uint32      hashkey;
      77        1960 :     pg_unicode_decompinfo decompinfo = UnicodeDecompInfo;
      78             : 
      79             :     /*
      80             :      * Compute the hash function. The hash key is the codepoint with the bytes
      81             :      * in network order.
      82             :      */
      83        1960 :     hashkey = pg_hton32(code);
      84        1960 :     h = decompinfo.hash(&hashkey);
      85             : 
      86             :     /* An out-of-range result implies no match */
      87        1960 :     if (h < 0 || h >= decompinfo.num_decomps)
      88        1116 :         return NULL;
      89             : 
      90             :     /*
      91             :      * Since it's a perfect hash, we need only match to the specific codepoint
      92             :      * it identifies.
      93             :      */
      94         844 :     if (code != decompinfo.decomps[h].codepoint)
      95           0 :         return NULL;
      96             : 
      97             :     /* Success! */
      98         844 :     return &decompinfo.decomps[h];
      99             : #else
     100         196 :     return bsearch(&(code),
     101             :                    UnicodeDecompMain,
     102             :                    lengthof(UnicodeDecompMain),
     103             :                    sizeof(pg_unicode_decomposition),
     104             :                    conv_compare);
     105             : #endif
     106             : }
     107             : 
     108             : /*
     109             :  * Get the combining class of the given codepoint.
     110             :  */
     111             : static uint8
     112        1068 : get_canonical_class(pg_wchar code)
     113             : {
     114        1068 :     const pg_unicode_decomposition *entry = get_code_entry(code);
     115             : 
     116             :     /*
     117             :      * If no entries are found, the character used is either an Hangul
     118             :      * character or a character with a class of 0 and no decompositions.
     119             :      */
     120        1068 :     if (!entry)
     121         636 :         return 0;
     122             :     else
     123         432 :         return entry->comb_class;
     124             : }
     125             : 
     126             : /*
     127             :  * Given a decomposition entry looked up earlier, get the decomposed
     128             :  * characters.
     129             :  *
     130             :  * Note: the returned pointer can point to statically allocated buffer, and
     131             :  * is only valid until next call to this function!
     132             :  */
     133             : static const pg_wchar *
     134         148 : get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)
     135             : {
     136             :     static pg_wchar x;
     137             : 
     138         148 :     if (DECOMPOSITION_IS_INLINE(entry))
     139             :     {
     140             :         Assert(DECOMPOSITION_SIZE(entry) == 1);
     141          60 :         x = (pg_wchar) entry->dec_index;
     142          60 :         *dec_size = 1;
     143          60 :         return &x;
     144             :     }
     145             :     else
     146             :     {
     147          88 :         *dec_size = DECOMPOSITION_SIZE(entry);
     148          88 :         return &UnicodeDecomp_codepoints[entry->dec_index];
     149             :     }
     150             : }
     151             : 
     152             : /*
     153             :  * Calculate how many characters a given character will decompose to.
     154             :  *
     155             :  * This needs to recurse, if the character decomposes into characters that
     156             :  * are, in turn, decomposable.
     157             :  */
     158             : static int
     159         544 : get_decomposed_size(pg_wchar code, bool compat)
     160             : {
     161             :     const pg_unicode_decomposition *entry;
     162         544 :     int         size = 0;
     163             :     int         i;
     164             :     const uint32 *decomp;
     165             :     int         dec_size;
     166             : 
     167             :     /*
     168             :      * Fast path for Hangul characters not stored in tables to save memory as
     169             :      * decomposition is algorithmic. See
     170             :      * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
     171             :      * on the matter.
     172             :      */
     173         544 :     if (code >= SBASE && code < SBASE + SCOUNT)
     174             :     {
     175             :         uint32      tindex,
     176             :                     sindex;
     177             : 
     178           0 :         sindex = code - SBASE;
     179           0 :         tindex = sindex % TCOUNT;
     180             : 
     181           0 :         if (tindex != 0)
     182           0 :             return 3;
     183           0 :         return 2;
     184             :     }
     185             : 
     186         544 :     entry = get_code_entry(code);
     187             : 
     188             :     /*
     189             :      * Just count current code if no other decompositions.  A NULL entry is
     190             :      * equivalent to a character with class 0 and no decompositions.
     191             :      */
     192         544 :     if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
     193         110 :         (!compat && DECOMPOSITION_IS_COMPAT(entry)))
     194         470 :         return 1;
     195             : 
     196             :     /*
     197             :      * If this entry has other decomposition codes look at them as well. First
     198             :      * get its decomposition in the list of tables available.
     199             :      */
     200          74 :     decomp = get_code_decomposition(entry, &dec_size);
     201         192 :     for (i = 0; i < dec_size; i++)
     202             :     {
     203         118 :         uint32      lcode = decomp[i];
     204             : 
     205         118 :         size += get_decomposed_size(lcode, compat);
     206             :     }
     207             : 
     208          74 :     return size;
     209             : }
     210             : 
     211             : /*
     212             :  * Recompose a set of characters. For hangul characters, the calculation
     213             :  * is algorithmic. For others, an inverse lookup at the decomposition
     214             :  * table is necessary. Returns true if a recomposition can be done, and
     215             :  * false otherwise.
     216             :  */
     217             : static bool
     218         172 : recompose_code(uint32 start, uint32 code, uint32 *result)
     219             : {
     220             :     /*
     221             :      * Handle Hangul characters algorithmically, per the Unicode spec.
     222             :      *
     223             :      * Check if two current characters are L and V.
     224             :      */
     225         172 :     if (start >= LBASE && start < LBASE + LCOUNT &&
     226           0 :         code >= VBASE && code < VBASE + VCOUNT)
     227             :     {
     228             :         /* make syllable of form LV */
     229           0 :         uint32      lindex = start - LBASE;
     230           0 :         uint32      vindex = code - VBASE;
     231             : 
     232           0 :         *result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
     233           0 :         return true;
     234             :     }
     235             :     /* Check if two current characters are LV and T */
     236         172 :     else if (start >= SBASE && start < (SBASE + SCOUNT) &&
     237           0 :              ((start - SBASE) % TCOUNT) == 0 &&
     238           0 :              code >= TBASE && code < (TBASE + TCOUNT))
     239             :     {
     240             :         /* make syllable of form LVT */
     241           0 :         uint32      tindex = code - TBASE;
     242             : 
     243           0 :         *result = start + tindex;
     244           0 :         return true;
     245             :     }
     246             :     else
     247             :     {
     248             :         const pg_unicode_decomposition *entry;
     249             : 
     250             :         /*
     251             :          * Do an inverse lookup of the decomposition tables to see if anything
     252             :          * matches. The comparison just needs to be a perfect match on the
     253             :          * sub-table of size two, because the start character has already been
     254             :          * recomposed partially.  This lookup uses a perfect hash function for
     255             :          * the backend code.
     256             :          */
     257             : #ifndef FRONTEND
     258             : 
     259             :         int         h,
     260             :                     inv_lookup_index;
     261             :         uint64      hashkey;
     262         140 :         pg_unicode_recompinfo recompinfo = UnicodeRecompInfo;
     263             : 
     264             :         /*
     265             :          * Compute the hash function. The hash key is formed by concatenating
     266             :          * bytes of the two codepoints in network order. See also
     267             :          * src/common/unicode/generate-unicode_norm_table.pl.
     268             :          */
     269         140 :         hashkey = pg_hton64(((uint64) start << 32) | (uint64) code);
     270         140 :         h = recompinfo.hash(&hashkey);
     271             : 
     272             :         /* An out-of-range result implies no match */
     273         140 :         if (h < 0 || h >= recompinfo.num_recomps)
     274         136 :             return false;
     275             : 
     276          46 :         inv_lookup_index = recompinfo.inverse_lookup[h];
     277          46 :         entry = &UnicodeDecompMain[inv_lookup_index];
     278             : 
     279          46 :         if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
     280          42 :             code == UnicodeDecomp_codepoints[entry->dec_index + 1])
     281             :         {
     282          42 :             *result = entry->codepoint;
     283          42 :             return true;
     284             :         }
     285             : 
     286             : #else
     287             : 
     288             :         int         i;
     289             : 
     290      216832 :         for (i = 0; i < lengthof(UnicodeDecompMain); i++)
     291             :         {
     292      216800 :             entry = &UnicodeDecompMain[i];
     293             : 
     294      216800 :             if (DECOMPOSITION_SIZE(entry) != 2)
     295      163232 :                 continue;
     296             : 
     297       53568 :             if (DECOMPOSITION_NO_COMPOSE(entry))
     298       23456 :                 continue;
     299             : 
     300       30112 :             if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
     301         280 :                 code == UnicodeDecomp_codepoints[entry->dec_index + 1])
     302             :             {
     303           0 :                 *result = entry->codepoint;
     304           0 :                 return true;
     305             :             }
     306             :         }
     307             : #endif                          /* !FRONTEND */
     308             :     }
     309             : 
     310          36 :     return false;
     311             : }
     312             : 
     313             : /*
     314             :  * Decompose the given code into the array given by caller. The
     315             :  * decomposition begins at the position given by caller, saving one
     316             :  * lookup on the decomposition table. The current position needs to be
     317             :  * updated here to let the caller know from where to continue filling
     318             :  * in the array result.
     319             :  */
     320             : static void
     321         544 : decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
     322             : {
     323             :     const pg_unicode_decomposition *entry;
     324             :     int         i;
     325             :     const uint32 *decomp;
     326             :     int         dec_size;
     327             : 
     328             :     /*
     329             :      * Fast path for Hangul characters not stored in tables to save memory as
     330             :      * decomposition is algorithmic. See
     331             :      * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
     332             :      * on the matter.
     333             :      */
     334         544 :     if (code >= SBASE && code < SBASE + SCOUNT)
     335             :     {
     336             :         uint32      l,
     337             :                     v,
     338             :                     tindex,
     339             :                     sindex;
     340           0 :         pg_wchar   *res = *result;
     341             : 
     342           0 :         sindex = code - SBASE;
     343           0 :         l = LBASE + sindex / (VCOUNT * TCOUNT);
     344           0 :         v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT;
     345           0 :         tindex = sindex % TCOUNT;
     346             : 
     347           0 :         res[*current] = l;
     348           0 :         (*current)++;
     349           0 :         res[*current] = v;
     350           0 :         (*current)++;
     351             : 
     352           0 :         if (tindex != 0)
     353             :         {
     354           0 :             res[*current] = TBASE + tindex;
     355           0 :             (*current)++;
     356             :         }
     357             : 
     358         470 :         return;
     359             :     }
     360             : 
     361         544 :     entry = get_code_entry(code);
     362             : 
     363             :     /*
     364             :      * Just fill in with the current decomposition if there are no
     365             :      * decomposition codes to recurse to.  A NULL entry is equivalent to a
     366             :      * character with class 0 and no decompositions, so just leave also in
     367             :      * this case.
     368             :      */
     369         544 :     if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
     370         110 :         (!compat && DECOMPOSITION_IS_COMPAT(entry)))
     371             :     {
     372         470 :         pg_wchar   *res = *result;
     373             : 
     374         470 :         res[*current] = code;
     375         470 :         (*current)++;
     376         470 :         return;
     377             :     }
     378             : 
     379             :     /*
     380             :      * If this entry has other decomposition codes look at them as well.
     381             :      */
     382          74 :     decomp = get_code_decomposition(entry, &dec_size);
     383         192 :     for (i = 0; i < dec_size; i++)
     384             :     {
     385         118 :         pg_wchar    lcode = (pg_wchar) decomp[i];
     386             : 
     387             :         /* Leave if no more decompositions */
     388         118 :         decompose_code(lcode, compat, result, current);
     389             :     }
     390             : }
     391             : 
     392             : /*
     393             :  * unicode_normalize - Normalize a Unicode string to the specified form.
     394             :  *
     395             :  * The input is a 0-terminated array of codepoints.
     396             :  *
     397             :  * In frontend, returns a 0-terminated array of codepoints, allocated with
     398             :  * malloc. Or NULL if we run out of memory. In backend, the returned
     399             :  * string is palloc'd instead, and OOM is reported with ereport().
     400             :  */
     401             : pg_wchar *
     402         136 : unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
     403             : {
     404         136 :     bool        compat = (form == UNICODE_NFKC || form == UNICODE_NFKD);
     405         136 :     bool        recompose = (form == UNICODE_NFC || form == UNICODE_NFKC);
     406             :     pg_wchar   *decomp_chars;
     407             :     pg_wchar   *recomp_chars;
     408             :     int         decomp_size,
     409             :                 current_size;
     410             :     int         count;
     411             :     const pg_wchar *p;
     412             : 
     413             :     /* variables for recomposition */
     414             :     int         last_class;
     415             :     int         starter_pos;
     416             :     int         target_pos;
     417             :     uint32      starter_ch;
     418             : 
     419             :     /* First, do character decomposition */
     420             : 
     421             :     /*
     422             :      * Calculate how many characters long the decomposed version will be.
     423             :      */
     424         136 :     decomp_size = 0;
     425         562 :     for (p = input; *p; p++)
     426         426 :         decomp_size += get_decomposed_size(*p, compat);
     427             : 
     428         136 :     decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
     429         136 :     if (decomp_chars == NULL)
     430           0 :         return NULL;
     431             : 
     432             :     /*
     433             :      * Now fill in each entry recursively. This needs a second pass on the
     434             :      * decomposition table.
     435             :      */
     436         136 :     current_size = 0;
     437         562 :     for (p = input; *p; p++)
     438         426 :         decompose_code(*p, compat, &decomp_chars, &current_size);
     439         136 :     decomp_chars[decomp_size] = '\0';
     440             :     Assert(decomp_size == current_size);
     441             : 
     442             :     /* Leave if there is nothing to decompose */
     443         136 :     if (decomp_size == 0)
     444          18 :         return decomp_chars;
     445             : 
     446             :     /*
     447             :      * Now apply canonical ordering.
     448             :      */
     449         470 :     for (count = 1; count < decomp_size; count++)
     450             :     {
     451         352 :         pg_wchar    prev = decomp_chars[count - 1];
     452         352 :         pg_wchar    next = decomp_chars[count];
     453             :         pg_wchar    tmp;
     454         352 :         const uint8 prevClass = get_canonical_class(prev);
     455         352 :         const uint8 nextClass = get_canonical_class(next);
     456             : 
     457             :         /*
     458             :          * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html)
     459             :          * annex 4, a sequence of two adjacent characters in a string is an
     460             :          * exchangeable pair if the combining class (from the Unicode
     461             :          * Character Database) for the first character is greater than the
     462             :          * combining class for the second, and the second is not a starter.  A
     463             :          * character is a starter if its combining class is 0.
     464             :          */
     465         352 :         if (prevClass == 0 || nextClass == 0)
     466         352 :             continue;
     467             : 
     468           0 :         if (prevClass <= nextClass)
     469           0 :             continue;
     470             : 
     471             :         /* exchange can happen */
     472           0 :         tmp = decomp_chars[count - 1];
     473           0 :         decomp_chars[count - 1] = decomp_chars[count];
     474           0 :         decomp_chars[count] = tmp;
     475             : 
     476             :         /* backtrack to check again */
     477           0 :         if (count > 1)
     478           0 :             count -= 2;
     479             :     }
     480             : 
     481         118 :     if (!recompose)
     482          60 :         return decomp_chars;
     483             : 
     484             :     /*
     485             :      * The last phase of NFC and NFKC is the recomposition of the reordered
     486             :      * Unicode string using combining classes. The recomposed string cannot be
     487             :      * longer than the decomposed one, so make the allocation of the output
     488             :      * string based on that assumption.
     489             :      */
     490          58 :     recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
     491          58 :     if (!recomp_chars)
     492             :     {
     493           0 :         FREE(decomp_chars);
     494           0 :         return NULL;
     495             :     }
     496             : 
     497          58 :     last_class = -1;            /* this eliminates a special check */
     498          58 :     starter_pos = 0;
     499          58 :     target_pos = 1;
     500          58 :     starter_ch = recomp_chars[0] = decomp_chars[0];
     501             : 
     502         230 :     for (count = 1; count < decomp_size; count++)
     503             :     {
     504         172 :         pg_wchar    ch = decomp_chars[count];
     505         172 :         int         ch_class = get_canonical_class(ch);
     506             :         pg_wchar    composite;
     507             : 
     508         344 :         if (last_class < ch_class &&
     509         172 :             recompose_code(starter_ch, ch, &composite))
     510             :         {
     511          42 :             recomp_chars[starter_pos] = composite;
     512          42 :             starter_ch = composite;
     513             :         }
     514         130 :         else if (ch_class == 0)
     515             :         {
     516         130 :             starter_pos = target_pos;
     517         130 :             starter_ch = ch;
     518         130 :             last_class = -1;
     519         130 :             recomp_chars[target_pos++] = ch;
     520             :         }
     521             :         else
     522             :         {
     523           0 :             last_class = ch_class;
     524           0 :             recomp_chars[target_pos++] = ch;
     525             :         }
     526             :     }
     527          58 :     recomp_chars[target_pos] = (pg_wchar) '\0';
     528             : 
     529          58 :     FREE(decomp_chars);
     530             : 
     531          58 :     return recomp_chars;
     532             : }
     533             : 
     534             : /*
     535             :  * Normalization "quick check" algorithm; see
     536             :  * <http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms>
     537             :  */
     538             : 
     539             : /* We only need this in the backend. */
     540             : #ifndef FRONTEND
     541             : 
     542             : static const pg_unicode_normprops *
     543         192 : qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)
     544             : {
     545             :     int         h;
     546             :     uint32      hashkey;
     547             : 
     548             :     /*
     549             :      * Compute the hash function. The hash key is the codepoint with the bytes
     550             :      * in network order.
     551             :      */
     552         192 :     hashkey = pg_hton32(ch);
     553         192 :     h = norminfo->hash(&hashkey);
     554             : 
     555             :     /* An out-of-range result implies no match */
     556         192 :     if (h < 0 || h >= norminfo->num_normprops)
     557          84 :         return NULL;
     558             : 
     559             :     /*
     560             :      * Since it's a perfect hash, we need only match to the specific codepoint
     561             :      * it identifies.
     562             :      */
     563         108 :     if (ch != norminfo->normprops[h].codepoint)
     564          72 :         return NULL;
     565             : 
     566             :     /* Success! */
     567          36 :     return &norminfo->normprops[h];
     568             : }
     569             : 
     570             : /*
     571             :  * Look up the normalization quick check character property
     572             :  */
     573             : static UnicodeNormalizationQC
     574         192 : qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)
     575             : {
     576         192 :     const pg_unicode_normprops *found = NULL;
     577             : 
     578         192 :     switch (form)
     579             :     {
     580         120 :         case UNICODE_NFC:
     581         120 :             found = qc_hash_lookup(ch, &UnicodeNormInfo_NFC_QC);
     582         120 :             break;
     583          72 :         case UNICODE_NFKC:
     584          72 :             found = qc_hash_lookup(ch, &UnicodeNormInfo_NFKC_QC);
     585          72 :             break;
     586           0 :         default:
     587             :             Assert(false);
     588           0 :             break;
     589             :     }
     590             : 
     591         192 :     if (found)
     592          36 :         return found->quickcheck;
     593             :     else
     594         156 :         return UNICODE_NORM_QC_YES;
     595             : }
     596             : 
     597             : UnicodeNormalizationQC
     598         132 : unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input)
     599             : {
     600         132 :     uint8       lastCanonicalClass = 0;
     601         132 :     UnicodeNormalizationQC result = UNICODE_NORM_QC_YES;
     602             : 
     603             :     /*
     604             :      * For the "D" forms, we don't run the quickcheck.  We don't include the
     605             :      * lookup tables for those because they are huge, checking for these
     606             :      * particular forms is less common, and running the slow path is faster
     607             :      * for the "D" forms than the "C" forms because you don't need to
     608             :      * recompose, which is slow.
     609             :      */
     610         132 :     if (form == UNICODE_NFD || form == UNICODE_NFKD)
     611          60 :         return UNICODE_NORM_QC_MAYBE;
     612             : 
     613         252 :     for (const pg_wchar *p = input; *p; p++)
     614             :     {
     615         192 :         pg_wchar    ch = *p;
     616             :         uint8       canonicalClass;
     617             :         UnicodeNormalizationQC check;
     618             : 
     619         192 :         canonicalClass = get_canonical_class(ch);
     620         192 :         if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
     621           0 :             return UNICODE_NORM_QC_NO;
     622             : 
     623         192 :         check = qc_is_allowed(form, ch);
     624         192 :         if (check == UNICODE_NORM_QC_NO)
     625          12 :             return UNICODE_NORM_QC_NO;
     626         180 :         else if (check == UNICODE_NORM_QC_MAYBE)
     627          24 :             result = UNICODE_NORM_QC_MAYBE;
     628             : 
     629         180 :         lastCanonicalClass = canonicalClass;
     630             :     }
     631          60 :     return result;
     632             : }
     633             : 
     634             : #endif                          /* !FRONTEND */

Generated by: LCOV version 1.14