LCOV - code coverage report
Current view: top level - src/backend/utils/mb - conv.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 82 288 28.5 %
Date: 2019-09-19 02:07:14 Functions: 7 11 63.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  *    Utility functions for conversion procs.
       4             :  *
       5             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       6             :  * Portions Copyright (c) 1994, Regents of the University of California
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/utils/mb/conv.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : #include "postgres.h"
      14             : #include "mb/pg_wchar.h"
      15             : 
      16             : 
      17             : /*
      18             :  * local2local: a generic single byte charset encoding
      19             :  * conversion between two ASCII-superset encodings.
      20             :  *
      21             :  * l points to the source string of length len
      22             :  * p is the output area (must be large enough!)
      23             :  * src_encoding is the PG identifier for the source encoding
      24             :  * dest_encoding is the PG identifier for the target encoding
      25             :  * tab holds conversion entries for the source charset
      26             :  * starting from 128 (0x80). each entry in the table holds the corresponding
      27             :  * code point for the target charset, or 0 if there is no equivalent code.
      28             :  */
      29             : void
      30          56 : local2local(const unsigned char *l,
      31             :             unsigned char *p,
      32             :             int len,
      33             :             int src_encoding,
      34             :             int dest_encoding,
      35             :             const unsigned char *tab)
      36             : {
      37             :     unsigned char c1,          /* current source byte */
      38             :                 c2;             /* its translation looked up in tab */
      39             : 
      40         280 :     while (len > 0)            /* one source byte is consumed per iteration */
      41             :     {
      42         168 :         c1 = *l;
      43         168 :         if (c1 == 0)           /* a NUL byte inside the len bytes is reported as invalid */
      44           0 :             report_invalid_encoding(src_encoding, (const char *) l, len);
      45         168 :         if (!IS_HIGHBIT_SET(c1))
      46         168 :             *p++ = c1;         /* byte without the high bit is copied unchanged */
      47             :         else
      48             :         {
      49           0 :             c2 = tab[c1 - HIGHBIT];    /* tab entry 0 corresponds to source byte HIGHBIT */
      50           0 :             if (c2)
      51           0 :                 *p++ = c2;
      52             :             else               /* zero entry: no equivalent in the target charset */
      53           0 :                 report_untranslatable_char(src_encoding, dest_encoding,
      54             :                                            (const char *) l, len);
      55             :         }
      56         168 :         l++;
      57         168 :         len--;
      58             :     }
      59          56 :     *p = '\0';                 /* output is always NUL-terminated */
      60          56 : }
      61             : 
      62             : /*
      63             :  * LATINn ---> MIC when the charset's local codes map directly to MIC
      64             :  *
      65             :  * l points to the source string of length len
      66             :  * p is the output area (must be large enough!)
      67             :  * lc is the mule character set id for the local encoding
      68             :  * encoding is the PG identifier for the local encoding
      69             :  */
      70             : void
      71          20 : latin2mic(const unsigned char *l, unsigned char *p, int len,
      72             :           int lc, int encoding)
      73             : {
      74             :     int         c1;             /* current source byte */
      75             : 
      76         100 :     while (len > 0)            /* one source byte is consumed per iteration */
      77             :     {
      78          60 :         c1 = *l;
      79          60 :         if (c1 == 0)           /* a NUL byte inside the len bytes is reported as invalid */
      80           0 :             report_invalid_encoding(encoding, (const char *) l, len);
      81          60 :         if (IS_HIGHBIT_SET(c1))
      82           0 :             *p++ = lc;         /* high-bit byte is preceded by the lc byte in the output */
      83          60 :         *p++ = c1;             /* the source byte itself is always emitted unchanged */
      84          60 :         l++;
      85          60 :         len--;
      86             :     }
      87          20 :     *p = '\0';                 /* output is always NUL-terminated */
      88          20 : }
      89             : 
      90             : /*
      91             :  * MIC ---> LATINn when the charset's local codes map directly to MIC
      92             :  *
      93             :  * mic points to the source string of length len
      94             :  * p is the output area (must be large enough!)
      95             :  * lc is the mule character set id for the local encoding
      96             :  * encoding is the PG identifier for the local encoding
      97             :  */
      98             : void
      99          20 : mic2latin(const unsigned char *mic, unsigned char *p, int len,
     100             :           int lc, int encoding)
     101             : {
     102             :     int         c1;             /* first byte of the current source character */
     103             : 
     104         100 :     while (len > 0)
     105             :     {
     106          60 :         c1 = *mic;
     107          60 :         if (c1 == 0)           /* a NUL byte inside the len bytes is reported as invalid */
     108           0 :             report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
     109          60 :         if (!IS_HIGHBIT_SET(c1))
     110             :         {
     111             :             /* easy for ASCII: copy the single byte through unchanged */
     112          60 :             *p++ = c1;
     113          60 :             mic++;
     114          60 :             len--;
     115             :         }
     116             :         else
     117             :         {
     118           0 :             int         l = pg_mic_mblen(mic);  /* presumably the byte length of this character - confirm in pg_wchar.h */
     119             : 
     120           0 :             if (len < l)       /* fewer bytes remain than the character claims to occupy */
     121           0 :                 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
     122             :                                         len);
     123           0 :             if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))  /* only a 2-byte char led by lc with a high-bit second byte is accepted */
     124           0 :                 report_untranslatable_char(PG_MULE_INTERNAL, encoding,
     125             :                                            (const char *) mic, len);
     126           0 :             *p++ = mic[1];     /* drop the lc byte, keep the second byte; NOTE(review): assumes the report_* call above does not return - confirm */
     127           0 :             mic += 2;
     128           0 :             len -= 2;
     129             :         }
     130             :     }
     131          20 :     *p = '\0';                 /* output is always NUL-terminated */
     132          20 : }
     133             : 
     134             : 
     135             : /*
     136             :  * latin2mic_with_table: a generic single byte charset encoding
     137             :  * conversion from a local charset to the mule internal code.
     138             :  *
     139             :  * l points to the source string of length len
     140             :  * p is the output area (must be large enough!)
     141             :  * lc is the mule character set id for the local encoding
     142             :  * encoding is the PG identifier for the local encoding
     143             :  * tab holds conversion entries for the local charset
     144             :  * starting from 128 (0x80). each entry in the table holds the corresponding
     145             :  * code point for the mule encoding, or 0 if there is no equivalent code.
     146             :  */
     147             : void
     148          16 : latin2mic_with_table(const unsigned char *l,
     149             :                      unsigned char *p,
     150             :                      int len,
     151             :                      int lc,
     152             :                      int encoding,
     153             :                      const unsigned char *tab)
     154             : {
     155             :     unsigned char c1,          /* current source byte */
     156             :                 c2;             /* its translation looked up in tab */
     157             : 
     158          80 :     while (len > 0)            /* one source byte is consumed per iteration */
     159             :     {
     160          48 :         c1 = *l;
     161          48 :         if (c1 == 0)           /* a NUL byte inside the len bytes is reported as invalid */
     162           0 :             report_invalid_encoding(encoding, (const char *) l, len);
     163          48 :         if (!IS_HIGHBIT_SET(c1))
     164          48 :             *p++ = c1;         /* byte without the high bit is copied unchanged */
     165             :         else
     166             :         {
     167           0 :             c2 = tab[c1 - HIGHBIT];    /* tab entry 0 corresponds to source byte HIGHBIT */
     168           0 :             if (c2)
     169             :             {
     170           0 :                 *p++ = lc;     /* output is two bytes: lc followed by the table value */
     171           0 :                 *p++ = c2;
     172             :             }
     173             :             else               /* zero entry: no equivalent mule code */
     174           0 :                 report_untranslatable_char(encoding, PG_MULE_INTERNAL,
     175             :                                            (const char *) l, len);
     176             :         }
     177          48 :         l++;
     178          48 :         len--;
     179             :     }
     180          16 :     *p = '\0';                 /* output is always NUL-terminated */
     181          16 : }
     182             : 
     183             : /*
     184             :  * mic2latin_with_table: a generic single byte charset encoding
     185             :  * conversion from the mule internal code to a local charset.
     186             :  *
     187             :  * mic points to the source string of length len
     188             :  * p is the output area (must be large enough!)
     189             :  * lc is the mule character set id for the local encoding
     190             :  * encoding is the PG identifier for the local encoding
     191             :  * tab holds conversion entries for the mule internal code's second byte,
     192             :  * starting from 128 (0x80). each entry in the table holds the corresponding
     193             :  * code point for the local charset, or 0 if there is no equivalent code.
     194             :  */
     195             : void
     196          16 : mic2latin_with_table(const unsigned char *mic,
     197             :                      unsigned char *p,
     198             :                      int len,
     199             :                      int lc,
     200             :                      int encoding,
     201             :                      const unsigned char *tab)
     202             : {
     203             :     unsigned char c1,          /* first byte of the current source character */
     204             :                 c2;             /* translation of its second byte looked up in tab */
     205             : 
     206          80 :     while (len > 0)
     207             :     {
     208          48 :         c1 = *mic;
     209          48 :         if (c1 == 0)           /* a NUL byte inside the len bytes is reported as invalid */
     210           0 :             report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
     211          48 :         if (!IS_HIGHBIT_SET(c1))
     212             :         {
     213             :             /* easy for ASCII: copy the single byte through unchanged */
     214          48 :             *p++ = c1;
     215          48 :             mic++;
     216          48 :             len--;
     217             :         }
     218             :         else
     219             :         {
     220           0 :             int         l = pg_mic_mblen(mic);  /* presumably the byte length of this character - confirm in pg_wchar.h */
     221             : 
     222           0 :             if (len < l)       /* fewer bytes remain than the character claims to occupy */
     223           0 :                 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
     224             :                                         len);
     225           0 :             if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||    /* tests run left to right, so ... */
     226           0 :                 (c2 = tab[mic[1] - HIGHBIT]) == 0)  /* ... tab is indexed only after mic[1] is known to have the high bit set */
     227             :             {
     228           0 :                 report_untranslatable_char(PG_MULE_INTERNAL, encoding,
     229             :                                            (const char *) mic, len);
     230             :                 break;          /* keep compiler quiet */
     231             :             }
     232           0 :             *p++ = c2;         /* emit the single translated byte; the lc byte is dropped */
     233           0 :             mic += 2;
     234           0 :             len -= 2;
     235             :         }
     236             :     }
     237          16 :     *p = '\0';                 /* output is always NUL-terminated */
     238          16 : }
     239             : 
     240             : /*
     241             :  * comparison routine for bsearch()
     242             :  * this routine is intended for combined UTF8 -> local code
     243             :  */
     244             : static int
     245           0 : compare3(const void *p1, const void *p2)
     246             : {
     247             :     uint32      s1,             /* search key: first code */
     248             :                 s2,             /* search key: second code */
     249             :                 d1,             /* table entry: utf1 */
     250             :                 d2;             /* table entry: utf2 */
     251             : 
     252           0 :     s1 = *(const uint32 *) p1;         /* p1 is read as two consecutive uint32 values */
     253           0 :     s2 = *((const uint32 *) p1 + 1);
     254           0 :     d1 = ((const pg_utf_to_local_combined *) p2)->utf1;    /* p2 is read as a pg_utf_to_local_combined entry */
     255           0 :     d2 = ((const pg_utf_to_local_combined *) p2)->utf2;
     256           0 :     return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1); /* order by first code, then by second code */
     257             : }
     258             : 
     259             : /*
     260             :  * comparison routine for bsearch()
     261             :  * this routine is intended for local code -> combined UTF8
     262             :  */
     263             : static int
     264           0 : compare4(const void *p1, const void *p2)
     265             : {
     266             :     uint32      v1,             /* search key */
     267             :                 v2;             /* code field of the table entry */
     268             : 
     269           0 :     v1 = *(const uint32 *) p1;         /* p1 is read as a single uint32 */
     270           0 :     v2 = ((const pg_local_to_utf_combined *) p2)->code;    /* p2 is read as a pg_local_to_utf_combined entry */
     271           0 :     return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);  /* 1, 0 or -1 as the key is greater than, equal to or less than the entry */
     272             : }
     273             : 
     274             : /*
     275             :  * store 32bit character representation into multibyte stream
     276             :  */
     277             : static inline unsigned char *
     278           0 : store_coded_char(unsigned char *dest, uint32 code)
     279             : {
     280           0 :     if (code & 0xff000000)     /* each byte of code is tested on its own, most significant first */
     281           0 :         *dest++ = code >> 24;
     282           0 :     if (code & 0x00ff0000)     /* a zero byte is skipped, even if a higher byte was written */
     283           0 :         *dest++ = code >> 16;
     284           0 :     if (code & 0x0000ff00)
     285           0 :         *dest++ = code >> 8;
     286           0 :     if (code & 0x000000ff)
     287           0 :         *dest++ = code;
     288           0 :     return dest;               /* points just past the last byte written; equals the input dest when code is 0 */
     289             : }
     290             : 
     291             : /*
     292             :  * Convert a character using a conversion radix tree.
     293             :  *
     294             :  * 'l' is the length of the input character in bytes, and b1-b4 are
     295             :  * the input character's bytes.
     296             :  */
     297             : static inline uint32
     298           0 : pg_mb_radix_conv(const pg_mb_radix_tree *rt,
     299             :                  int l,
     300             :                  unsigned char b1,
     301             :                  unsigned char b2,
     302             :                  unsigned char b3,
     303             :                  unsigned char b4)
     304             : {
     305           0 :     if (l == 4)
     306             :     {
     307             :         /* 4-byte code: all of b1..b4 are used */
     308             : 
     309             :         /* check code validity: each byte must lie within its level's bounds */
     310           0 :         if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||
     311           0 :             b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||
     312           0 :             b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||
     313           0 :             b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)
     314           0 :             return 0;          /* out of range: 0 is the "no mapping" result */
     315             : 
     316             :         /* perform lookup; chars32 is used when non-NULL, otherwise chars16 */
     317           0 :         if (rt->chars32)
     318             :         {
     319           0 :             uint32      idx = rt->b4root;      /* starting offset for 4-byte codes */
     320             : 
     321           0 :             idx = rt->chars32[b1 + idx - rt->b4_1_lower];  /* each lookup yields the offset used by the next level */
     322           0 :             idx = rt->chars32[b2 + idx - rt->b4_2_lower];
     323           0 :             idx = rt->chars32[b3 + idx - rt->b4_3_lower];
     324           0 :             return rt->chars32[b4 + idx - rt->b4_4_lower]; /* the last lookup is the converted code */
     325             :         }
     326             :         else
     327             :         {
     328           0 :             uint16      idx = rt->b4root;      /* same walk over the 16-bit table */
     329             : 
     330           0 :             idx = rt->chars16[b1 + idx - rt->b4_1_lower];
     331           0 :             idx = rt->chars16[b2 + idx - rt->b4_2_lower];
     332           0 :             idx = rt->chars16[b3 + idx - rt->b4_3_lower];
     333           0 :             return rt->chars16[b4 + idx - rt->b4_4_lower];
     334             :         }
     335             :     }
     336           0 :     else if (l == 3)
     337             :     {
     338             :         /* 3-byte code: bytes are taken from b2..b4, b1 is not examined */
     339             : 
     340             :         /* check code validity: each byte must lie within its level's bounds */
     341           0 :         if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||
     342           0 :             b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||
     343           0 :             b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)
     344           0 :             return 0;
     345             : 
     346             :         /* perform lookup; chars32 is used when non-NULL, otherwise chars16 */
     347           0 :         if (rt->chars32)
     348             :         {
     349           0 :             uint32      idx = rt->b3root;      /* starting offset for 3-byte codes */
     350             : 
     351           0 :             idx = rt->chars32[b2 + idx - rt->b3_1_lower];
     352           0 :             idx = rt->chars32[b3 + idx - rt->b3_2_lower];
     353           0 :             return rt->chars32[b4 + idx - rt->b3_3_lower];
     354             :         }
     355             :         else
     356             :         {
     357           0 :             uint16      idx = rt->b3root;
     358             : 
     359           0 :             idx = rt->chars16[b2 + idx - rt->b3_1_lower];
     360           0 :             idx = rt->chars16[b3 + idx - rt->b3_2_lower];
     361           0 :             return rt->chars16[b4 + idx - rt->b3_3_lower];
     362             :         }
     363             :     }
     364           0 :     else if (l == 2)
     365             :     {
     366             :         /* 2-byte code: bytes are taken from b3 and b4 */
     367             : 
     368             :         /* check code validity - both bytes */
     369           0 :         if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||
     370           0 :             b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)
     371           0 :             return 0;
     372             : 
     373             :         /* perform lookup; chars32 is used when non-NULL, otherwise chars16 */
     374           0 :         if (rt->chars32)
     375             :         {
     376           0 :             uint32      idx = rt->b2root;      /* starting offset for 2-byte codes */
     377             : 
     378           0 :             idx = rt->chars32[b3 + idx - rt->b2_1_lower];
     379           0 :             return rt->chars32[b4 + idx - rt->b2_2_lower];
     380             :         }
     381             :         else
     382             :         {
     383           0 :             uint16      idx = rt->b2root;
     384             : 
     385           0 :             idx = rt->chars16[b3 + idx - rt->b2_1_lower];
     386           0 :             return rt->chars16[b4 + idx - rt->b2_2_lower];
     387             :         }
     388             :     }
     389           0 :     else if (l == 1)
     390             :     {
     391             :         /* 1-byte code: the byte is taken from b4 */
     392             : 
     393             :         /* check code validity - the only byte */
     394           0 :         if (b4 < rt->b1_lower || b4 > rt->b1_upper)
     395           0 :             return 0;
     396             : 
     397             :         /* perform lookup: a single step, no intermediate levels */
     398           0 :         if (rt->chars32)
     399           0 :             return rt->chars32[b4 + rt->b1root - rt->b1_lower];
     400             :         else
     401           0 :             return rt->chars16[b4 + rt->b1root - rt->b1_lower];
     402             :     }
     403           0 :     return 0;                   /* shouldn't happen: l was not 1..4 */
     404             : }
     405             : 
     406             : /*
     407             :  * UTF8 ---> local code
     408             :  *
     409             :  * utf: input string in UTF8 encoding (need not be null-terminated)
     410             :  * len: length of input string (in bytes)
     411             :  * iso: pointer to the output area (must be large enough!)
     412             :  *        (output string will be null-terminated)
     413             :  * map: conversion map for single characters
     414             :  * cmap: conversion map for combined characters
     415             :  *        (optional, pass NULL if none)
     416             :  * cmapsize: number of entries in the conversion map for combined characters
     417             :  *        (optional, pass 0 if none)
     418             :  * conv_func: algorithmic encoding conversion function
     419             :  *        (optional, pass NULL if none)
     420             :  * encoding: PG identifier for the local encoding
     421             :  *
     422             :  * For each character, the cmap (if provided) is consulted first; if no match,
     423             :  * the map is consulted next; if still no match, the conv_func (if provided)
     424             :  * is applied.  An error is raised if no match is found.
     425             :  *
     426             :  * See pg_wchar.h for more details about the data structures used here.
     427             :  */
     428             : void
     429         152 : UtfToLocal(const unsigned char *utf, int len,
     430             :            unsigned char *iso,
     431             :            const pg_mb_radix_tree *map,
     432             :            const pg_utf_to_local_combined *cmap, int cmapsize,
     433             :            utf_local_conversion_func conv_func,
     434             :            int encoding)
     435             : {
     436             :     uint32      iutf;           /* bytes of the current character packed into one word */
     437             :     int         l;              /* byte length of the character consumed last */
     438             :     const pg_utf_to_local_combined *cp; /* match found in cmap, if any */
     439             : 
     440         152 :     if (!PG_VALID_ENCODING(encoding))
     441           0 :         ereport(ERROR,
     442             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     443             :                  errmsg("invalid encoding number: %d", encoding)));
     444             : 
     445         608 :     for (; len > 0; len -= l)  /* every "continue" below lands on this len -= l */
     446             :     {
     447         456 :         unsigned char b1 = 0;  /* character bytes are right-aligned in b1..b4; unused leading ones stay 0 */
     448         456 :         unsigned char b2 = 0;
     449         456 :         unsigned char b3 = 0;
     450         456 :         unsigned char b4 = 0;
     451             : 
     452             :         /* "break" cases all represent errors; they are reported after the loop */
     453         456 :         if (*utf == '\0')
     454           0 :             break;
     455             : 
     456         456 :         l = pg_utf_mblen(utf); /* presumably the byte length of this character - confirm in pg_wchar.h */
     457         456 :         if (len < l)           /* fewer bytes remain than the character claims to occupy */
     458           0 :             break;
     459             : 
     460         456 :         if (!pg_utf8_islegal(utf, l))
     461           0 :             break;
     462             : 
     463         456 :         if (l == 1)
     464             :         {
     465             :             /* ASCII case is easy, assume it's one-to-one conversion */
     466         456 :             *iso++ = *utf++;
     467         456 :             continue;
     468             :         }
     469             : 
     470             :         /* collect coded char of length l; utf is advanced past it here */
     471           0 :         if (l == 2)
     472             :         {
     473           0 :             b3 = *utf++;
     474           0 :             b4 = *utf++;
     475             :         }
     476           0 :         else if (l == 3)
     477             :         {
     478           0 :             b2 = *utf++;
     479           0 :             b3 = *utf++;
     480           0 :             b4 = *utf++;
     481             :         }
     482           0 :         else if (l == 4)
     483             :         {
     484           0 :             b1 = *utf++;
     485           0 :             b2 = *utf++;
     486           0 :             b3 = *utf++;
     487           0 :             b4 = *utf++;
     488             :         }
     489             :         else
     490             :         {
     491           0 :             elog(ERROR, "unsupported character length %d", l);
     492             :             iutf = 0;           /* keep compiler quiet */
     493             :         }
     494           0 :         iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);   /* NOTE(review): b1 is promoted to int, so b1 >= 0x80 shifted by 24 does not fit a signed int - consider a uint32 cast */
     495             : 
     496             :         /* First, try with combined map if possible (needs at least one more byte after this character) */
     497           0 :         if (cmap && len > l)
     498             :         {
     499           0 :             const unsigned char *utf_save = utf;   /* saved state, restored if the pair lookup fails */
     500           0 :             int         len_save = len;
     501           0 :             int         l_save = l;
     502             : 
     503             :             /* collect next character, same as above */
     504           0 :             len -= l;          /* account for the first character now; the loop's len -= l covers the second one */
     505             : 
     506           0 :             l = pg_utf_mblen(utf);
     507           0 :             if (len < l)       /* on these breaks utf and len refer to the second character */
     508           0 :                 break;
     509             : 
     510           0 :             if (!pg_utf8_islegal(utf, l))
     511           0 :                 break;
     512             : 
     513             :             /* We assume ASCII character cannot be in combined map */
     514           0 :             if (l > 1)
     515             :             {
     516             :                 uint32      iutf2;     /* second character packed into one word */
     517             :                 uint32      cutf[2];   /* bsearch key: first and second character */
     518             : 
     519           0 :                 if (l == 2)
     520             :                 {
     521           0 :                     iutf2 = *utf++ << 8;
     522           0 :                     iutf2 |= *utf++;
     523             :                 }
     524           0 :                 else if (l == 3)
     525             :                 {
     526           0 :                     iutf2 = *utf++ << 16;
     527           0 :                     iutf2 |= *utf++ << 8;
     528           0 :                     iutf2 |= *utf++;
     529             :                 }
     530           0 :                 else if (l == 4)
     531             :                 {
     532           0 :                     iutf2 = *utf++ << 24;  /* NOTE(review): same int-promotion shift concern as for iutf above */
     533           0 :                     iutf2 |= *utf++ << 16;
     534           0 :                     iutf2 |= *utf++ << 8;
     535           0 :                     iutf2 |= *utf++;
     536             :                 }
     537             :                 else
     538             :                 {
     539           0 :                     elog(ERROR, "unsupported character length %d", l);
     540             :                     iutf2 = 0;  /* keep compiler quiet */
     541             :                 }
     542             : 
     543           0 :                 cutf[0] = iutf;
     544           0 :                 cutf[1] = iutf2;
     545             : 
     546           0 :                 cp = bsearch(cutf, cmap, cmapsize,     /* compare3 reads the key as two uint32 and the entry's utf1/utf2 */
     547             :                              sizeof(pg_utf_to_local_combined), compare3);
     548             : 
     549           0 :                 if (cp)
     550             :                 {
     551           0 :                     iso = store_coded_char(iso, cp->code); /* both input characters are consumed by this one output code */
     552           0 :                     continue;
     553             :                 }
     554             :             }
     555             : 
     556             :             /* fail, so back up to reprocess second character next time */
     557           0 :             utf = utf_save;
     558           0 :             len = len_save;
     559           0 :             l = l_save;
     560             :         }
     561             : 
     562             :         /* Now check ordinary map */
     563           0 :         if (map)
     564             :         {
     565           0 :             uint32      converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
     566             : 
     567           0 :             if (converted)     /* 0 means the radix tree has no mapping */
     568             :             {
     569           0 :                 iso = store_coded_char(iso, converted);
     570           0 :                 continue;
     571             :             }
     572             :         }
     573             : 
     574             :         /* if there's a conversion function, try that */
     575           0 :         if (conv_func)
     576             :         {
     577           0 :             uint32      converted = (*conv_func) (iutf);
     578             : 
     579           0 :             if (converted)     /* 0 is treated as "no conversion" */
     580             :             {
     581           0 :                 iso = store_coded_char(iso, converted);
     582           0 :                 continue;
     583             :             }
     584             :         }
     585             : 
     586             :         /* failed to translate this character; utf was already advanced, so utf - l is its first byte */
     587           0 :         report_untranslatable_char(PG_UTF8, encoding,
     588           0 :                                    (const char *) (utf - l), len);
     589             :     }
     590             : 
     591             :     /* if we broke out of loop early, must be invalid input */
     592         152 :     if (len > 0)
     593           0 :         report_invalid_encoding(PG_UTF8, (const char *) utf, len);
     594             : 
     595         152 :     *iso = '\0';               /* output is always NUL-terminated */
     596         152 : }
     597             : 
     598             : /*
     599             :  * local code ---> UTF8
     600             :  *
     601             :  * iso: input string in local encoding (need not be null-terminated)
     602             :  * len: length of input string (in bytes)
     603             :  * utf: pointer to the output area (must be large enough!)
     604             :  *        (output string will be null-terminated)
     605             :  * map: conversion map for single characters
     606             :  * cmap: conversion map for combined characters
     607             :  *        (optional, pass NULL if none)
     608             :  * cmapsize: number of entries in the conversion map for combined characters
     609             :  *        (optional, pass 0 if none)
     610             :  * conv_func: algorithmic encoding conversion function
     611             :  *        (optional, pass NULL if none)
     612             :  * encoding: PG identifier for the local encoding
     613             :  *
     614             :  * For each character, the map is consulted first; if no match, the cmap
     615             :  * (if provided) is consulted next; if still no match, the conv_func
     616             :  * (if provided) is applied.  An error is raised if no match is found.
     617             :  *
     618             :  * See pg_wchar.h for more details about the data structures used here.
     619             :  */
     620             : void
     621         152 : LocalToUtf(const unsigned char *iso, int len,
     622             :            unsigned char *utf,
     623             :            const pg_mb_radix_tree *map,
     624             :            const pg_local_to_utf_combined *cmap, int cmapsize,
     625             :            utf_local_conversion_func conv_func,
     626             :            int encoding)
     627             : {
     628             :     uint32      iiso;
     629             :     int         l;
     630             :     const pg_local_to_utf_combined *cp;
     631             : 
     632         152 :     if (!PG_VALID_ENCODING(encoding))
     633           0 :         ereport(ERROR,
     634             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     635             :                  errmsg("invalid encoding number: %d", encoding)));
     636             : 
     637         608 :     for (; len > 0; len -= l)
     638             :     {
     639         456 :         unsigned char b1 = 0;
     640         456 :         unsigned char b2 = 0;
     641         456 :         unsigned char b3 = 0;
     642         456 :         unsigned char b4 = 0;
     643             : 
     644             :         /* "break" cases all represent errors */
     645         456 :         if (*iso == '\0')
     646           0 :             break;
     647             : 
     648         456 :         if (!IS_HIGHBIT_SET(*iso))
     649             :         {
     650             :             /* ASCII case is easy, assume it's one-to-one conversion */
     651         456 :             *utf++ = *iso++;
     652         456 :             l = 1;
     653         456 :             continue;
     654             :         }
     655             : 
     656           0 :         l = pg_encoding_verifymb(encoding, (const char *) iso, len);
     657           0 :         if (l < 0)
     658           0 :             break;
     659             : 
     660             :         /* collect coded char of length l */
     661           0 :         if (l == 1)
     662           0 :             b4 = *iso++;
     663           0 :         else if (l == 2)
     664             :         {
     665           0 :             b3 = *iso++;
     666           0 :             b4 = *iso++;
     667             :         }
     668           0 :         else if (l == 3)
     669             :         {
     670           0 :             b2 = *iso++;
     671           0 :             b3 = *iso++;
     672           0 :             b4 = *iso++;
     673             :         }
     674           0 :         else if (l == 4)
     675             :         {
     676           0 :             b1 = *iso++;
     677           0 :             b2 = *iso++;
     678           0 :             b3 = *iso++;
     679           0 :             b4 = *iso++;
     680             :         }
     681             :         else
     682             :         {
     683           0 :             elog(ERROR, "unsupported character length %d", l);
     684             :             iiso = 0;           /* keep compiler quiet */
     685             :         }
     686           0 :         iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
     687             : 
     688           0 :         if (map)
     689             :         {
     690           0 :             uint32      converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
     691             : 
     692           0 :             if (converted)
     693             :             {
     694           0 :                 utf = store_coded_char(utf, converted);
     695           0 :                 continue;
     696             :             }
     697             : 
     698             :             /* If there's a combined character map, try that */
     699           0 :             if (cmap)
     700             :             {
     701           0 :                 cp = bsearch(&iiso, cmap, cmapsize,
     702             :                              sizeof(pg_local_to_utf_combined), compare4);
     703             : 
     704           0 :                 if (cp)
     705             :                 {
     706           0 :                     utf = store_coded_char(utf, cp->utf1);
     707           0 :                     utf = store_coded_char(utf, cp->utf2);
     708           0 :                     continue;
     709             :                 }
     710             :             }
     711             :         }
     712             : 
     713             :         /* if there's a conversion function, try that */
     714           0 :         if (conv_func)
     715             :         {
     716           0 :             uint32      converted = (*conv_func) (iiso);
     717             : 
     718           0 :             if (converted)
     719             :             {
     720           0 :                 utf = store_coded_char(utf, converted);
     721           0 :                 continue;
     722             :             }
     723             :         }
     724             : 
     725             :         /* failed to translate this character */
     726           0 :         report_untranslatable_char(encoding, PG_UTF8,
     727           0 :                                    (const char *) (iso - l), len);
     728             :     }
     729             : 
     730             :     /* if we broke out of loop early, must be invalid input */
     731         152 :     if (len > 0)
     732           0 :         report_invalid_encoding(encoding, (const char *) iso, len);
     733             : 
     734         152 :     *utf = '\0';
     735         152 : }

Generated by: LCOV version 1.13