LCOV - code coverage report
Current view: top level - src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1 - utf8_and_iso8859_1.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 58 61 95.1 %
Date: 2025-04-01 16:15:31 Functions: 5 5 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  *    ISO8859_1 <--> UTF8
       4             :  *
       5             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       6             :  * Portions Copyright (c) 1994, Regents of the University of California
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : 
      14             : #include "postgres.h"
      15             : #include "fmgr.h"
      16             : #include "mb/pg_wchar.h"
      17             : 
      18         248 : PG_MODULE_MAGIC_EXT(
      19             :                     .name = "utf8_and_iso8859_1",
      20             :                     .version = PG_VERSION
      21             : );
      22             : 
      23          48 : PG_FUNCTION_INFO_V1(iso8859_1_to_utf8);
      24         226 : PG_FUNCTION_INFO_V1(utf8_to_iso8859_1);
      25             : 
      26             : /* ----------
      27             :  * conv_proc(
      28             :  *      INTEGER,    -- source encoding id
      29             :  *      INTEGER,    -- destination encoding id
      30             :  *      CSTRING,    -- source string (null terminated C string)
      31             :  *      CSTRING,    -- destination string (null terminated C string)
      32             :  *      INTEGER,    -- source string length
      33             :  *      BOOL        -- if true, don't throw an error if conversion fails
      34             :  * ) returns INTEGER;
      35             :  *
      36             :  * Returns the number of bytes successfully converted.
      37             :  * ----------
      38             :  */
      39             : 
      40             : Datum
      41         130 : iso8859_1_to_utf8(PG_FUNCTION_ARGS)
      42             : {
      43         130 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
      44         130 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
      45         130 :     int         len = PG_GETARG_INT32(4);
      46         130 :     bool        noError = PG_GETARG_BOOL(5);
      47         130 :     unsigned char *start = src;
      48             :     unsigned short c;
      49             : 
      50         130 :     CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8);
      51             : 
      52        5474 :     while (len > 0)
      53             :     {
      54        5344 :         c = *src;
      55        5344 :         if (c == 0)
      56             :         {
      57           0 :             if (noError)
      58           0 :                 break;
      59           0 :             report_invalid_encoding(PG_LATIN1, (const char *) src, len);
      60             :         }
      61        5344 :         if (!IS_HIGHBIT_SET(c))
      62        5308 :             *dest++ = c;
      63             :         else
      64             :         {
      65          36 :             *dest++ = (c >> 6) | 0xc0;
      66          36 :             *dest++ = (c & 0x003f) | HIGHBIT;
      67             :         }
      68        5344 :         src++;
      69        5344 :         len--;
      70             :     }
      71         130 :     *dest = '\0';
      72             : 
      73         130 :     PG_RETURN_INT32(src - start);
      74             : }
      75             : 
      76             : Datum
      77         762 : utf8_to_iso8859_1(PG_FUNCTION_ARGS)
      78             : {
      79         762 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
      80         762 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
      81         762 :     int         len = PG_GETARG_INT32(4);
      82         762 :     bool        noError = PG_GETARG_BOOL(5);
      83         762 :     unsigned char *start = src;
      84             :     unsigned short c,
      85             :                 c1;
      86             : 
      87         762 :     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_LATIN1);
      88             : 
      89        4478 :     while (len > 0)
      90             :     {
      91        4112 :         c = *src;
      92        4112 :         if (c == 0)
      93             :         {
      94          36 :             if (noError)
      95          18 :                 break;
      96          18 :             report_invalid_encoding(PG_UTF8, (const char *) src, len);
      97             :         }
      98             :         /* fast path for ASCII-subset characters */
      99        4076 :         if (!IS_HIGHBIT_SET(c))
     100             :         {
     101        3678 :             *dest++ = c;
     102        3678 :             src++;
     103        3678 :             len--;
     104             :         }
     105             :         else
     106             :         {
     107         398 :             int         l = pg_utf_mblen(src);
     108             : 
     109         398 :             if (l > len || !pg_utf8_islegal(src, l))
     110             :             {
     111         108 :                 if (noError)
     112          54 :                     break;
     113          54 :                 report_invalid_encoding(PG_UTF8, (const char *) src, len);
     114             :             }
     115         290 :             if (l != 2)
     116             :             {
     117         216 :                 if (noError)
     118         108 :                     break;
     119         108 :                 report_untranslatable_char(PG_UTF8, PG_LATIN1,
     120             :                                            (const char *) src, len);
     121             :             }
     122          74 :             c1 = src[1] & 0x3f;
     123          74 :             c = ((c & 0x1f) << 6) | c1;
     124          74 :             if (c >= 0x80 && c <= 0xff)
     125             :             {
     126          38 :                 *dest++ = (unsigned char) c;
     127          38 :                 src += 2;
     128          38 :                 len -= 2;
     129             :             }
     130             :             else
     131             :             {
     132          36 :                 if (noError)
     133          18 :                     break;
     134          18 :                 report_untranslatable_char(PG_UTF8, PG_LATIN1,
     135             :                                            (const char *) src, len);
     136             :             }
     137             :         }
     138             :     }
     139         564 :     *dest = '\0';
     140             : 
     141         564 :     PG_RETURN_INT32(src - start);
     142             : }

Generated by: LCOV version 1.14