LCOV - code coverage report
Current view: top level - src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1 - utf8_and_iso8859_1.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 58 61 95.1 %
Date: 2025-01-18 04:15:08 Functions: 5 5 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  *    ISO8859_1 <--> UTF8
       4             :  *
       5             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       6             :  * Portions Copyright (c) 1994, Regents of the University of California
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : 
      14             : #include "postgres.h"
      15             : #include "fmgr.h"
      16             : #include "mb/pg_wchar.h"
      17             : 
      18         246 : PG_MODULE_MAGIC;   /* macro from the included headers; presumably marks this file as a loadable module -- confirm in fmgr.h */
      19             : 
      20          48 : PG_FUNCTION_INFO_V1(iso8859_1_to_utf8);   /* presumably registers the function below under the V1 calling convention -- confirm in fmgr.h */
      21         224 : PG_FUNCTION_INFO_V1(utf8_to_iso8859_1);   /* same, for the reverse conversion */
      22             : 
      23             : /* ----------
      24             :  * conv_proc(
      25             :  *      INTEGER,    -- source encoding id
      26             :  *      INTEGER,    -- destination encoding id
      27             :  *      CSTRING,    -- source string (null terminated C string)
      28             :  *      CSTRING,    -- destination string (null terminated C string)
      29             :  *      INTEGER,    -- source string length
      30             :  *      BOOL        -- if true, don't throw an error if conversion fails
      31             :  * ) returns INTEGER;
      32             :  *
      33             :  * Returns the number of bytes successfully converted.
      34             :  * ----------
      35             :  */
      36             : /* iso8859_1_to_utf8: re-encode up to len LATIN1 bytes from src into dest as UTF-8 (one or two output bytes per input byte) and return the number of source bytes consumed; dest is written without any bounds check here. */
      37             : Datum
      38         130 : iso8859_1_to_utf8(PG_FUNCTION_ARGS)
      39             : {
      40         130 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);   /* arg 2: source string */
      41         130 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);   /* arg 3: destination buffer */
      42         130 :     int         len = PG_GETARG_INT32(4);   /* arg 4: source length; counted down one per source byte */
      43         130 :     bool        noError = PG_GETARG_BOOL(5);   /* arg 5: if true, stop converting instead of reporting an error */
      44         130 :     unsigned char *start = src;   /* kept so the consumed byte count can be computed at the end */
      45             :     unsigned short c;
      46             : 
      47         130 :     CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8);   /* macro defined elsewhere; presumably validates the encoding ids passed as args 0 and 1 -- confirm */
      48             : 
      49        5474 :     while (len > 0)
      50             :     {
      51        5344 :         c = *src;
      52        5344 :         if (c == 0)   /* NUL byte found inside the first len bytes of input */
      53             :         {
      54           0 :             if (noError)
      55           0 :                 break;   /* stop here; src still points at the offending byte */
      56           0 :             report_invalid_encoding(PG_LATIN1, (const char *) src, len);   /* NOTE(review): the code below relies on this call not returning -- confirm */
      57             :         }
      58        5344 :         if (!IS_HIGHBIT_SET(c))   /* high bit clear: the byte is copied unchanged */
      59        5308 :             *dest++ = c;
      60             :         else
      61             :         {
      62          36 :             *dest++ = (c >> 6) | 0xc0;   /* first output byte: the bits of c above the low six, tagged with 0xc0 */
      63          36 :             *dest++ = (c & 0x003f) | HIGHBIT;   /* second output byte: the low six bits of c OR'ed with HIGHBIT (presumably 0x80 -- confirm) */
      64             :         }
      65        5344 :         src++;
      66        5344 :         len--;
      67             :     }
      68         130 :     *dest = '\0';   /* NUL-terminate the output */
      69             : 
      70         130 :     PG_RETURN_INT32(src - start);   /* number of source bytes consumed */
      71             : }
      72             : /* utf8_to_iso8859_1: re-encode up to len UTF-8 bytes from src into dest as LATIN1 and return the number of source bytes consumed; with noError set, conversion stops at the first byte sequence that cannot be converted. */
      73             : Datum
      74         762 : utf8_to_iso8859_1(PG_FUNCTION_ARGS)
      75             : {
      76         762 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);   /* arg 2: source string */
      77         762 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);   /* arg 3: destination buffer */
      78         762 :     int         len = PG_GETARG_INT32(4);   /* arg 4: source length; counted down as source bytes are consumed */
      79         762 :     bool        noError = PG_GETARG_BOOL(5);   /* arg 5: if true, stop converting instead of reporting an error */
      80         762 :     unsigned char *start = src;   /* kept so the consumed byte count can be computed at the end */
      81             :     unsigned short c,
      82             :                 c1;
      83             : 
      84         762 :     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_LATIN1);   /* macro defined elsewhere; presumably validates the encoding ids passed as args 0 and 1 -- confirm */
      85             : 
      86        4478 :     while (len > 0)
      87             :     {
      88        4112 :         c = *src;
      89        4112 :         if (c == 0)   /* NUL byte found inside the first len bytes of input */
      90             :         {
      91          36 :             if (noError)
      92          18 :                 break;   /* stop here; src still points at the offending byte */
      93          18 :             report_invalid_encoding(PG_UTF8, (const char *) src, len);   /* NOTE(review): the code below relies on this call not returning -- confirm */
      94             :         }
      95             :         /* fast path for ASCII-subset characters */
      96        4076 :         if (!IS_HIGHBIT_SET(c))
      97             :         {
      98        3678 :             *dest++ = c;
      99        3678 :             src++;
     100        3678 :             len--;
     101             :         }
     102             :         else
     103             :         {
     104         398 :             int         l = pg_utf_mblen(src);   /* presumably the byte length of the sequence starting at src -- confirm in mb/pg_wchar.h */
     105             : 
     106         398 :             if (l > len || !pg_utf8_islegal(src, l))   /* sequence would run past the remaining input, or is rejected by pg_utf8_islegal() */
     107             :             {
     108         108 :                 if (noError)
     109          54 :                     break;
     110          54 :                 report_invalid_encoding(PG_UTF8, (const char *) src, len);
     111             :             }
     112         290 :             if (l != 2)   /* only two-byte sequences are decoded below; any other length is reported as untranslatable */
     113             :             {
     114         216 :                 if (noError)
     115         108 :                     break;
     116         108 :                 report_untranslatable_char(PG_UTF8, PG_LATIN1,
     117             :                                            (const char *) src, len);
     118             :             }
     119          74 :             c1 = src[1] & 0x3f;   /* low six bits of the second byte */
     120          74 :             c = ((c & 0x1f) << 6) | c1;   /* low five bits of the first byte become the high part of the decoded value */
     121          74 :             if (c >= 0x80 && c <= 0xff)   /* decoded value fits in one output byte with the high bit set */
     122             :             {
     123          38 :                 *dest++ = (unsigned char) c;
     124          38 :                 src += 2;
     125          38 :                 len -= 2;
     126             :             }
     127             :             else
     128             :             {
     129          36 :                 if (noError)
     130          18 :                     break;   /* src is not advanced, so this sequence is excluded from the returned count */
     131          18 :                 report_untranslatable_char(PG_UTF8, PG_LATIN1,
     132             :                                            (const char *) src, len);
     133             :             }
     134             :         }
     135             :     }
     136         564 :     *dest = '\0';   /* NUL-terminate the output */
     137             : 
     138         564 :     PG_RETURN_INT32(src - start);   /* number of source bytes consumed */
     139             : }

Generated by: LCOV version 1.14