LCOV - code coverage report
Current view: top level - src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1 - utf8_and_iso8859_1.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 95.1 % 61 58
Test Date: 2026-02-28 13:14:45 Functions: 100.0 % 5 5
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  *    ISO8859_1 <--> UTF8
       4              :  *
       5              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       6              :  * Portions Copyright (c) 1994, Regents of the University of California
       7              :  *
       8              :  * IDENTIFICATION
       9              :  *    src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
      10              :  *
      11              :  *-------------------------------------------------------------------------
      12              :  */
      13              : 
      14              : #include "postgres.h"
      15              : #include "fmgr.h"
      16              : #include "mb/pg_wchar.h"
      17              : 
      18          111 : PG_MODULE_MAGIC_EXT(
      19              :                     .name = "utf8_and_iso8859_1",
      20              :                     .version = PG_VERSION
      21              : );
      22              : 
      23           24 : PG_FUNCTION_INFO_V1(iso8859_1_to_utf8);
      24          100 : PG_FUNCTION_INFO_V1(utf8_to_iso8859_1);
      25              : 
      26              : /* ----------
      27              :  * conv_proc(
      28              :  *      INTEGER,    -- source encoding id
      29              :  *      INTEGER,    -- destination encoding id
      30              :  *      CSTRING,    -- source string (null terminated C string)
      31              :  *      CSTRING,    -- destination string (null terminated C string)
      32              :  *      INTEGER,    -- source string length
      33              :  *      BOOL        -- if true, don't throw an error if conversion fails
      34              :  * ) returns INTEGER;
      35              :  *
      36              :  * Returns the number of bytes successfully converted.
      37              :  * ----------
      38              :  */
      39              : 
      40              : Datum
      41           65 : iso8859_1_to_utf8(PG_FUNCTION_ARGS)
      42              : {
      43           65 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
      44           65 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
      45           65 :     int         len = PG_GETARG_INT32(4);
      46           65 :     bool        noError = PG_GETARG_BOOL(5);
      47           65 :     unsigned char *start = src;
      48              :     unsigned short c;
      49              : 
      50           65 :     CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8);
      51              : 
      52         2937 :     while (len > 0)
      53              :     {
      54         2872 :         c = *src;
      55         2872 :         if (c == 0)
      56              :         {
      57            0 :             if (noError)
      58            0 :                 break;
      59            0 :             report_invalid_encoding(PG_LATIN1, (const char *) src, len);
      60              :         }
      61         2872 :         if (!IS_HIGHBIT_SET(c))
      62         2854 :             *dest++ = c;
      63              :         else
      64              :         {
      65           18 :             *dest++ = (c >> 6) | 0xc0;
      66           18 :             *dest++ = (c & 0x003f) | HIGHBIT;
      67              :         }
      68         2872 :         src++;
      69         2872 :         len--;
      70              :     }
      71           65 :     *dest = '\0';
      72              : 
      73           65 :     PG_RETURN_INT32(src - start);
      74              : }
      75              : 
      76              : Datum
      77          383 : utf8_to_iso8859_1(PG_FUNCTION_ARGS)
      78              : {
      79          383 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
      80          383 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
      81          383 :     int         len = PG_GETARG_INT32(4);
      82          383 :     bool        noError = PG_GETARG_BOOL(5);
      83          383 :     unsigned char *start = src;
      84              :     unsigned short c,
      85              :                 c1;
      86              : 
      87          383 :     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_LATIN1);
      88              : 
      89         2252 :     while (len > 0)
      90              :     {
      91         2067 :         c = *src;
      92         2067 :         if (c == 0)
      93              :         {
      94           18 :             if (noError)
      95            9 :                 break;
      96            9 :             report_invalid_encoding(PG_UTF8, (const char *) src, len);
      97              :         }
      98              :         /* fast path for ASCII-subset characters */
      99         2049 :         if (!IS_HIGHBIT_SET(c))
     100              :         {
     101         1850 :             *dest++ = c;
     102         1850 :             src++;
     103         1850 :             len--;
     104              :         }
     105              :         else
     106              :         {
     107          199 :             int         l = pg_utf_mblen(src);
     108              : 
     109          199 :             if (l > len || !pg_utf8_islegal(src, l))
     110              :             {
     111           54 :                 if (noError)
     112           27 :                     break;
     113           27 :                 report_invalid_encoding(PG_UTF8, (const char *) src, len);
     114              :             }
     115          145 :             if (l != 2)
     116              :             {
     117          108 :                 if (noError)
     118           54 :                     break;
     119           54 :                 report_untranslatable_char(PG_UTF8, PG_LATIN1,
     120              :                                            (const char *) src, len);
     121              :             }
     122           37 :             c1 = src[1] & 0x3f;
     123           37 :             c = ((c & 0x1f) << 6) | c1;
     124           37 :             if (c >= 0x80 && c <= 0xff)
     125              :             {
     126           19 :                 *dest++ = (unsigned char) c;
     127           19 :                 src += 2;
     128           19 :                 len -= 2;
     129              :             }
     130              :             else
     131              :             {
     132           18 :                 if (noError)
     133            9 :                     break;
     134            9 :                 report_untranslatable_char(PG_UTF8, PG_LATIN1,
     135              :                                            (const char *) src, len);
     136              :             }
     137              :         }
     138              :     }
     139          284 :     *dest = '\0';
     140              : 
     141          284 :     PG_RETURN_INT32(src - start);
     142              : }
        

Generated by: LCOV version 2.0-1