LCOV - code coverage report
Current view: top level - src/backend/utils/adt - ascii.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 17.9 % 56 10
Test Date: 2026-03-02 03:14:39 Functions: 16.7 % 6 1
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-----------------------------------------------------------------------
       2              :  * ascii.c
       3              :  *   The PostgreSQL routine for string to ascii conversion.
       4              :  *
       5              :  *   Portions Copyright (c) 1999-2026, PostgreSQL Global Development Group
       6              :  *
       7              :  * IDENTIFICATION
       8              :  *    src/backend/utils/adt/ascii.c
       9              :  *
      10              :  *-----------------------------------------------------------------------
      11              :  */
      12              : #include "postgres.h"
      13              : 
      14              : #include "mb/pg_wchar.h"
      15              : #include "utils/ascii.h"
      16              : #include "utils/fmgrprotos.h"
      17              : #include "varatt.h"
      18              : 
      19              : static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
      20              :                         unsigned char *dest, int enc);
      21              : static text *encode_to_ascii(text *data, int enc);
      22              : 
      23              : 
      24              : /* ----------
      25              :  * pg_to_ascii: write an ASCII approximation of each byte in [src, src_end) to dest, using a lookup table selected by enc (LATIN1, LATIN2, LATIN9 or WIN1250); any other encoding raises an error.  Exactly one output byte is written per input byte.
      26              :  * ----------
      27              :  */
      28              : static void
      29            0 : pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
      30              : {
      31              :     unsigned char *x;           /* read cursor over the source bytes */
      32              :     const unsigned char *ascii; /* replacement table, indexed by (byte value - range) */
      33              :     int         range;          /* byte value that maps to the table's first entry */
      34              : 
      35              :     /*
      36              :      * first byte value covered by an encoding's lookup table
      37              :      */
      38              : #define RANGE_128   128
      39              : #define RANGE_160   160
      40              : 
      41            0 :     if (enc == PG_LATIN1)
      42              :     {
      43              :         /*
      44              :          * ISO-8859-1 <range: 160 -- 255>
      45              :          */
      46            0 :         ascii = (const unsigned char *) "  cL Y  \"Ca  -R     'u .,      ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
      47            0 :         range = RANGE_160;
      48              :     }
      49            0 :     else if (enc == PG_LATIN2)
      50              :     {
      51              :         /*
      52              :          * ISO-8859-2 <range: 160 -- 255>
      53              :          */
      54            0 :         ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
      55            0 :         range = RANGE_160;
      56              :     }
      57            0 :     else if (enc == PG_LATIN9)
      58              :     {
      59              :         /*
      60              :          * ISO-8859-15 <range: 160 -- 255>
      61              :          */
      62            0 :         ascii = (const unsigned char *) "  cL YS sCa  -R     Zu .z   EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
      63            0 :         range = RANGE_160;
      64              :     }
      65            0 :     else if (enc == PG_WIN1250)
      66              :     {
      67              :         /*
      68              :          * Windows CP1250 <range: 128 -- 255>
      69              :          */
      70            0 :         ascii = (const unsigned char *) "  ' \"    %S<STZZ `'\"\".--  s>stzz   L A  \"CS  -RZ  ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
      71            0 :         range = RANGE_128;
      72              :     }
      73              :     else
      74              :     {
      75            0 :         ereport(ERROR,
      76              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
      77              :                  errmsg("encoding conversion from %s to ASCII not supported",
      78              :                         pg_encoding_to_char(enc))));
      79              :         return;                 /* keep compiler quiet */
      80              :     }
      81              : 
      82              :     /*
      83              :      * Encode: one output byte per input byte, so dest may be the same buffer as src
      84              :      */
      85            0 :     for (x = src; x < src_end; x++)
      86              :     {
      87            0 :         if (*x < 128)           /* already ASCII: copy unchanged */
      88            0 :             *dest++ = *x;
      89            0 :         else if (*x < range)
      90            0 :             *dest++ = ' ';      /* bytes in 128 .. range-1 have no table entry; emit a space */
      91              :         else
      92            0 :             *dest++ = ascii[*x - range];    /* table lookup for bytes range .. 255 */
      93              :     }
      94              : }
      95              : 
      96              : /* ----------
      97              :  * encode_to_ascii: convert the contents of a text datum to ASCII for encoding enc and return that same datum
      98              :  *
      99              :  * The text datum is overwritten in-place, therefore this coding method
     100              :  * cannot support conversions that change the string length!
     101              :  * ----------
     102              :  */
     103              : static text *
     104            0 : encode_to_ascii(text *data, int enc)
     105              : {
     106            0 :     pg_to_ascii((unsigned char *) VARDATA(data),    /* src */
     107            0 :                 (unsigned char *) (data) + VARSIZE(data),   /* src end: start of the datum plus VARSIZE(data) */
     108            0 :                 (unsigned char *) VARDATA(data),    /* dest: same address as src, i.e. in-place */
     109              :                 enc);           /* encoding */
     110              : 
     111            0 :     return data;
     112              : }
     113              : 
     114              : /* ----------
     115              :  * to_ascii_encname: convert argument 0 to ASCII - the encoding is given by name in argument 1; a name that pg_char_to_encoding() does not recognize (negative result) raises an error
     116              :  * ----------
     117              :  */
     118              : Datum
     119            0 : to_ascii_encname(PG_FUNCTION_ARGS)
     120              : {
     121            0 :     text       *data = PG_GETARG_TEXT_P_COPY(0);    /* presumably a private copy, since the conversion writes in place -- confirm */
     122            0 :     char       *encname = NameStr(*PG_GETARG_NAME(1));
     123            0 :     int         enc = pg_char_to_encoding(encname);
     124              : 
     125            0 :     if (enc < 0)
     126            0 :         ereport(ERROR,
     127              :                 (errcode(ERRCODE_UNDEFINED_OBJECT),
     128              :                  errmsg("%s is not a valid encoding name", encname)));
     129              : 
     130            0 :     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
     131              : }
     132              : 
     133              : /* ----------
     134              :  * to_ascii_enc: convert argument 0 to ASCII - the encoding is given as an int4 code in argument 1; a code rejected by PG_VALID_ENCODING() raises an error
     135              :  * ----------
     136              :  */
     137              : Datum
     138            0 : to_ascii_enc(PG_FUNCTION_ARGS)
     139              : {
     140            0 :     text       *data = PG_GETARG_TEXT_P_COPY(0);    /* presumably a private copy, since the conversion writes in place -- confirm */
     141            0 :     int         enc = PG_GETARG_INT32(1);
     142              : 
     143            0 :     if (!PG_VALID_ENCODING(enc))
     144            0 :         ereport(ERROR,
     145              :                 (errcode(ERRCODE_UNDEFINED_OBJECT),
     146              :                  errmsg("%d is not a valid encoding code", enc)));
     147              : 
     148            0 :     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
     149              : }
     150              : 
     151              : /* ----------
     152              :  * to_ascii_default: convert argument 0 to ASCII - the encoding is whatever GetDatabaseEncoding() returns; no validity check is done here, so an unsupported encoding is reported by pg_to_ascii()
     153              :  * ----------
     154              :  */
     155              : Datum
     156            0 : to_ascii_default(PG_FUNCTION_ARGS)
     157              : {
     158            0 :     text       *data = PG_GETARG_TEXT_P_COPY(0);    /* presumably a private copy, since the conversion writes in place -- confirm */
     159            0 :     int         enc = GetDatabaseEncoding();
     160              : 
     161            0 :     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
     162              : }
     163              : 
     164              : /* ----------
     165              :  * Copy a string in an arbitrary backend-safe encoding, converting it to a
     166              :  * valid ASCII string by replacing non-ASCII bytes, and control characters other than newline, carriage return and tab, with '?'.  Otherwise the
     167              :  * behavior is identical to strlcpy(), except that we don't bother with a
     168              :  * return value.  At most destsiz - 1 bytes are copied and dest is always nul-terminated when destsiz > 0.
     169              :  *
     170              :  * This must not trigger ereport(ERROR), as it is called in postmaster.
     171              :  * ----------
     172              :  */
     173              : void
     174         8221 : ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
     175              : {
     176         8221 :     if (destsiz == 0)           /* corner case: no room for trailing nul */
     177            0 :         return;
     178              : 
     179       182101 :     while (--destsiz > 0)       /* pre-decrement keeps one byte free for the trailing nul */
     180              :     {
     181              :         /* use unsigned char here to avoid compiler warning */
     182       182101 :         unsigned char ch = *src++;
     183              : 
     184       182101 :         if (ch == '\0')
     185         8221 :             break;
     186              :         /* Keep bytes 32..127: printable ASCII, plus DEL (127), which this range also admits */
     187       173880 :         if (32 <= ch && ch <= 127)
     188       173880 :             *dest = ch;
     189              :         /* White-space is also OK */
     190            0 :         else if (ch == '\n' || ch == '\r' || ch == '\t')
     191            0 :             *dest = ch;
     192              :         /* Everything else (other control characters and bytes above 127) is replaced with '?' */
     193              :         else
     194            0 :             *dest = '?';
     195       173880 :         dest++;
     196              :     }
     197              : 
     198         8221 :     *dest = '\0';
     199              : }
        

Generated by: LCOV version 2.0-1