LCOV - code coverage report
Current view: top level - src/backend/utils/adt - ascii.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 10 56 17.9 %
Date: 2024-11-21 08:14:44 Functions: 1 6 16.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  * ascii.c
       3             :  *   The PostgreSQL routine for string to ascii conversion.
       4             :  *
       5             :  *   Portions Copyright (c) 1999-2024, PostgreSQL Global Development Group
       6             :  *
       7             :  * IDENTIFICATION
       8             :  *    src/backend/utils/adt/ascii.c
       9             :  *
      10             :  *-----------------------------------------------------------------------
      11             :  */
      12             : #include "postgres.h"
      13             : 
      14             : #include "mb/pg_wchar.h"
      15             : #include "utils/ascii.h"
      16             : #include "utils/fmgrprotos.h"
      17             : #include "varatt.h"
      18             : 
      19             : static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
      20             :                         unsigned char *dest, int enc);
      21             : static text *encode_to_ascii(text *data, int enc);
      22             : 
      23             : 
      24             : /* ----------
      25             :  * pg_to_ascii - translate bytes [src, src_end) to ASCII in dest per encoding enc
      26             :  * ----------
      27             :  */
      28             : static void
      29           0 : pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
      30             : {
      31             :     unsigned char *x;
      32             :     const unsigned char *ascii;
      33             :     int         range;
      34             : 
      35             :     /*
      36             :      * first byte value covered by each encoding's translation table
      37             :      */
      38             : #define RANGE_128   128
      39             : #define RANGE_160   160
      40             : 
      41           0 :     if (enc == PG_LATIN1)
      42             :     {
      43             :         /*
      44             :          * ISO-8859-1 <range: 160 -- 255>
      45             :          */
      46           0 :         ascii = (const unsigned char *) "  cL Y  \"Ca  -R     'u .,      ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
      47           0 :         range = RANGE_160;
      48             :     }
      49           0 :     else if (enc == PG_LATIN2)
      50             :     {
      51             :         /*
      52             :          * ISO-8859-2 <range: 160 -- 255>
      53             :          */
      54           0 :         ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
      55           0 :         range = RANGE_160;
      56             :     }
      57           0 :     else if (enc == PG_LATIN9)
      58             :     {
      59             :         /*
      60             :          * ISO-8859-15 <range: 160 -- 255>
      61             :          */
      62           0 :         ascii = (const unsigned char *) "  cL YS sCa  -R     Zu .z   EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
      63           0 :         range = RANGE_160;
      64             :     }
      65           0 :     else if (enc == PG_WIN1250)
      66             :     {
      67             :         /*
      68             :          * Windows CP1250 <range: 128 -- 255>
      69             :          */
      70           0 :         ascii = (const unsigned char *) "  ' \"    %S<STZZ `'\"\".--  s>stzz   L A  \"CS  -RZ  ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
      71           0 :         range = RANGE_128;
      72             :     }
      73             :     else
      74             :     {
      75           0 :         ereport(ERROR,
      76             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
      77             :                  errmsg("encoding conversion from %s to ASCII not supported",
      78             :                         pg_encoding_to_char(enc))));
      79             :         return;                 /* keep compiler quiet */
      80             :     }
      81             : 
      82             :     /*
      83             :      * Encode: copy ASCII bytes, blank out 128..range-1, map the rest via ascii[]
      84             :      */
      85           0 :     for (x = src; x < src_end; x++)
      86             :     {
      87           0 :         if (*x < 128)
      88           0 :             *dest++ = *x;
      89           0 :         else if (*x < range)
      90           0 :             *dest++ = ' ';      /* bogus 128 to 'range' */
      91             :         else
      92           0 :             *dest++ = ascii[*x - range];
      93             :     }
      94             : }
      95             : 
      96             : /* ----------
      97             :  * encode_to_ascii - run pg_to_ascii() over the data area of a text datum
      98             :  *
      99             :  * The text datum is overwritten in-place (src and dest are the same buffer),
     100             :  * so this method cannot support conversions that change the string length!
     101             :  * ----------
     102             :  */
     103             : static text *
     104           0 : encode_to_ascii(text *data, int enc)
     105             : {
     106           0 :     pg_to_ascii((unsigned char *) VARDATA(data),    /* src */
     107           0 :                 (unsigned char *) (data) + VARSIZE(data),   /* src end */
     108           0 :                 (unsigned char *) VARDATA(data),    /* dest */
     109             :                 enc);           /* encoding */
     110             : 
     111           0 :     return data;
     112             : }
     113             : 
     114             : /* ----------
     115             :  * to_ascii_encname - convert to ASCII; encoding is given by name (arg 1), unknown names raise an ERROR
     116             :  * ----------
     117             :  */
     118             : Datum
     119           0 : to_ascii_encname(PG_FUNCTION_ARGS)
     120             : {
     121           0 :     text       *data = PG_GETARG_TEXT_P_COPY(0);
     122           0 :     char       *encname = NameStr(*PG_GETARG_NAME(1));
     123           0 :     int         enc = pg_char_to_encoding(encname);
     124             : 
     125           0 :     if (enc < 0)
     126           0 :         ereport(ERROR,
     127             :                 (errcode(ERRCODE_UNDEFINED_OBJECT),
     128             :                  errmsg("%s is not a valid encoding name", encname)));
     129             : 
     130           0 :     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
     131             : }
     132             : 
     133             : /* ----------
     134             :  * to_ascii_enc - convert to ASCII; encoding is given as an int4 code (arg 1), checked with PG_VALID_ENCODING
     135             :  * ----------
     136             :  */
     137             : Datum
     138           0 : to_ascii_enc(PG_FUNCTION_ARGS)
     139             : {
     140           0 :     text       *data = PG_GETARG_TEXT_P_COPY(0);
     141           0 :     int         enc = PG_GETARG_INT32(1);
     142             : 
     143           0 :     if (!PG_VALID_ENCODING(enc))
     144           0 :         ereport(ERROR,
     145             :                 (errcode(ERRCODE_UNDEFINED_OBJECT),
     146             :                  errmsg("%d is not a valid encoding code", enc)));
     147             : 
     148           0 :     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
     149             : }
     150             : 
     151             : /* ----------
     152             :  * to_ascii_default - convert to ASCII using the encoding returned by GetDatabaseEncoding()
     153             :  * ----------
     154             :  */
     155             : Datum
     156           0 : to_ascii_default(PG_FUNCTION_ARGS)
     157             : {
     158           0 :     text       *data = PG_GETARG_TEXT_P_COPY(0);
     159           0 :     int         enc = GetDatabaseEncoding();
     160             : 
     161           0 :     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
     162             : }
     163             : 
     164             : /* ----------
     165             :  * Copy a string in an arbitrary backend-safe encoding, converting it to a
     166             :  * valid ASCII string: bytes 32..127 and '\n', '\r', '\t' are kept, every other
     167             :  * byte is replaced with '?'.  Otherwise the behavior is identical to strlcpy(),
     168             :  * except that we don't bother with a return value.
     169             :  *
     170             :  * This must not trigger ereport(ERROR), as it is called in postmaster.
     171             :  * ----------
     172             :  */
     173             : void
     174       14114 : ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
     175             : {
     176       14114 :     if (destsiz == 0)           /* corner case: no room for trailing nul */
     177           0 :         return;
     178             : 
     179      306306 :     while (--destsiz > 0)
     180             :     {
     181             :         /* use unsigned char here to avoid compiler warning */
     182      306306 :         unsigned char ch = *src++;
     183             : 
     184      306306 :         if (ch == '\0')
     185       14114 :             break;
     186             :         /* Keep bytes 32..127: printable ASCII, plus 127 (DEL), which is also kept */
     187      292192 :         if (32 <= ch && ch <= 127)
     188      292192 :             *dest = ch;
     189             :         /* White-space is also OK */
     190           0 :         else if (ch == '\n' || ch == '\r' || ch == '\t')
     191           0 :             *dest = ch;
     192             :         /* Everything else is replaced with '?' */
     193             :         else
     194           0 :             *dest = '?';
     195      292192 :         dest++;
     196             :     }
     197             : 
     198       14114 :     *dest = '\0';
     199             : }

Generated by: LCOV version 1.14