LCOV - PostgreSQL 19devel - src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and

LCOV - code coverage report

Current view:	top level - src/backend/utils/mb/conversion_procs/euc_jp_and_sjis - euc_jp_and_sjis.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL 19devel	Lines:	167	359	46.5 %
Date:	2025-07-29 05:16:53	Functions:	19	19	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  *    EUC_JP, SJIS and MULE_INTERNAL
       4             :  *
       5             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       6             :  * Portions Copyright (c) 1994, Regents of the University of California
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : 
      14             : #include "postgres.h"
      15             : #include "fmgr.h"
      16             : #include "mb/pg_wchar.h"
      17             : 
      18             : /*
      19             :  * Alternative (substitute) codes.
      20             :  * PGSJISALTCODE is written when a character has no defined mapping to SJIS.
                     :  * PGEUCALTCODE is written, as a JIS X0208 code, when an SJIS character
                     :  * has no defined mapping in the other direction.
      21             :  */
      22             : #define PGSJISALTCODE 0x81ac
      23             : #define PGEUCALTCODE 0xa2ae
      24             : 
      25             : /*
      26             :  * conversion table between SJIS UDC (IBM kanji) and EUC_JP
      27             :  */
      28             : #include "sjis.map"
      29             : 
                       /*
                        * Module identification (.name, .version) followed by the V1 function-info
                        * declarations for the six conversion entry points defined in this file.
                        */
      30          12 : PG_MODULE_MAGIC_EXT(
      31             :                     .name = "euc_jp_and_sjis",
      32             :                     .version = PG_VERSION
      33             : );
      34             : 
      35           6 : PG_FUNCTION_INFO_V1(euc_jp_to_sjis);
      36           6 : PG_FUNCTION_INFO_V1(sjis_to_euc_jp);
      37           6 : PG_FUNCTION_INFO_V1(euc_jp_to_mic);
      38          12 : PG_FUNCTION_INFO_V1(mic_to_euc_jp);
      39           6 : PG_FUNCTION_INFO_V1(sjis_to_mic);
      40          12 : PG_FUNCTION_INFO_V1(mic_to_sjis);
      41             : 
      42             : /* ----------
      43             :  * conv_proc(
      44             :  *      INTEGER,    -- source encoding id
      45             :  *      INTEGER,    -- destination encoding id
      46             :  *      CSTRING,    -- source string (null terminated C string)
      47             :  *      CSTRING,    -- destination string (null terminated C string)
      48             :  *      INTEGER,    -- source string length
      49             :  *      BOOL        -- if true, don't throw an error if conversion fails
      50             :  * ) returns INTEGER;
      51             :  *
      52             :  * Returns the number of bytes successfully converted.
      53             :  * ----------
      54             :  */
      55             : 
                       /*
                        * Internal converters, one per direction.  Each reads the source bytes given
                        * by its first argument and len, writes the converted text plus a
                        * terminating NUL to p, and returns the number of source bytes consumed.
                        * With noError set, a conversion failure stops the loop instead of being
                        * reported.  (sjis2euc_jp is assumed to follow the same contract as the
                        * other five -- TODO confirm against its definition.)
                        */
      56             : static int  sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError);
      57             : static int  mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError);
      58             : static int  euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
      59             : static int  mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError);
      60             : static int  euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError);
      61             : static int  sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError);
      62             : 
                       /*
                        * euc_jp_to_sjis
                        *
                        * Conversion procedure for EUC_JP -> SJIS, using the conv_proc argument
                        * layout: arg 2 is the source string, arg 3 the destination buffer, arg 4
                        * the source length and arg 5 the noError flag.  Returns, as an int32, the
                        * byte count reported by euc_jp2sjis().
                        */
      63             : Datum
      64           6 : euc_jp_to_sjis(PG_FUNCTION_ARGS)
      65             : {
      66           6 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
      67           6 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
      68           6 :     int         len = PG_GETARG_INT32(4);
      69           6 :     bool        noError = PG_GETARG_BOOL(5);
      70             :     int         converted;
      71             : 
                           /*
                            * NOTE(review): macro is defined outside this file; presumably it
                            * checks that args 0 and 1 carry these encoding ids -- confirm.
                            */
      72           6 :     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_SJIS);
      73             : 
      74           6 :     converted = euc_jp2sjis(src, dest, len, noError);
      75             : 
      76           6 :     PG_RETURN_INT32(converted);
      77             : }
      78             : 
                       /*
                        * sjis_to_euc_jp
                        *
                        * Conversion procedure for SJIS -> EUC_JP, using the conv_proc argument
                        * layout: arg 2 is the source string, arg 3 the destination buffer, arg 4
                        * the source length and arg 5 the noError flag.  Returns, as an int32, the
                        * byte count reported by sjis2euc_jp().
                        */
      79             : Datum
      80           6 : sjis_to_euc_jp(PG_FUNCTION_ARGS)
      81             : {
      82           6 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
      83           6 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
      84           6 :     int         len = PG_GETARG_INT32(4);
      85           6 :     bool        noError = PG_GETARG_BOOL(5);
      86             :     int         converted;
      87             : 
                           /*
                            * NOTE(review): macro is defined outside this file; presumably it
                            * checks that args 0 and 1 carry these encoding ids -- confirm.
                            */
      88           6 :     CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_EUC_JP);
      89             : 
      90           6 :     converted = sjis2euc_jp(src, dest, len, noError);
      91             : 
      92           6 :     PG_RETURN_INT32(converted);
      93             : }
      94             : 
                       /*
                        * euc_jp_to_mic
                        *
                        * Conversion procedure for EUC_JP -> MULE_INTERNAL, using the conv_proc
                        * argument layout: arg 2 is the source string, arg 3 the destination
                        * buffer, arg 4 the source length and arg 5 the noError flag.  Returns, as
                        * an int32, the byte count reported by euc_jp2mic().
                        */
      95             : Datum
      96           6 : euc_jp_to_mic(PG_FUNCTION_ARGS)
      97             : {
      98           6 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
      99           6 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     100           6 :     int         len = PG_GETARG_INT32(4);
     101           6 :     bool        noError = PG_GETARG_BOOL(5);
     102             :     int         converted;
     103             : 
                           /*
                            * NOTE(review): macro is defined outside this file; presumably it
                            * checks that args 0 and 1 carry these encoding ids -- confirm.
                            */
     104           6 :     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_MULE_INTERNAL);
     105             : 
     106           6 :     converted = euc_jp2mic(src, dest, len, noError);
     107             : 
     108           6 :     PG_RETURN_INT32(converted);
     109             : }
     110             : 
                       /*
                        * mic_to_euc_jp
                        *
                        * Conversion procedure for MULE_INTERNAL -> EUC_JP, using the conv_proc
                        * argument layout: arg 2 is the source string, arg 3 the destination
                        * buffer, arg 4 the source length and arg 5 the noError flag.  Returns, as
                        * an int32, the byte count reported by mic2euc_jp().
                        */
     111             : Datum
     112         330 : mic_to_euc_jp(PG_FUNCTION_ARGS)
     113             : {
     114         330 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     115         330 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     116         330 :     int         len = PG_GETARG_INT32(4);
     117         330 :     bool        noError = PG_GETARG_BOOL(5);
     118             :     int         converted;
     119             : 
                           /*
                            * NOTE(review): macro is defined outside this file; presumably it
                            * checks that args 0 and 1 carry these encoding ids -- confirm.
                            */
     120         330 :     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_JP);
     121             : 
     122         330 :     converted = mic2euc_jp(src, dest, len, noError);
     123             : 
     124         186 :     PG_RETURN_INT32(converted);
     125             : }
     126             : 
                       /*
                        * sjis_to_mic
                        *
                        * Conversion procedure for SJIS -> MULE_INTERNAL, using the conv_proc
                        * argument layout: arg 2 is the source string, arg 3 the destination
                        * buffer, arg 4 the source length and arg 5 the noError flag.  Returns, as
                        * an int32, the byte count reported by sjis2mic().
                        */
     127             : Datum
     128           6 : sjis_to_mic(PG_FUNCTION_ARGS)
     129             : {
     130           6 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     131           6 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     132           6 :     int         len = PG_GETARG_INT32(4);
     133           6 :     bool        noError = PG_GETARG_BOOL(5);
     134             :     int         converted;
     135             : 
                           /*
                            * NOTE(review): macro is defined outside this file; presumably it
                            * checks that args 0 and 1 carry these encoding ids -- confirm.
                            */
     136           6 :     CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_MULE_INTERNAL);
     137             : 
     138           6 :     converted = sjis2mic(src, dest, len, noError);
     139             : 
     140           6 :     PG_RETURN_INT32(converted);
     141             : }
     142             : 
                       /*
                        * mic_to_sjis
                        *
                        * Conversion procedure for MULE_INTERNAL -> SJIS, using the conv_proc
                        * argument layout: arg 2 is the source string, arg 3 the destination
                        * buffer, arg 4 the source length and arg 5 the noError flag.  Returns, as
                        * an int32, the byte count reported by mic2sjis().
                        */
     143             : Datum
     144         330 : mic_to_sjis(PG_FUNCTION_ARGS)
     145             : {
     146         330 :     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     147         330 :     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     148         330 :     int         len = PG_GETARG_INT32(4);
     149         330 :     bool        noError = PG_GETARG_BOOL(5);
     150             :     int         converted;
     151             : 
                           /*
                            * NOTE(review): macro is defined outside this file; presumably it
                            * checks that args 0 and 1 carry these encoding ids -- confirm.
                            */
     152         330 :     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_SJIS);
     153             : 
     154         330 :     converted = mic2sjis(src, dest, len, noError);
     155             : 
     156         186 :     PG_RETURN_INT32(converted);
     157             : }
     158             : 
     159             : /*
     160             :  * SJIS ---> MIC
                     :  *
                     :  * Converts the len bytes of SJIS text at sjis to MULE_INTERNAL form,
                     :  * writing the result and a terminating NUL to p.  Returns the number of
                     :  * source bytes consumed.
                     :  *
                     :  * On an invalid byte sequence or an embedded zero byte the loop stops
                     :  * early when noError is set; otherwise report_invalid_encoding() is
                     :  * called (presumably it does not return -- confirm).
                     :  *
                     :  * No bound on p is checked here; the caller is presumably responsible
                     :  * for sizing the destination buffer.
     161             :  */
     162             : static int
     163           6 : sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError)
     164             : {
     165           6 :     const unsigned char *start = sjis;
     166             :     int         c1,
     167             :                 c2,
     168             :                 i,
     169             :                 k,
     170             :                 k2;
     171             : 
     172          24 :     while (len > 0)
     173             :     {
     174          18 :         c1 = *sjis;
     175          18 :         if (c1 >= 0xa1 && c1 <= 0xdf)
     176             :         {
     177             :             /* JIS X0201 (1 byte kana): prefix the byte with its charset id */
     178           0 :             *p++ = LC_JISX0201K;
     179           0 :             *p++ = c1;
     180           0 :             sjis++;
     181           0 :             len--;
     182             :         }
     183          18 :         else if (IS_HIGHBIT_SET(c1))
     184             :         {
     185             :             /*
     186             :              * JIS X0208, X0212, user defined extended characters
                     :              * (all two-byte sequences in SJIS)
     187             :              */
     188           0 :             if (len < 2 || !ISSJISHEAD(c1) || !ISSJISTAIL(sjis[1]))
     189             :             {
     190           0 :                 if (noError)
     191           0 :                     break;
     192           0 :                 report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
     193             :             }
     194           0 :             c2 = sjis[1];
                                   /* k is the 16-bit SJIS code used for the range tests below */
     195           0 :             k = (c1 << 8) + c2;
     196           0 :             if (k >= 0xed40 && k < 0xf040)
     197             :             {
     198             :                 /* NEC selection IBM kanji: remap to the IBM kanji SJIS code */
                                       /*
                                        * NOTE(review): the scan runs to the 0xffff sentinel even
                                        * after a match; later comparisons use the remapped k.
                                        */
     199           0 :                 for (i = 0;; i++)
     200             :                 {
     201           0 :                     k2 = ibmkanji[i].nec;
     202           0 :                     if (k2 == 0xffff)
     203           0 :                         break;
     204           0 :                     if (k2 == k)
     205             :                     {
     206           0 :                         k = ibmkanji[i].sjis;
     207           0 :                         c1 = (k >> 8) & 0xff;
     208           0 :                         c2 = k & 0xff;
     209             :                     }
     210             :                 }
     211             :             }
     212             : 
     213           0 :             if (k < 0xeb3f)
     214             :             {
     215             :                 /* JIS X0208 */
     216           0 :                 *p++ = LC_JISX0208;
                                       /* (c2 > 0x9e) adds 1 to the first output byte */
     217           0 :                 *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
     218           0 :                 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
     219             :             }
     220           0 :             else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
     221             :             {
     222             :                 /* codes in these ranges (incl. NEC selection IBM kanji) get the alternative code */
     223           0 :                 *p++ = LC_JISX0208;
     224           0 :                 *p++ = PGEUCALTCODE >> 8;
     225           0 :                 *p++ = PGEUCALTCODE & 0xff;
     226             :             }
     227           0 :             else if (k >= 0xf040 && k < 0xf540)
     228             :             {
     229             :                 /*
     230             :                  * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
     231             :                  * 0x7e7e EUC 0xf5a1 - 0xfefe
     232             :                  */
     233           0 :                 *p++ = LC_JISX0208;
     234           0 :                 c1 -= 0x6f;
     235           0 :                 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
     236           0 :                 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
     237             :             }
     238           0 :             else if (k >= 0xf540 && k < 0xfa40)
     239             :             {
     240             :                 /*
     241             :                  * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
     242             :                  * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
     243             :                  */
     244           0 :                 *p++ = LC_JISX0212;
     245           0 :                 c1 -= 0x74;
     246           0 :                 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
     247           0 :                 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
     248             :             }
     249           0 :             else if (k >= 0xfa40)
     250             :             {
     251             :                 /*
     252             :                  * mapping IBM kanji to X0208 and X0212
                     :                  *
                     :                  * NOTE(review): if no table entry matches, nothing is written
                     :                  * for this character, yet it is still consumed below.  The
                     :                  * scan also continues after a match, comparing against the
                     :                  * EUC value now held in k.
     253             :                  */
     254           0 :                 for (i = 0;; i++)
     255             :                 {
     256           0 :                     k2 = ibmkanji[i].sjis;
     257           0 :                     if (k2 == 0xffff)
     258           0 :                         break;
     259           0 :                     if (k2 == k)
     260             :                     {
     261           0 :                         k = ibmkanji[i].euc;
                                               /* table values >= 0x8f0000 go out as JIS X0212 */
     262           0 :                         if (k >= 0x8f0000)
     263             :                         {
     264           0 :                             *p++ = LC_JISX0212;
     265           0 :                             *p++ = 0x80 | ((k & 0xff00) >> 8);
     266           0 :                             *p++ = 0x80 | (k & 0xff);
     267             :                         }
     268             :                         else
     269             :                         {
     270           0 :                             *p++ = LC_JISX0208;
     271           0 :                             *p++ = 0x80 | (k >> 8);
     272           0 :                             *p++ = 0x80 | (k & 0xff);
     273             :                         }
     274             :                     }
     275             :                 }
     276             :             }
     277           0 :             sjis += 2;
     278           0 :             len -= 2;
     279             :         }
     280             :         else
     281             :         {                       /* should be ASCII */
                                   /* a zero byte inside the counted input is treated as invalid */
     282          18 :             if (c1 == 0)
     283             :             {
     284           0 :                 if (noError)
     285           0 :                     break;
     286           0 :                 report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
     287             :             }
     288          18 :             *p++ = c1;
     289          18 :             sjis++;
     290          18 :             len--;
     291             :         }
     292             :     }
     293           6 :     *p = '\0';
     294             : 
                           /* number of source bytes consumed */
     295           6 :     return sjis - start;
     296             : }
     297             : 
     298             : /*
     299             :  * MIC ---> SJIS
                     :  *
                     :  * Converts the len bytes of MULE_INTERNAL text at mic to SJIS, writing
                     :  * the result and a terminating NUL to p.  Returns the number of source
                     :  * bytes consumed.
                     :  *
                     :  * Only ASCII and the charsets LC_JISX0201K, LC_JISX0208 and LC_JISX0212
                     :  * are handled; any other leading byte is untranslatable.  On an invalid
                     :  * or untranslatable character the loop stops early when noError is set;
                     :  * otherwise the matching report_*() function is called (presumably it
                     :  * does not return -- confirm).
     300             :  */
     301             : static int
     302         330 : mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError)
     303             : {
     304         330 :     const unsigned char *start = mic;
     305             :     int         c1,
     306             :                 c2,
     307             :                 k,
     308             :                 l;
     309             : 
     310         672 :     while (len > 0)
     311             :     {
     312         630 :         c1 = *mic;
     313         630 :         if (!IS_HIGHBIT_SET(c1))
     314             :         {
     315             :             /* ASCII; a zero byte inside the counted input is treated as invalid */
     316         324 :             if (c1 == 0)
     317             :             {
     318          36 :                 if (noError)
     319          18 :                     break;
     320          18 :                 report_invalid_encoding(PG_MULE_INTERNAL,
     321             :                                         (const char *) mic, len);
     322             :             }
     323         288 :             *p++ = c1;
     324         288 :             mic++;
     325         288 :             len--;
     326         288 :             continue;
     327             :         }
                               /* l is the verified byte length of this character, negative if invalid */
     328         306 :         l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
     329         306 :         if (l < 0)
     330             :         {
     331         144 :             if (noError)
     332          72 :                 break;
     333          72 :             report_invalid_encoding(PG_MULE_INTERNAL,
     334             :                                     (const char *) mic, len);
     335             :         }
                               /* 1 byte kana: drop the charset id, keep the data byte */
     336         162 :         if (c1 == LC_JISX0201K)
     337           0 :             *p++ = mic[1];
     338         162 :         else if (c1 == LC_JISX0208)
     339             :         {
     340          54 :             c1 = mic[1];
     341          54 :             c2 = mic[2];
     342          54 :             k = (c1 << 8) | (c2 & 0xff);
     343          54 :             if (k >= 0xf5a1)
     344             :             {
     345             :                 /* UDC1 */
     346           0 :                 c1 -= 0x54;
     347           0 :                 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
     348             :             }
     349             :             else
     350          54 :                 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
                                   /* second byte; for UDC1 this uses the adjusted c1 */
     351          54 :             *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
     352             :         }
     353         108 :         else if (c1 == LC_JISX0212)
     354             :         {
     355             :             int         i,
     356             :                         k2;
     357             : 
     358           0 :             c1 = mic[1];
     359           0 :             c2 = mic[2];
     360           0 :             k = c1 << 8 | c2;
     361           0 :             if (k >= 0xf5a1)
     362             :             {
     363             :                 /* UDC2 */
     364           0 :                 c1 -= 0x54;
     365           0 :                 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
     366           0 :                 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
     367             :             }
     368             :             else
     369             :             {
     370             :                 /* IBM kanji: look up by the low 16 bits of the table's EUC value */
     371           0 :                 for (i = 0;; i++)
     372             :                 {
     373           0 :                     k2 = ibmkanji[i].euc & 0xffff;
     374           0 :                     if (k2 == 0xffff)
     375             :                     {
                                               /* end of table, no mapping: emit the alternative code */
     376           0 :                         *p++ = PGSJISALTCODE >> 8;
     377           0 :                         *p++ = PGSJISALTCODE & 0xff;
     378           0 :                         break;
     379             :                     }
     380           0 :                     if (k2 == k)
     381             :                     {
     382           0 :                         k = ibmkanji[i].sjis;
     383           0 :                         *p++ = k >> 8;
     384           0 :                         *p++ = k & 0xff;
     385           0 :                         break;
     386             :                     }
     387             :                 }
     388             :             }
     389             :         }
     390             :         else
     391             :         {
                                   /* valid MULE_INTERNAL, but a charset this converter does not handle */
     392         108 :             if (noError)
     393          54 :                 break;
     394          54 :             report_untranslatable_char(PG_MULE_INTERNAL, PG_SJIS,
     395             :                                        (const char *) mic, len);
     396             :         }
     397          54 :         mic += l;
     398          54 :         len -= l;
     399             :     }
     400         186 :     *p = '\0';
     401             : 
                           /* number of source bytes consumed */
     402         186 :     return mic - start;
     403             : }
     404             : 
     405             : /*
     406             :  * EUC_JP ---> MIC
                     :  *
                     :  * Converts the len bytes of EUC_JP text at euc to MULE_INTERNAL form,
                     :  * writing the result and a terminating NUL to p.  Returns the number of
                     :  * source bytes consumed.
                     :  *
                     :  * On an invalid byte sequence or an embedded zero byte the loop stops
                     :  * early when noError is set; otherwise report_invalid_encoding() is
                     :  * called (presumably it does not return -- confirm).
     407             :  */
     408             : static int
     409           6 : euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
     410             : {
     411           6 :     const unsigned char *start = euc;
     412             :     int         c1;
     413             :     int         l;
     414             : 
     415          24 :     while (len > 0)
     416             :     {
     417          18 :         c1 = *euc;
     418          18 :         if (!IS_HIGHBIT_SET(c1))
     419             :         {
     420             :             /* ASCII; a zero byte inside the counted input is treated as invalid */
     421          18 :             if (c1 == 0)
     422             :             {
     423           0 :                 if (noError)
     424           0 :                     break;
     425           0 :                 report_invalid_encoding(PG_EUC_JP,
     426             :                                         (const char *) euc, len);
     427             :             }
     428          18 :             *p++ = c1;
     429          18 :             euc++;
     430          18 :             len--;
     431          18 :             continue;
     432             :         }
                               /* l is the verified byte length of this character, negative if invalid */
     433           0 :         l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
     434           0 :         if (l < 0)
     435             :         {
     436           0 :             if (noError)
     437           0 :                 break;
     438           0 :             report_invalid_encoding(PG_EUC_JP,
     439             :                                     (const char *) euc, len);
     440             :         }
                               /* replace the SS2/SS3 shift byte, if any, with the charset id */
     441           0 :         if (c1 == SS2)
     442             :         {                       /* 1 byte kana? */
     443           0 :             *p++ = LC_JISX0201K;
     444           0 :             *p++ = euc[1];
     445             :         }
     446           0 :         else if (c1 == SS3)
     447             :         {                       /* JIS X0212 kanji? */
     448           0 :             *p++ = LC_JISX0212;
     449           0 :             *p++ = euc[1];
     450           0 :             *p++ = euc[2];
     451             :         }
     452             :         else
     453             :         {                       /* kanji? */
                                   /* no shift byte: both EUC bytes are copied after the charset id */
     454           0 :             *p++ = LC_JISX0208;
     455           0 :             *p++ = c1;
     456           0 :             *p++ = euc[1];
     457             :         }
     458           0 :         euc += l;
     459           0 :         len -= l;
     460             :     }
     461           6 :     *p = '\0';
     462             : 
                           /* number of source bytes consumed */
     463           6 :     return euc - start;
     464             : }
     465             : 
     466             : /*
     467             :  * MIC ---> EUC_JP
                     :  *
                     :  * Converts the len bytes of MULE_INTERNAL text at mic to EUC_JP, writing
                     :  * the result and a terminating NUL to p.  Returns the number of source
                     :  * bytes consumed.
                     :  *
                     :  * Only ASCII and the charsets LC_JISX0201K, LC_JISX0212 and LC_JISX0208
                     :  * are handled; any other leading byte is untranslatable.  On an invalid
                     :  * or untranslatable character the loop stops early when noError is set;
                     :  * otherwise the matching report_*() function is called (presumably it
                     :  * does not return -- confirm).
     468             :  */
     469             : static int
     470         330 : mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError)
     471             : {
     472         330 :     const unsigned char *start = mic;
     473             :     int         c1;
     474             :     int         l;
     475             : 
     476         672 :     while (len > 0)
     477             :     {
     478         630 :         c1 = *mic;
     479         630 :         if (!IS_HIGHBIT_SET(c1))
     480             :         {
     481             :             /* ASCII; a zero byte inside the counted input is treated as invalid */
     482         324 :             if (c1 == 0)
     483             :             {
     484          36 :                 if (noError)
     485          18 :                     break;
     486          18 :                 report_invalid_encoding(PG_MULE_INTERNAL,
     487             :                                         (const char *) mic, len);
     488             :             }
     489         288 :             *p++ = c1;
     490         288 :             mic++;
     491         288 :             len--;
     492         288 :             continue;
     493             :         }
                               /* l is the verified byte length of this character, negative if invalid */
     494         306 :         l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
     495         306 :         if (l < 0)
     496             :         {
     497         144 :             if (noError)
     498          72 :                 break;
     499          72 :             report_invalid_encoding(PG_MULE_INTERNAL,
     500             :                                     (const char *) mic, len);
     501             :         }
                               /* replace the charset id with the SS2/SS3 shift byte, or drop it */
     502         162 :         if (c1 == LC_JISX0201K)
     503             :         {
     504           0 :             *p++ = SS2;
     505           0 :             *p++ = mic[1];
     506             :         }
     507         162 :         else if (c1 == LC_JISX0212)
     508             :         {
     509           0 :             *p++ = SS3;
     510           0 :             *p++ = mic[1];
     511           0 :             *p++ = mic[2];
     512             :         }
     513         162 :         else if (c1 == LC_JISX0208)
     514             :         {
     515          54 :             *p++ = mic[1];
     516          54 :             *p++ = mic[2];
     517             :         }
     518             :         else
     519             :         {
                                   /* valid MULE_INTERNAL, but a charset this converter does not handle */
     520         108 :             if (noError)
     521          54 :                 break;
     522          54 :             report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_JP,
     523             :                                        (const char *) mic, len);
     524             :         }
     525          54 :         mic += l;
     526          54 :         len -= l;
     527             :     }
     528         186 :     *p = '\0';
     529             : 
                           /* number of source bytes consumed */
     530         186 :     return mic - start;
     531             : }
     532             : 
     533             : /*
     534             :  * EUC_JP -> SJIS
                     :  *
                     :  * Converts the len bytes of EUC_JP text at euc to SJIS, writing the
                     :  * result and a terminating NUL to p.  Returns the number of source bytes
                     :  * consumed.
                     :  *
                     :  * On an invalid byte sequence or an embedded zero byte the loop stops
                     :  * early when noError is set; otherwise report_invalid_encoding() is
                     :  * called (presumably it does not return -- confirm).  A JIS X0212
                     :  * character with no entry in ibmkanji is written as PGSJISALTCODE.
     535             :  */
     536             : static int
     537           6 : euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError)
     538             : {
     539           6 :     const unsigned char *start = euc;
     540             :     int         c1,
     541             :                 c2,
     542             :                 k;
     543             :     int         l;
     544             : 
     545          24 :     while (len > 0)
     546             :     {
     547          18 :         c1 = *euc;
     548          18 :         if (!IS_HIGHBIT_SET(c1))
     549             :         {
     550             :             /* ASCII; a zero byte inside the counted input is treated as invalid */
     551          18 :             if (c1 == 0)
     552             :             {
     553           0 :                 if (noError)
     554           0 :                     break;
     555           0 :                 report_invalid_encoding(PG_EUC_JP,
     556             :                                         (const char *) euc, len);
     557             :             }
     558          18 :             *p++ = c1;
     559          18 :             euc++;
     560          18 :             len--;
     561          18 :             continue;
     562             :         }
                               /* l is the verified byte length of this character, negative if invalid */
     563           0 :         l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
     564           0 :         if (l < 0)
     565             :         {
     566           0 :             if (noError)
     567           0 :                 break;
     568           0 :             report_invalid_encoding(PG_EUC_JP,
     569             :                                     (const char *) euc, len);
     570             :         }
     571           0 :         if (c1 == SS2)
     572             :         {
     573             :             /* hankaku kana? drop the SS2 byte, keep the data byte */
     574           0 :             *p++ = euc[1];
     575             :         }
     576           0 :         else if (c1 == SS3)
     577             :         {
     578             :             /* JIS X0212 kanji? */
     579           0 :             c1 = euc[1];
     580           0 :             c2 = euc[2];
     581           0 :             k = c1 << 8 | c2;
     582           0 :             if (k >= 0xf5a1)
     583             :             {
     584             :                 /* UDC2 */
     585           0 :                 c1 -= 0x54;
     586           0 :                 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
     587           0 :                 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
     588             :             }
     589             :             else
     590             :             {
     591             :                 int         i,
     592             :                             k2;
     593             : 
     594             :                 /* IBM kanji: look up by the low 16 bits of the table's EUC value */
     595           0 :                 for (i = 0;; i++)
     596             :                 {
     597           0 :                     k2 = ibmkanji[i].euc & 0xffff;
     598           0 :                     if (k2 == 0xffff)
     599             :                     {
                                               /* end of table, no mapping: emit the alternative code */
     600           0 :                         *p++ = PGSJISALTCODE >> 8;
     601           0 :                         *p++ = PGSJISALTCODE & 0xff;
     602           0 :                         break;
     603             :                     }
     604           0 :                     if (k2 == k)
     605             :                     {
     606           0 :                         k = ibmkanji[i].sjis;
     607           0 :                         *p++ = k >> 8;
     608           0 :                         *p++ = k & 0xff;
     609           0 :                         break;
     610             :                     }
     611             :                 }
     612             :             }
     613             :         }
     614             :         else
     615             :         {
     616             :             /* JIS X0208 kanji? */
     617           0 :             c2 = euc[1];
     618           0 :             k = (c1 << 8) | (c2 & 0xff);
     619           0 :             if (k >= 0xf5a1)
     620             :             {
     621             :                 /* UDC1 */
     622           0 :                 c1 -= 0x54;
     623           0 :                 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
     624             :             }
     625             :             else
     626           0 :                 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
                                   /* second byte; for UDC1 this uses the adjusted c1 */
     627           0 :             *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
     628             :         }
     629           0 :         euc += l;
     630           0 :         len -= l;
     631             :     }
     632           6 :     *p = '\0';
     633             : 
                           /* number of source bytes consumed */
     634           6 :     return euc - start;
     635             : }
     636             : 
     637             : /*
     638             :  * SJIS ---> EUC_JP
                     :  *
                     :  * Converts the SJIS bytes at sjis (at most len of them) to EUC_JP, writing
                     :  * the result at p and terminating it with a NUL byte.  Returns the number
                     :  * of input bytes consumed (sjis - start).
                     :  *
                     :  * A zero byte in the input, or a sequence for which
                     :  * pg_encoding_verifymbchar() returns a negative length, ends the loop
                     :  * early when noError is true; otherwise it is handed to
                     :  * report_invalid_encoding().
                     :  * NOTE(review): report_invalid_encoding() presumably does not return --
                     :  * confirm; the code after each call assumes valid input.
                     :  *
                     :  * No bound on the output is checked here, so p must already be large
                     :  * enough for the converted text plus the terminator.
     639             :  */
     640             : static int
     641           6 : sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError)
     642             : {
     643           6 :     const unsigned char *start = sjis;
     644             :     int         c1,
     645             :                 c2,
     646             :                 i,
     647             :                 k,
     648             :                 k2;
     649             :     int         l;
     650             : 
     651          24 :     while (len > 0)
     652             :     {
     653          18 :         c1 = *sjis;
     654          18 :         if (!IS_HIGHBIT_SET(c1))
     655             :         {
     656             :             /* ASCII: copied through unchanged; a zero byte is rejected */
     657          18 :             if (c1 == 0)
     658             :             {
     659           0 :                 if (noError)
     660           0 :                     break;
     661           0 :                 report_invalid_encoding(PG_SJIS,
     662             :                                         (const char *) sjis, len);
     663             :             }
     664          18 :             *p++ = c1;
     665          18 :             sjis++;
     666          18 :             len--;
     667          18 :             continue;
     668             :         }
     669           0 :         l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len);
     670           0 :         if (l < 0)
     671             :         {
     672           0 :             if (noError)
     673           0 :                 break;
     674           0 :             report_invalid_encoding(PG_SJIS,
     675             :                                     (const char *) sjis, len);
     676             :         }
     677           0 :         if (c1 >= 0xa1 && c1 <= 0xdf)
     678             :         {
     679             :             /* JIS X0201 (1 byte kana): emitted as SS2 followed by the same byte */
     680           0 :             *p++ = SS2;
     681           0 :             *p++ = c1;
     682             :         }
     683             :         else
     684             :         {
     685             :             /*
     686             :              * JIS X0208, X0212, user defined extended characters
                     :              *
                     :              * k is the two-byte SJIS code (c1 << 8) + c2; the range tests
                     :              * below decide how it is re-encoded.
     687             :              */
     688           0 :             c2 = sjis[1];
     689           0 :             k = (c1 << 8) + c2;
     690           0 :             if (k >= 0xed40 && k < 0xf040)
     691             :             {
     692             :                 /*
                     :                  * NEC selection IBM kanji: a match in ibmkanji[].nec
                     :                  * replaces k, c1 and c2 with that entry's ibmkanji[].sjis
                     :                  * code, so the range tests below see the substituted code.
                     :                  * The scan always runs to the 0xffff terminator; there is
                     :                  * no break on a match.
                     :                  */
     693           0 :                 for (i = 0;; i++)
     694             :                 {
     695           0 :                     k2 = ibmkanji[i].nec;
     696           0 :                     if (k2 == 0xffff)
     697           0 :                         break;
     698           0 :                     if (k2 == k)
     699             :                     {
     700           0 :                         k = ibmkanji[i].sjis;
     701           0 :                         c1 = (k >> 8) & 0xff;
     702           0 :                         c2 = k & 0xff;
     703             :                     }
     704             :                 }
     705             :             }
     706             : 
     707           0 :             if (k < 0xeb3f)
     708             :             {
     709             :                 /* JIS X0208 */
     710           0 :                 *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
     711           0 :                 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
     712             :             }
     713           0 :             else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
     714             :             {
     715             :                 /*
                     :                  * Codes still in 0xeb40 - 0xf03f after the NEC selection
                     :                  * lookup above, and codes in 0xfc4c - 0xfcfc: no mapping
                     :                  * is attempted, the alternative code PGEUCALTCODE is
                     :                  * written instead.
                     :                  */
     716           0 :                 *p++ = PGEUCALTCODE >> 8;
     717           0 :                 *p++ = PGEUCALTCODE & 0xff;
     718             :             }
     719           0 :             else if (k >= 0xf040 && k < 0xf540)
     720             :             {
     721             :                 /*
     722             :                  * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
     723             :                  * 0x7e7e EUC 0xf5a1 - 0xfefe
     724             :                  */
     725           0 :                 c1 -= 0x6f;
     726           0 :                 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
     727           0 :                 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
     728             :             }
     729           0 :             else if (k >= 0xf540 && k < 0xfa40)
     730             :             {
     731             :                 /*
     732             :                  * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
     733             :                  * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
     734             :                  */
     735           0 :                 *p++ = SS3;
     736           0 :                 c1 -= 0x74;
     737           0 :                 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
     738           0 :                 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
     739             :             }
     740           0 :             else if (k >= 0xfa40)
     741             :             {
     742             :                 /*
     743             :                  * mapping IBM kanji to X0208 and X0212
     744             :                  *
                     :                  * k is looked up in ibmkanji[].sjis.  An entry whose euc
                     :                  * value is >= 0x8f0000 is written as SS3 plus its two low
                     :                  * bytes, any other entry as two bytes; each of those bytes
                     :                  * is ORed with 0x80.  The scan continues to the 0xffff
                     :                  * terminator after a match (k then holds the euc value).
                     :                  *
                     :                  * NOTE(review): if no entry matches, nothing is written
                     :                  * for this character -- confirm that every valid code
                     :                  * reaching this branch is present in the table.
     745             :                  */
     746           0 :                 for (i = 0;; i++)
     747             :                 {
     748           0 :                     k2 = ibmkanji[i].sjis;
     749           0 :                     if (k2 == 0xffff)
     750           0 :                         break;
     751           0 :                     if (k2 == k)
     752             :                     {
     753           0 :                         k = ibmkanji[i].euc;
     754           0 :                         if (k >= 0x8f0000)
     755             :                         {
     756           0 :                             *p++ = SS3;
     757           0 :                             *p++ = 0x80 | ((k & 0xff00) >> 8);
     758           0 :                             *p++ = 0x80 | (k & 0xff);
     759             :                         }
     760             :                         else
     761             :                         {
     762           0 :                             *p++ = 0x80 | (k >> 8);
     763           0 :                             *p++ = 0x80 | (k & 0xff);
     764             :                         }
     765             :                     }
     766             :                 }
     767             :             }
     768             :         }
     769           0 :         sjis += l;
     770           0 :         len -= l;
     771             :     }
     772           6 :     *p = '\0';
     773             : 
     774           6 :     return sjis - start;
     775             : }

Generated by: LCOV version 1.16