Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * pgstrcasecmp.c 4 : * Portable SQL-like case-independent comparisons and conversions. 5 : * 6 : * SQL99 specifies Unicode-aware case normalization, which we don't yet 7 : * have the infrastructure for. Instead we use tolower() to provide a 8 : * locale-aware translation. However, there are some locales where this 9 : * is not right either (eg, Turkish may do strange things with 'i' and 10 : * 'I'). Our current compromise is to use tolower() for characters with 11 : * the high bit set, and use an ASCII-only downcasing for 7-bit 12 : * characters. 13 : * 14 : * NB: this code should match downcase_truncate_identifier() in scansup.c. 15 : * 16 : * 17 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group 18 : * 19 : * src/port/pgstrcasecmp.c 20 : * 21 : *------------------------------------------------------------------------- 22 : */ 23 : #include "c.h" 24 : 25 : #include <ctype.h> 26 : 27 : 28 : /* 29 : * Case-independent comparison of two null-terminated strings. 30 : */ 31 : int 32 21744998 : pg_strcasecmp(const char *s1, const char *s2) 33 : { 34 : for (;;) 35 8818026 : { 36 30563024 : unsigned char ch1 = (unsigned char) *s1++; 37 30563024 : unsigned char ch2 = (unsigned char) *s2++; 38 : 39 30563024 : if (ch1 != ch2) 40 : { 41 21485372 : if (ch1 >= 'A' && ch1 <= 'Z') 42 7954084 : ch1 += 'a' - 'A'; 43 13531288 : else if (IS_HIGHBIT_SET(ch1) && isupper(ch1)) 44 0 : ch1 = tolower(ch1); 45 : 46 21485372 : if (ch2 >= 'A' && ch2 <= 'Z') 47 5065112 : ch2 += 'a' - 'A'; 48 16420260 : else if (IS_HIGHBIT_SET(ch2) && isupper(ch2)) 49 0 : ch2 = tolower(ch2); 50 : 51 21485372 : if (ch1 != ch2) 52 19679754 : return (int) ch1 - (int) ch2; 53 : } 54 10883270 : if (ch1 == 0) 55 2065244 : break; 56 : } 57 2065244 : return 0; 58 : } 59 : 60 : /* 61 : * Case-independent comparison of two not-necessarily-null-terminated strings. 62 : * At most n bytes will be examined from each string. 63 : */ 64 : int 65 11832668 : pg_strncasecmp(const char *s1, const char *s2, size_t n) 66 : { 67 17026274 : while (n-- > 0) 68 : { 69 13280118 : unsigned char ch1 = (unsigned char) *s1++; 70 13280118 : unsigned char ch2 = (unsigned char) *s2++; 71 : 72 13280118 : if (ch1 != ch2) 73 : { 74 8275832 : if (ch1 >= 'A' && ch1 <= 'Z') 75 5812822 : ch1 += 'a' - 'A'; 76 2463010 : else if (IS_HIGHBIT_SET(ch1) && isupper(ch1)) 77 0 : ch1 = tolower(ch1); 78 : 79 8275832 : if (ch2 >= 'A' && ch2 <= 'Z') 80 392646 : ch2 += 'a' - 'A'; 81 7883186 : else if (IS_HIGHBIT_SET(ch2) && isupper(ch2)) 82 0 : ch2 = tolower(ch2); 83 : 84 8275832 : if (ch1 != ch2) 85 8086512 : return (int) ch1 - (int) ch2; 86 : } 87 5193606 : if (ch1 == 0) 88 0 : break; 89 : } 90 3746156 : return 0; 91 : } 92 : 93 : /* 94 : * Fold a character to upper case. 95 : * 96 : * Unlike some versions of toupper(), this is safe to apply to characters 97 : * that aren't lower case letters. Note however that the whole thing is 98 : * a bit bogus for multibyte character sets. 99 : */ 100 : unsigned char 101 350654 : pg_toupper(unsigned char ch) 102 : { 103 350654 : if (ch >= 'a' && ch <= 'z') 104 207730 : ch += 'A' - 'a'; 105 142924 : else if (IS_HIGHBIT_SET(ch) && islower(ch)) 106 0 : ch = toupper(ch); 107 350654 : return ch; 108 : } 109 : 110 : /* 111 : * Fold a character to lower case. 112 : * 113 : * Unlike some versions of tolower(), this is safe to apply to characters 114 : * that aren't upper case letters. Note however that the whole thing is 115 : * a bit bogus for multibyte character sets. 116 : */ 117 : unsigned char 118 15055006 : pg_tolower(unsigned char ch) 119 : { 120 15055006 : if (ch >= 'A' && ch <= 'Z') 121 10148274 : ch += 'a' - 'A'; 122 4906732 : else if (IS_HIGHBIT_SET(ch) && isupper(ch)) 123 0 : ch = tolower(ch); 124 15055006 : return ch; 125 : }