Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * pgstrcasecmp.c 4 : * Portable SQL-like case-independent comparisons and conversions. 5 : * 6 : * SQL99 specifies Unicode-aware case normalization, which we don't yet 7 : * have the infrastructure for. Instead we use tolower() to provide a 8 : * locale-aware translation. However, there are some locales where this 9 : * is not right either (eg, Turkish may do strange things with 'i' and 10 : * 'I'). Our current compromise is to use tolower() for characters with 11 : * the high bit set, and use an ASCII-only downcasing for 7-bit 12 : * characters. 13 : * 14 : * NB: this code should match downcase_truncate_identifier() in scansup.c. 15 : * 16 : * We also provide strict ASCII-only case conversion functions, which can 17 : * be used to implement C/POSIX case folding semantics no matter what the 18 : * C library thinks the locale is. 19 : * 20 : * 21 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group 22 : * 23 : * src/port/pgstrcasecmp.c 24 : * 25 : *------------------------------------------------------------------------- 26 : */ 27 : #include "c.h" 28 : 29 : #include <ctype.h> 30 : 31 : 32 : /* 33 : * Case-independent comparison of two null-terminated strings. 
/*
 * Case-independent string comparison.
 *
 * The comparison routines below share one folding rule, implemented by
 * fold_byte_lower(): bytes in 'A'..'Z' are downcased arithmetically, bytes
 * with the high bit set are passed through tolower() when isupper() says
 * they are upper case, and every other byte is returned unchanged.
 */
static inline unsigned char
fold_byte_lower(unsigned char ch)
{
	if (ch >= 'A' && ch <= 'Z')
		ch += 'a' - 'A';
	else if (IS_HIGHBIT_SET(ch) && isupper(ch))
		ch = (unsigned char) tolower(ch);
	return ch;
}

/*
 * Case-independent comparison of two null-terminated strings.
 *
 * Returns 0 if the strings are equal after folding.  Otherwise returns the
 * difference (s1's byte minus s2's byte, both folded and treated as
 * unsigned char) at the first position where the folded bytes differ.
 */
int
pg_strcasecmp(const char *s1, const char *s2)
{
	for (;;)
	{
		unsigned char ch1 = (unsigned char) *s1++;
		unsigned char ch2 = (unsigned char) *s2++;

		/* Fold only when the raw bytes differ; equal bytes need no work. */
		if (ch1 != ch2)
		{
			ch1 = fold_byte_lower(ch1);
			ch2 = fold_byte_lower(ch2);

			if (ch1 != ch2)
				return (int) ch1 - (int) ch2;
		}

		/* ch1 equals ch2 here, so one test detects the end of both strings. */
		if (ch1 == 0)
			break;
	}
	return 0;
}

/*
 * Case-independent comparison of two not-necessarily-null-terminated strings.
 * At most n bytes will be examined from each string.
 *
 * Returns 0 if no folded difference is found within the first n bytes or
 * before a terminating null byte (in particular, when n is 0).  Otherwise
 * returns the difference of the first pair of folded bytes that differ,
 * computed as in pg_strcasecmp().
 */
int
pg_strncasecmp(const char *s1, const char *s2, size_t n)
{
	while (n-- > 0)
	{
		unsigned char ch1 = (unsigned char) *s1++;
		unsigned char ch2 = (unsigned char) *s2++;

		/* Fold only when the raw bytes differ; equal bytes need no work. */
		if (ch1 != ch2)
		{
			ch1 = fold_byte_lower(ch1);
			ch2 = fold_byte_lower(ch2);

			if (ch1 != ch2)
				return (int) ch1 - (int) ch2;
		}

		/* Both strings ended before n bytes were consumed. */
		if (ch1 == 0)
			break;
	}
	return 0;
}
/*
 * Fold a character to upper case.
 *
 * Unlike some versions of toupper(), this is safe to apply to characters
 * that aren't lower case letters.  Note however that the whole thing is
 * a bit bogus for multibyte character sets.
 *
 * Bytes in 'a'..'z' are upcased arithmetically; a byte with the high bit
 * set is passed to toupper() only if islower() reports it as lower case.
 * Anything else is returned unchanged.
 */
unsigned char
pg_toupper(unsigned char ch)
{
	if (ch >= 'a' && ch <= 'z')
		ch += 'A' - 'a';
	else if (IS_HIGHBIT_SET(ch) && islower(ch))
		ch = (unsigned char) toupper(ch);
	return ch;
}

/*
 * Fold a character to lower case.
 *
 * Unlike some versions of tolower(), this is safe to apply to characters
 * that aren't upper case letters.  Note however that the whole thing is
 * a bit bogus for multibyte character sets.
 *
 * Bytes in 'A'..'Z' are downcased arithmetically; a byte with the high bit
 * set is passed to tolower() only if isupper() reports it as upper case.
 * Anything else is returned unchanged.
 */
unsigned char
pg_tolower(unsigned char ch)
{
	if (ch >= 'A' && ch <= 'Z')
		ch += 'a' - 'A';
	else if (IS_HIGHBIT_SET(ch) && isupper(ch))
		ch = (unsigned char) tolower(ch);
	return ch;
}

/*
 * Fold a character to upper case, following C/POSIX locale rules.
 *
 * Only 'a'..'z' are changed; no <ctype.h> function is consulted.
 */
unsigned char
pg_ascii_toupper(unsigned char ch)
{
	if (ch >= 'a' && ch <= 'z')
		ch += 'A' - 'a';
	return ch;
}

/*
 * Fold a character to lower case, following C/POSIX locale rules.
 *
 * Only 'A'..'Z' are changed; no <ctype.h> function is consulted.
 */
unsigned char
pg_ascii_tolower(unsigned char ch)
{
	if (ch >= 'A' && ch <= 'Z')
		ch += 'a' - 'A';
	return ch;
}