Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * pgstrcasecmp.c 4 : * Portable SQL-like case-independent comparisons and conversions. 5 : * 6 : * SQL99 specifies Unicode-aware case normalization, which we don't yet 7 : * have the infrastructure for. Instead we use tolower() to provide a 8 : * locale-aware translation. However, there are some locales where this 9 : * is not right either (eg, Turkish may do strange things with 'i' and 10 : * 'I'). Our current compromise is to use tolower() for characters with 11 : * the high bit set, and use an ASCII-only downcasing for 7-bit 12 : * characters. 13 : * 14 : * NB: this code should match downcase_truncate_identifier() in scansup.c. 15 : * 16 : * 17 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group 18 : * 19 : * src/port/pgstrcasecmp.c 20 : * 21 : *------------------------------------------------------------------------- 22 : */ 23 : #include "c.h" 24 : 25 : #include <ctype.h> 26 : 27 : 28 : /* 29 : * Case-independent comparison of two null-terminated strings. 30 : */ 31 : int 32 21455506 : pg_strcasecmp(const char *s1, const char *s2) 33 : { 34 : for (;;) 35 8647792 : { 36 30103298 : unsigned char ch1 = (unsigned char) *s1++; 37 30103298 : unsigned char ch2 = (unsigned char) *s2++; 38 : 39 30103298 : if (ch1 != ch2) 40 : { 41 21200922 : if (ch1 >= 'A' && ch1 <= 'Z') 42 7849972 : ch1 += 'a' - 'A'; 43 13350950 : else if (IS_HIGHBIT_SET(ch1) && isupper(ch1)) 44 0 : ch1 = tolower(ch1); 45 : 46 21200922 : if (ch2 >= 'A' && ch2 <= 'Z') 47 4952458 : ch2 += 'a' - 'A'; 48 16248464 : else if (IS_HIGHBIT_SET(ch2) && isupper(ch2)) 49 0 : ch2 = tolower(ch2); 50 : 51 21200922 : if (ch1 != ch2) 52 19427150 : return (int) ch1 - (int) ch2; 53 : } 54 10676148 : if (ch1 == 0) 55 2028356 : break; 56 : } 57 2028356 : return 0; 58 : } 59 : 60 : /* 61 : * Case-independent comparison of two not-necessarily-null-terminated strings. 62 : * At most n bytes will be examined from each string. 63 : */ 64 : int 65 11587102 : pg_strncasecmp(const char *s1, const char *s2, size_t n) 66 : { 67 16698164 : while (n-- > 0) 68 : { 69 12990582 : unsigned char ch1 = (unsigned char) *s1++; 70 12990582 : unsigned char ch2 = (unsigned char) *s2++; 71 : 72 12990582 : if (ch1 != ch2) 73 : { 74 8066296 : if (ch1 >= 'A' && ch1 <= 'Z') 75 5670468 : ch1 += 'a' - 'A'; 76 2395828 : else if (IS_HIGHBIT_SET(ch1) && isupper(ch1)) 77 0 : ch1 = tolower(ch1); 78 : 79 8066296 : if (ch2 >= 'A' && ch2 <= 'Z') 80 387210 : ch2 += 'a' - 'A'; 81 7679086 : else if (IS_HIGHBIT_SET(ch2) && isupper(ch2)) 82 0 : ch2 = tolower(ch2); 83 : 84 8066296 : if (ch1 != ch2) 85 7879520 : return (int) ch1 - (int) ch2; 86 : } 87 5111062 : if (ch1 == 0) 88 0 : break; 89 : } 90 3707582 : return 0; 91 : } 92 : 93 : /* 94 : * Fold a character to upper case. 95 : * 96 : * Unlike some versions of toupper(), this is safe to apply to characters 97 : * that aren't lower case letters. Note however that the whole thing is 98 : * a bit bogus for multibyte character sets. 99 : */ 100 : unsigned char 101 345236 : pg_toupper(unsigned char ch) 102 : { 103 345236 : if (ch >= 'a' && ch <= 'z') 104 205378 : ch += 'A' - 'a'; 105 139858 : else if (IS_HIGHBIT_SET(ch) && islower(ch)) 106 0 : ch = toupper(ch); 107 345236 : return ch; 108 : } 109 : 110 : /* 111 : * Fold a character to lower case. 112 : * 113 : * Unlike some versions of tolower(), this is safe to apply to characters 114 : * that aren't upper case letters. Note however that the whole thing is 115 : * a bit bogus for multibyte character sets. 116 : */ 117 : unsigned char 118 14774384 : pg_tolower(unsigned char ch) 119 : { 120 14774384 : if (ch >= 'A' && ch <= 'Z') 121 9895688 : ch += 'a' - 'A'; 122 4878696 : else if (IS_HIGHBIT_SET(ch) && isupper(ch)) 123 0 : ch = tolower(ch); 124 14774384 : return ch; 125 : }