Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * pgstrcasecmp.c 4 : * Portable SQL-like case-independent comparisons and conversions. 5 : * 6 : * SQL99 specifies Unicode-aware case normalization, which we don't yet 7 : * have the infrastructure for. Instead we use tolower() to provide a 8 : * locale-aware translation. However, there are some locales where this 9 : * is not right either (eg, Turkish may do strange things with 'i' and 10 : * 'I'). Our current compromise is to use tolower() for characters with 11 : * the high bit set, and use an ASCII-only downcasing for 7-bit 12 : * characters. 13 : * 14 : * NB: this code should match downcase_truncate_identifier() in scansup.c. 15 : * 16 : * 17 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group 18 : * 19 : * src/port/pgstrcasecmp.c 20 : * 21 : *------------------------------------------------------------------------- 22 : */ 23 : #include "c.h" 24 : 25 : #include <ctype.h> 26 : 27 : 28 : /* 29 : * Case-independent comparison of two null-terminated strings. 30 : */ 31 : int 32 20745714 : pg_strcasecmp(const char *s1, const char *s2) 33 : { 34 : for (;;) 35 8587752 : { 36 29333466 : unsigned char ch1 = (unsigned char) *s1++; 37 29333466 : unsigned char ch2 = (unsigned char) *s2++; 38 : 39 29333466 : if (ch1 != ch2) 40 : { 41 20457426 : if (ch1 >= 'A' && ch1 <= 'Z') 42 7454628 : ch1 += 'a' - 'A'; 43 13002798 : else if (IS_HIGHBIT_SET(ch1) && isupper(ch1)) 44 0 : ch1 = tolower(ch1); 45 : 46 20457426 : if (ch2 >= 'A' && ch2 <= 'Z') 47 4940690 : ch2 += 'a' - 'A'; 48 15516736 : else if (IS_HIGHBIT_SET(ch2) && isupper(ch2)) 49 0 : ch2 = tolower(ch2); 50 : 51 20457426 : if (ch1 != ch2) 52 18722232 : return (int) ch1 - (int) ch2; 53 : } 54 10611234 : if (ch1 == 0) 55 2023482 : break; 56 : } 57 2023482 : return 0; 58 : } 59 : 60 : /* 61 : * Case-independent comparison of two not-necessarily-null-terminated strings. 62 : * At most n bytes will be examined from each string. 63 : */ 64 : int 65 11504572 : pg_strncasecmp(const char *s1, const char *s2, size_t n) 66 : { 67 16575416 : while (n-- > 0) 68 : { 69 12900804 : unsigned char ch1 = (unsigned char) *s1++; 70 12900804 : unsigned char ch2 = (unsigned char) *s2++; 71 : 72 12900804 : if (ch1 != ch2) 73 : { 74 8016300 : if (ch1 >= 'A' && ch1 <= 'Z') 75 5652996 : ch1 += 'a' - 'A'; 76 2363304 : else if (IS_HIGHBIT_SET(ch1) && isupper(ch1)) 77 0 : ch1 = tolower(ch1); 78 : 79 8016300 : if (ch2 >= 'A' && ch2 <= 'Z') 80 385654 : ch2 += 'a' - 'A'; 81 7630646 : else if (IS_HIGHBIT_SET(ch2) && isupper(ch2)) 82 0 : ch2 = tolower(ch2); 83 : 84 8016300 : if (ch1 != ch2) 85 7829960 : return (int) ch1 - (int) ch2; 86 : } 87 5070844 : if (ch1 == 0) 88 0 : break; 89 : } 90 3674612 : return 0; 91 : } 92 : 93 : /* 94 : * Fold a character to upper case. 95 : * 96 : * Unlike some versions of toupper(), this is safe to apply to characters 97 : * that aren't lower case letters. Note however that the whole thing is 98 : * a bit bogus for multibyte character sets. 99 : */ 100 : unsigned char 101 344862 : pg_toupper(unsigned char ch) 102 : { 103 344862 : if (ch >= 'a' && ch <= 'z') 104 205236 : ch += 'A' - 'a'; 105 139626 : else if (IS_HIGHBIT_SET(ch) && islower(ch)) 106 0 : ch = toupper(ch); 107 344862 : return ch; 108 : } 109 : 110 : /* 111 : * Fold a character to lower case. 112 : * 113 : * Unlike some versions of tolower(), this is safe to apply to characters 114 : * that aren't upper case letters. Note however that the whole thing is 115 : * a bit bogus for multibyte character sets. 116 : */ 117 : unsigned char 118 14725078 : pg_tolower(unsigned char ch) 119 : { 120 14725078 : if (ch >= 'A' && ch <= 'Z') 121 9865604 : ch += 'a' - 'A'; 122 4859474 : else if (IS_HIGHBIT_SET(ch) && isupper(ch)) 123 0 : ch = tolower(ch); 124 14725078 : return ch; 125 : }