Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pgstrcasecmp.c
4 : * Portable SQL-like case-independent comparisons and conversions.
5 : *
6 : * SQL99 specifies Unicode-aware case normalization, which we don't yet
7 : * have the infrastructure for. Instead we use tolower() to provide a
8 : * locale-aware translation. However, there are some locales where this
9 : * is not right either (eg, Turkish may do strange things with 'i' and
10 : * 'I'). Our current compromise is to use tolower() for characters with
11 : * the high bit set, and use an ASCII-only downcasing for 7-bit
12 : * characters.
13 : *
14 : * NB: this code should match downcase_truncate_identifier() in scansup.c.
15 : *
16 : *
17 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
18 : *
19 : * src/port/pgstrcasecmp.c
20 : *
21 : *-------------------------------------------------------------------------
22 : */
23 : #include "c.h"
24 :
25 : #include <ctype.h>
26 :
27 :
/*
 * pg_strcasecmp
 *		Compare two null-terminated strings, ignoring letter case.
 *
 * Returns 0 if the strings match after case folding.  Otherwise returns the
 * difference between the first pair of folded bytes that disagree, each
 * taken as an unsigned char, so the sign of the result gives the ordering.
 *
 * ASCII 'A'..'Z' are folded arithmetically; tolower() is consulted only for
 * bytes that have the high bit set.
 */
int
pg_strcasecmp(const char *s1, const char *s2)
{
	unsigned char c1;
	unsigned char c2;

	do
	{
		c1 = (unsigned char) *s1++;
		c2 = (unsigned char) *s2++;

		/* Identical bytes need no folding; go straight to the end test. */
		if (c1 == c2)
			continue;

		/* The raw bytes differ, so fold each one to lower case. */
		if ('A' <= c1 && c1 <= 'Z')
			c1 = (unsigned char) (c1 + ('a' - 'A'));
		else if ((c1 & 0x80) != 0 && isupper(c1))
			c1 = (unsigned char) tolower(c1);

		if ('A' <= c2 && c2 <= 'Z')
			c2 = (unsigned char) (c2 + ('a' - 'A'));
		else if ((c2 & 0x80) != 0 && isupper(c2))
			c2 = (unsigned char) tolower(c2);

		/* Still different after folding: this pair decides the result. */
		if (c1 != c2)
			return (int) c1 - (int) c2;
	} while (c1 != '\0');

	/* Reached the terminator with every byte pair matching. */
	return 0;
}
59 :
/*
 * pg_strncasecmp
 *		Compare two strings, ignoring letter case, looking at no more than
 *		n bytes of each.
 *
 * The strings need not be null-terminated.  Comparison stops at the first
 * byte pair that still differs after case folding, at a null byte, or once
 * n bytes have been examined, whichever comes first.
 *
 * Returns 0 if no difference was found.  Otherwise returns the difference
 * between the two folded bytes that disagree, each taken as an unsigned char.
 */
int
pg_strncasecmp(const char *s1, const char *s2, size_t n)
{
	size_t		pos;

	for (pos = 0; pos < n; pos++)
	{
		unsigned char c1 = (unsigned char) s1[pos];
		unsigned char c2 = (unsigned char) s2[pos];

		if (c1 != c2)
		{
			/* Fold ASCII by arithmetic; use tolower() only for high-bit bytes. */
			if ('A' <= c1 && c1 <= 'Z')
				c1 = (unsigned char) (c1 + ('a' - 'A'));
			else if ((c1 & 0x80) != 0 && isupper(c1))
				c1 = (unsigned char) tolower(c1);

			if ('A' <= c2 && c2 <= 'Z')
				c2 = (unsigned char) (c2 + ('a' - 'A'));
			else if ((c2 & 0x80) != 0 && isupper(c2))
				c2 = (unsigned char) tolower(c2);

			if (c1 != c2)
				return (int) c1 - (int) c2;
		}

		/* The bytes match here, so a null byte ends the comparison. */
		if (c1 == '\0')
			return 0;
	}

	/* All n bytes matched. */
	return 0;
}
92 :
/*
 * pg_toupper
 *		Return the upper-case form of a single byte.
 *
 * ASCII 'a'..'z' are converted arithmetically.  A byte with the high bit set
 * is handed to toupper(), but only when islower() reports it as lower case.
 * Every other byte comes back unchanged, so the function is safe to call on
 * input that is not a lower-case letter.
 *
 * Since this works one byte at a time, the result is of doubtful value in
 * multibyte character sets.
 */
unsigned char
pg_toupper(unsigned char ch)
{
	if ('a' <= ch && ch <= 'z')
		return (unsigned char) (ch - ('a' - 'A'));

	if ((ch & 0x80) != 0 && islower(ch))
		return (unsigned char) toupper(ch);

	return ch;
}
109 :
/*
 * pg_tolower
 *		Return the lower-case form of a single byte.
 *
 * ASCII 'A'..'Z' are converted arithmetically.  A byte with the high bit set
 * is handed to tolower(), but only when isupper() reports it as upper case.
 * Every other byte comes back unchanged, so the function is safe to call on
 * input that is not an upper-case letter.
 *
 * Since this works one byte at a time, the result is of doubtful value in
 * multibyte character sets.
 */
unsigned char
pg_tolower(unsigned char ch)
{
	if ('A' <= ch && ch <= 'Z')
		return (unsigned char) (ch + ('a' - 'A'));

	if ((ch & 0x80) != 0 && isupper(ch))
		return (unsigned char) tolower(ch);

	return ch;
}
|