Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * scansup.c
4 : * scanner support routines used by the core lexer
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/parser/scansup.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <ctype.h>
18 :
19 : #include "mb/pg_wchar.h"
20 : #include "parser/scansup.h"
21 : #include "utils/pg_locale.h"
22 :
23 :
/*
 * downcase_truncate_identifier() --- fold an unquoted identifier to lower
 * case and truncate it if it is too long, optionally warning when
 * truncation happens.
 *
 *	ident	identifier text; in some usages this is not null-terminated,
 *			so only the first len bytes are meaningful
 *	len		number of bytes of ident to process
 *	warn	passed through to the truncation step to request a warning
 *
 * The result is a palloc'd string containing the adjusted identifier.
 *
 * Note: this API is shaped so that a downcasing transformation could make
 * the string longer, but that is not supported yet.  Anyone implementing it
 * will also need to fix SplitIdentifierString() in utils/adt/varlena.c.
 */
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
	/* This entry point always asks the workhorse to truncate. */
	const bool	truncate = true;

	return downcase_identifier(ident, len, warn, truncate);
}
42 :
43 : /*
44 : * a workhorse for downcase_truncate_identifier
45 : */
46 : char *
47 3565153 : downcase_identifier(const char *ident, int len, bool warn, bool truncate)
48 : {
49 : char *result;
50 : size_t needed pg_attribute_unused();
51 :
52 : /*
53 : * Preserves string length.
54 : *
55 : * NB: if we decide to support Unicode-aware identifier case folding, then
56 : * we need to account for a change in string length.
57 : */
58 3565153 : result = palloc(len + 1);
59 :
60 3565153 : needed = pg_downcase_ident(result, len + 1, ident, len);
61 : Assert(needed == len);
62 : Assert(result[len] == '\0');
63 :
64 3565153 : if (len >= NAMEDATALEN && truncate)
65 6 : truncate_identifier(result, len, warn);
66 :
67 3565153 : return result;
68 : }
69 :
70 :
71 : /*
72 : * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
73 : *
74 : * The given string is modified in-place, if necessary. A warning is
75 : * issued if requested.
76 : *
77 : * We require the caller to pass in the string length since this saves a
78 : * strlen() call in some common usages.
79 : */
80 : void
81 410128 : truncate_identifier(char *ident, int len, bool warn)
82 : {
83 410128 : if (len >= NAMEDATALEN)
84 : {
85 7 : len = pg_mbcliplen(ident, len, NAMEDATALEN - 1);
86 7 : if (warn)
87 7 : ereport(NOTICE,
88 : (errcode(ERRCODE_NAME_TOO_LONG),
89 : errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
90 : ident, len, ident)));
91 7 : ident[len] = '\0';
92 : }
93 410128 : }
94 :
/*
 * scanner_isspace() --- does the flex scanner treat this char as whitespace?
 *
 * Returns true for space, tab, newline, carriage return, vertical tab and
 * form feed; false for every other character.
 *
 * Use this rather than isspace(), which is potentially locale-dependent,
 * whenever it is important to agree with the lexer.
 *
 * In principle similar functions might be needed for isalnum etc, but so
 * far only isspace seems to be required.
 */
bool
scanner_isspace(char ch)
{
	/* This must match scan.l's list of {space} characters */
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\v':
		case '\f':
			return true;
		default:
			return false;
	}
}
|