Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * scansup.c 4 : * scanner support routines used by the core lexer 5 : * 6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group 7 : * Portions Copyright (c) 1994, Regents of the University of California 8 : * 9 : * 10 : * IDENTIFICATION 11 : * src/backend/parser/scansup.c 12 : * 13 : *------------------------------------------------------------------------- 14 : */ 15 : #include "postgres.h" 16 : 17 : #include <ctype.h> 18 : 19 : #include "mb/pg_wchar.h" 20 : #include "parser/scansup.h" 21 : #include "utils/pg_locale.h" 22 : 23 : 24 : /* 25 : * downcase_truncate_identifier() --- do appropriate downcasing and 26 : * truncation of an unquoted identifier. Optionally warn of truncation. 27 : * 28 : * Returns a palloc'd string containing the adjusted identifier. 29 : * 30 : * Note: in some usages the passed string is not null-terminated. 31 : * 32 : * Note: the API of this function is designed to allow for downcasing 33 : * transformations that increase the string length, but we don't yet 34 : * support that. If you want to implement it, you'll need to fix 35 : * SplitIdentifierString() in utils/adt/varlena.c. 36 : */ 37 : char * 38 6868500 : downcase_truncate_identifier(const char *ident, int len, bool warn) 39 : { 40 6868500 : return downcase_identifier(ident, len, warn, true); 41 : } 42 : 43 : /* 44 : * a workhorse for downcase_truncate_identifier 45 : */ 46 : char * 47 6868602 : downcase_identifier(const char *ident, int len, bool warn, bool truncate) 48 : { 49 : char *result; 50 : size_t needed pg_attribute_unused(); 51 : 52 : /* 53 : * Preserves string length. 54 : * 55 : * NB: if we decide to support Unicode-aware identifier case folding, then 56 : * we need to account for a change in string length. 57 : */ 58 6868602 : result = palloc(len + 1); 59 : 60 6868602 : needed = pg_downcase_ident(result, len + 1, ident, len); 61 : Assert(needed == len); 62 : Assert(result[len] == '\0'); 63 : 64 6868602 : if (len >= NAMEDATALEN && truncate) 65 12 : truncate_identifier(result, len, warn); 66 : 67 6868602 : return result; 68 : } 69 : 70 : 71 : /* 72 : * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes. 73 : * 74 : * The given string is modified in-place, if necessary. A warning is 75 : * issued if requested. 76 : * 77 : * We require the caller to pass in the string length since this saves a 78 : * strlen() call in some common usages. 79 : */ 80 : void 81 787840 : truncate_identifier(char *ident, int len, bool warn) 82 : { 83 787840 : if (len >= NAMEDATALEN) 84 : { 85 14 : len = pg_mbcliplen(ident, len, NAMEDATALEN - 1); 86 14 : if (warn) 87 14 : ereport(NOTICE, 88 : (errcode(ERRCODE_NAME_TOO_LONG), 89 : errmsg("identifier \"%s\" will be truncated to \"%.*s\"", 90 : ident, len, ident))); 91 14 : ident[len] = '\0'; 92 : } 93 787840 : } 94 : 95 : /* 96 : * scanner_isspace() --- return true if flex scanner considers char whitespace 97 : * 98 : * This should be used instead of the potentially locale-dependent isspace() 99 : * function when it's important to match the lexer's behavior. 100 : * 101 : * In principle we might need similar functions for isalnum etc, but for the 102 : * moment only isspace seems needed. 103 : */ 104 : bool 105 39942312 : scanner_isspace(char ch) 106 : { 107 : /* This must match scan.l's list of {space} characters */ 108 39942312 : if (ch == ' ' || 109 39467664 : ch == '\t' || 110 39467324 : ch == '\n' || 111 39467306 : ch == '\r' || 112 39467300 : ch == '\v' || 113 : ch == '\f') 114 475018 : return true; 115 39467294 : return false; 116 : }