Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * kwlookup.c 4 : * Key word lookup for PostgreSQL 5 : * 6 : * 7 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group 8 : * Portions Copyright (c) 1994, Regents of the University of California 9 : * 10 : * 11 : * IDENTIFICATION 12 : * src/common/kwlookup.c 13 : * 14 : *------------------------------------------------------------------------- 15 : */ 16 : #include "c.h" 17 : 18 : #include "common/kwlookup.h" 19 : 20 : 21 : /* 22 : * ScanKeywordLookup - see if a given word is a keyword 23 : * 24 : * The list of keywords to be matched against is passed as a ScanKeywordList. 25 : * 26 : * Returns the keyword number (0..N-1) of the keyword, or -1 if no match. 27 : * Callers typically use the keyword number to index into information 28 : * arrays, but that is no concern of this code. 29 : * 30 : * The match is done case-insensitively. Note that we deliberately use a 31 : * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', 32 : * even if we are in a locale where tolower() would produce more or different 33 : * translations. This is to conform to the SQL99 spec, which says that 34 : * keywords are to be matched in this way even though non-keyword identifiers 35 : * receive a different case-normalization mapping. 36 : */ 37 : int 38 13264060 : ScanKeywordLookup(const char *str, 39 : const ScanKeywordList *keywords) 40 : { 41 : size_t len; 42 : int h; 43 : const char *kw; 44 : 45 : /* 46 : * Reject immediately if too long to be any keyword. This saves useless 47 : * hashing and downcasing work on long strings. 48 : */ 49 13264060 : len = strlen(str); 50 13264060 : if (len > keywords->max_kw_len) 51 458358 : return -1; 52 : 53 : /* 54 : * Compute the hash function. We assume it was generated to produce 55 : * case-insensitive results. Since it's a perfect hash, we need only 56 : * match to the specific keyword it identifies. 57 : */ 58 12805702 : h = keywords->hash(str, len); 59 : 60 : /* An out-of-range result implies no match */ 61 12805702 : if (h < 0 || h >= keywords->num_keywords) 62 5445810 : return -1; 63 : 64 : /* 65 : * Compare character-by-character to see if we have a match, applying an 66 : * ASCII-only downcasing to the input characters. We must not use 67 : * tolower() since it may produce the wrong translation in some locales 68 : * (eg, Turkish). 69 : */ 70 7359892 : kw = GetScanKeyword(h, keywords); 71 31470642 : while (*str != '\0') 72 : { 73 26150486 : char ch = *str++; 74 : 75 26150486 : if (ch >= 'A' && ch <= 'Z') 76 16496830 : ch += 'a' - 'A'; 77 26150486 : if (ch != *kw++) 78 2039736 : return -1; 79 : } 80 5320156 : if (*kw != '\0') 81 0 : return -1; 82 : 83 : /* Success! */ 84 5320156 : return h; 85 : }