Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * varchar.c
4 : * Functions for the built-in types char(n) and varchar(n).
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/varchar.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/detoast.h"
18 : #include "access/htup_details.h"
19 : #include "catalog/pg_collation.h"
20 : #include "catalog/pg_type.h"
21 : #include "common/hashfn.h"
22 : #include "libpq/pqformat.h"
23 : #include "mb/pg_wchar.h"
24 : #include "nodes/nodeFuncs.h"
25 : #include "nodes/supportnodes.h"
26 : #include "utils/array.h"
27 : #include "utils/builtins.h"
28 : #include "utils/lsyscache.h"
29 : #include "utils/pg_locale.h"
30 : #include "utils/varlena.h"
31 :
32 : /* common code for bpchartypmodin and varchartypmodin */
33 : static int32
34 3408 : anychar_typmodin(ArrayType *ta, const char *typename)
35 : {
36 : int32 typmod;
37 : int32 *tl;
38 : int n;
39 :
40 3408 : tl = ArrayGetIntegerTypmods(ta, &n);
41 :
42 : /*
43 : * we're not too tense about good error message here because grammar
44 : * shouldn't allow wrong number of modifiers for CHAR
45 : */
46 3408 : if (n != 1)
47 0 : ereport(ERROR,
48 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
49 : errmsg("invalid type modifier")));
50 :
51 3408 : if (*tl < 1)
52 0 : ereport(ERROR,
53 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
54 : errmsg("length for type %s must be at least 1", typename)));
55 3408 : if (*tl > MaxAttrSize)
56 0 : ereport(ERROR,
57 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
58 : errmsg("length for type %s cannot exceed %d",
59 : typename, MaxAttrSize)));
60 :
61 : /*
62 : * For largely historical reasons, the typmod is VARHDRSZ plus the number
63 : * of characters; there is enough client-side code that knows about that
64 : * that we'd better not change it.
65 : */
66 3408 : typmod = VARHDRSZ + *tl;
67 :
68 3408 : return typmod;
69 : }
70 :
71 : /* common code for bpchartypmodout and varchartypmodout */
72 : static char *
73 952 : anychar_typmodout(int32 typmod)
74 : {
75 952 : char *res = (char *) palloc(64);
76 :
77 952 : if (typmod > VARHDRSZ)
78 952 : snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
79 : else
80 0 : *res = '\0';
81 :
82 952 : return res;
83 : }
84 :
85 :
86 : /*
87 : * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
88 : * is for blank-padded string whose length is specified in CREATE TABLE.
89 : * VARCHAR is for storing string whose length is at most the length specified
90 : * at CREATE TABLE time.
91 : *
92 : * It's hard to implement these types because we cannot figure out
93 : * the length of the type from the type itself. I changed (hopefully all) the
94 : * fmgr calls that invoke input functions of a data type to supply the
95 : * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
96 : * the length of the attributes and hence the exact length of the char() or
97 : * varchar(). We pass this to bpcharin() or varcharin().) In the case where
98 : * we cannot determine the length, we pass in -1 instead and the input
99 : * converter does not enforce any length check.
100 : *
101 : * We actually implement this as a varlena so that we don't have to pass in
102 : * the length for the comparison functions. (The difference between these
103 : * types and "text" is that we truncate and possibly blank-pad the string
104 : * at insertion time.)
105 : *
106 : * - ay 6/95
107 : */
108 :
109 :
110 : /*****************************************************************************
111 : * bpchar - char() *
112 : *****************************************************************************/
113 :
114 : /*
115 : * bpchar_input -- common guts of bpcharin and bpcharrecv
116 : *
117 : * s is the input text of length len (may not be null-terminated)
118 : * atttypmod is the typmod value to apply
119 : *
120 : * Note that atttypmod is measured in characters, which
121 : * is not necessarily the same as the number of bytes.
122 : *
123 : * If the input string is too long, raise an error, unless the extra
124 : * characters are spaces, in which case they're truncated. (per SQL)
125 : *
126 : * If escontext points to an ErrorSaveContext node, that is filled instead
127 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
128 : * to detect errors.
129 : */
130 : static BpChar *
131 423140 : bpchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
132 : {
133 : BpChar *result;
134 : char *r;
135 : size_t maxlen;
136 :
137 : /* If typmod is -1 (or invalid), use the actual string length */
138 423140 : if (atttypmod < (int32) VARHDRSZ)
139 8652 : maxlen = len;
140 : else
141 : {
142 : size_t charlen; /* number of CHARACTERS in the input */
143 :
144 414488 : maxlen = atttypmod - VARHDRSZ;
145 414488 : charlen = pg_mbstrlen_with_len(s, len);
146 414488 : if (charlen > maxlen)
147 : {
148 : /* Verify that extra characters are spaces, and clip them off */
149 42 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
150 : size_t j;
151 :
152 : /*
153 : * at this point, len is the actual BYTE length of the input
154 : * string, maxlen is the max number of CHARACTERS allowed for this
155 : * bpchar type, mbmaxlen is the length in BYTES of those chars.
156 : */
157 54 : for (j = mbmaxlen; j < len; j++)
158 : {
159 48 : if (s[j] != ' ')
160 36 : ereturn(escontext, NULL,
161 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
162 : errmsg("value too long for type character(%d)",
163 : (int) maxlen)));
164 : }
165 :
166 : /*
167 : * Now we set maxlen to the necessary byte length, not the number
168 : * of CHARACTERS!
169 : */
170 6 : maxlen = len = mbmaxlen;
171 : }
172 : else
173 : {
174 : /*
175 : * Now we set maxlen to the necessary byte length, not the number
176 : * of CHARACTERS!
177 : */
178 414446 : maxlen = len + (maxlen - charlen);
179 : }
180 : }
181 :
182 423104 : result = (BpChar *) palloc(maxlen + VARHDRSZ);
183 423104 : SET_VARSIZE(result, maxlen + VARHDRSZ);
184 423104 : r = VARDATA(result);
185 423104 : memcpy(r, s, len);
186 :
187 : /* blank pad the string if necessary */
188 423104 : if (maxlen > len)
189 402184 : memset(r + len, ' ', maxlen - len);
190 :
191 423104 : return result;
192 : }
193 :
194 : /*
195 : * Convert a C string to CHARACTER internal representation. atttypmod
196 : * is the declared length of the type plus VARHDRSZ.
197 : */
198 : Datum
199 423140 : bpcharin(PG_FUNCTION_ARGS)
200 : {
201 423140 : char *s = PG_GETARG_CSTRING(0);
202 : #ifdef NOT_USED
203 : Oid typelem = PG_GETARG_OID(1);
204 : #endif
205 423140 : int32 atttypmod = PG_GETARG_INT32(2);
206 : BpChar *result;
207 :
208 423140 : result = bpchar_input(s, strlen(s), atttypmod, fcinfo->context);
209 423116 : PG_RETURN_BPCHAR_P(result);
210 : }
211 :
212 :
213 : /*
214 : * Convert a CHARACTER value to a C string.
215 : *
216 : * Uses the text conversion functions, which is only appropriate if BpChar
217 : * and text are equivalent types.
218 : */
219 : Datum
220 46298 : bpcharout(PG_FUNCTION_ARGS)
221 : {
222 46298 : Datum txt = PG_GETARG_DATUM(0);
223 :
224 46298 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
225 : }
226 :
227 : /*
228 : * bpcharrecv - converts external binary format to bpchar
229 : */
230 : Datum
231 0 : bpcharrecv(PG_FUNCTION_ARGS)
232 : {
233 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
234 : #ifdef NOT_USED
235 : Oid typelem = PG_GETARG_OID(1);
236 : #endif
237 0 : int32 atttypmod = PG_GETARG_INT32(2);
238 : BpChar *result;
239 : char *str;
240 : int nbytes;
241 :
242 0 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
243 0 : result = bpchar_input(str, nbytes, atttypmod, NULL);
244 0 : pfree(str);
245 0 : PG_RETURN_BPCHAR_P(result);
246 : }
247 :
248 : /*
249 : * bpcharsend - converts bpchar to binary format
250 : */
251 : Datum
252 4 : bpcharsend(PG_FUNCTION_ARGS)
253 : {
254 : /* Exactly the same as textsend, so share code */
255 4 : return textsend(fcinfo);
256 : }
257 :
258 :
259 : /*
260 : * Converts a CHARACTER type to the specified size.
261 : *
262 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
263 : * isExplicit is true if this is for an explicit cast to char(N).
264 : *
265 : * Truncation rules: for an explicit cast, silently truncate to the given
266 : * length; for an implicit cast, raise error unless extra characters are
267 : * all spaces. (This is sort-of per SQL: the spec would actually have us
268 : * raise a "completion condition" for the explicit cast case, but Postgres
269 : * hasn't got such a concept.)
270 : */
271 : Datum
272 12040 : bpchar(PG_FUNCTION_ARGS)
273 : {
274 12040 : BpChar *source = PG_GETARG_BPCHAR_PP(0);
275 12040 : int32 maxlen = PG_GETARG_INT32(1);
276 12040 : bool isExplicit = PG_GETARG_BOOL(2);
277 : BpChar *result;
278 : int32 len;
279 : char *r;
280 : char *s;
281 : int i;
282 : int charlen; /* number of characters in the input string +
283 : * VARHDRSZ */
284 :
285 : /* No work if typmod is invalid */
286 12040 : if (maxlen < (int32) VARHDRSZ)
287 0 : PG_RETURN_BPCHAR_P(source);
288 :
289 12040 : maxlen -= VARHDRSZ;
290 :
291 12040 : len = VARSIZE_ANY_EXHDR(source);
292 12040 : s = VARDATA_ANY(source);
293 :
294 12040 : charlen = pg_mbstrlen_with_len(s, len);
295 :
296 : /* No work if supplied data matches typmod already */
297 12040 : if (charlen == maxlen)
298 5586 : PG_RETURN_BPCHAR_P(source);
299 :
300 6454 : if (charlen > maxlen)
301 : {
302 : /* Verify that extra characters are spaces, and clip them off */
303 : size_t maxmblen;
304 :
305 36 : maxmblen = pg_mbcharcliplen(s, len, maxlen);
306 :
307 36 : if (!isExplicit)
308 : {
309 84 : for (i = maxmblen; i < len; i++)
310 72 : if (s[i] != ' ')
311 18 : ereport(ERROR,
312 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
313 : errmsg("value too long for type character(%d)",
314 : maxlen)));
315 : }
316 :
317 18 : len = maxmblen;
318 :
319 : /*
320 : * At this point, maxlen is the necessary byte length, not the number
321 : * of CHARACTERS!
322 : */
323 18 : maxlen = len;
324 : }
325 : else
326 : {
327 : /*
328 : * At this point, maxlen is the necessary byte length, not the number
329 : * of CHARACTERS!
330 : */
331 6418 : maxlen = len + (maxlen - charlen);
332 : }
333 :
334 : Assert(maxlen >= len);
335 :
336 6436 : result = palloc(maxlen + VARHDRSZ);
337 6436 : SET_VARSIZE(result, maxlen + VARHDRSZ);
338 6436 : r = VARDATA(result);
339 :
340 6436 : memcpy(r, s, len);
341 :
342 : /* blank pad the string if necessary */
343 6436 : if (maxlen > len)
344 6418 : memset(r + len, ' ', maxlen - len);
345 :
346 6436 : PG_RETURN_BPCHAR_P(result);
347 : }
348 :
349 :
350 : /* char_bpchar()
351 : * Convert char to bpchar(1).
352 : */
353 : Datum
354 0 : char_bpchar(PG_FUNCTION_ARGS)
355 : {
356 0 : char c = PG_GETARG_CHAR(0);
357 : BpChar *result;
358 :
359 0 : result = (BpChar *) palloc(VARHDRSZ + 1);
360 :
361 0 : SET_VARSIZE(result, VARHDRSZ + 1);
362 0 : *(VARDATA(result)) = c;
363 :
364 0 : PG_RETURN_BPCHAR_P(result);
365 : }
366 :
367 :
368 : /* bpchar_name()
369 : * Converts a bpchar() type to a NameData type.
370 : */
371 : Datum
372 0 : bpchar_name(PG_FUNCTION_ARGS)
373 : {
374 0 : BpChar *s = PG_GETARG_BPCHAR_PP(0);
375 : char *s_data;
376 : Name result;
377 : int len;
378 :
379 0 : len = VARSIZE_ANY_EXHDR(s);
380 0 : s_data = VARDATA_ANY(s);
381 :
382 : /* Truncate oversize input */
383 0 : if (len >= NAMEDATALEN)
384 0 : len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
385 :
386 : /* Remove trailing blanks */
387 0 : while (len > 0)
388 : {
389 0 : if (s_data[len - 1] != ' ')
390 0 : break;
391 0 : len--;
392 : }
393 :
394 : /* We use palloc0 here to ensure result is zero-padded */
395 0 : result = (Name) palloc0(NAMEDATALEN);
396 0 : memcpy(NameStr(*result), s_data, len);
397 :
398 0 : PG_RETURN_NAME(result);
399 : }
400 :
401 : /* name_bpchar()
402 : * Converts a NameData type to a bpchar type.
403 : *
404 : * Uses the text conversion functions, which is only appropriate if BpChar
405 : * and text are equivalent types.
406 : */
407 : Datum
408 6 : name_bpchar(PG_FUNCTION_ARGS)
409 : {
410 6 : Name s = PG_GETARG_NAME(0);
411 : BpChar *result;
412 :
413 6 : result = (BpChar *) cstring_to_text(NameStr(*s));
414 6 : PG_RETURN_BPCHAR_P(result);
415 : }
416 :
417 : Datum
418 2068 : bpchartypmodin(PG_FUNCTION_ARGS)
419 : {
420 2068 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
421 :
422 2068 : PG_RETURN_INT32(anychar_typmodin(ta, "char"));
423 : }
424 :
425 : Datum
426 778 : bpchartypmodout(PG_FUNCTION_ARGS)
427 : {
428 778 : int32 typmod = PG_GETARG_INT32(0);
429 :
430 778 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
431 : }
432 :
433 :
434 : /*****************************************************************************
435 : * varchar - varchar(n)
436 : *
437 : * Note: varchar piggybacks on type text for most operations, and so has no
438 : * C-coded functions except for I/O and typmod checking.
439 : *****************************************************************************/
440 :
441 : /*
442 : * varchar_input -- common guts of varcharin and varcharrecv
443 : *
444 : * s is the input text of length len (may not be null-terminated)
445 : * atttypmod is the typmod value to apply
446 : *
447 : * Note that atttypmod is measured in characters, which
448 : * is not necessarily the same as the number of bytes.
449 : *
450 : * If the input string is too long, raise an error, unless the extra
451 : * characters are spaces, in which case they're truncated. (per SQL)
452 : *
453 : * If escontext points to an ErrorSaveContext node, that is filled instead
454 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
455 : * to detect errors.
456 : */
457 : static VarChar *
458 2937320 : varchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
459 : {
460 : VarChar *result;
461 : size_t maxlen;
462 :
463 2937320 : maxlen = atttypmod - VARHDRSZ;
464 :
465 2937320 : if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
466 : {
467 : /* Verify that extra characters are spaces, and clip them off */
468 30 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
469 : size_t j;
470 :
471 42 : for (j = mbmaxlen; j < len; j++)
472 : {
473 36 : if (s[j] != ' ')
474 24 : ereturn(escontext, NULL,
475 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
476 : errmsg("value too long for type character varying(%d)",
477 : (int) maxlen)));
478 : }
479 :
480 6 : len = mbmaxlen;
481 : }
482 :
483 : /*
484 : * We can use cstring_to_text_with_len because VarChar and text are
485 : * binary-compatible types.
486 : */
487 2937296 : result = (VarChar *) cstring_to_text_with_len(s, len);
488 2937296 : return result;
489 : }
490 :
491 : /*
492 : * Convert a C string to VARCHAR internal representation. atttypmod
493 : * is the declared length of the type plus VARHDRSZ.
494 : */
495 : Datum
496 2937318 : varcharin(PG_FUNCTION_ARGS)
497 : {
498 2937318 : char *s = PG_GETARG_CSTRING(0);
499 : #ifdef NOT_USED
500 : Oid typelem = PG_GETARG_OID(1);
501 : #endif
502 2937318 : int32 atttypmod = PG_GETARG_INT32(2);
503 : VarChar *result;
504 :
505 2937318 : result = varchar_input(s, strlen(s), atttypmod, fcinfo->context);
506 2937306 : PG_RETURN_VARCHAR_P(result);
507 : }
508 :
509 :
510 : /*
511 : * Convert a VARCHAR value to a C string.
512 : *
513 : * Uses the text to C string conversion function, which is only appropriate
514 : * if VarChar and text are equivalent types.
515 : */
516 : Datum
517 258876 : varcharout(PG_FUNCTION_ARGS)
518 : {
519 258876 : Datum txt = PG_GETARG_DATUM(0);
520 :
521 258876 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
522 : }
523 :
524 : /*
525 : * varcharrecv - converts external binary format to varchar
526 : */
527 : Datum
528 2 : varcharrecv(PG_FUNCTION_ARGS)
529 : {
530 2 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
531 : #ifdef NOT_USED
532 : Oid typelem = PG_GETARG_OID(1);
533 : #endif
534 2 : int32 atttypmod = PG_GETARG_INT32(2);
535 : VarChar *result;
536 : char *str;
537 : int nbytes;
538 :
539 2 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
540 2 : result = varchar_input(str, nbytes, atttypmod, NULL);
541 2 : pfree(str);
542 2 : PG_RETURN_VARCHAR_P(result);
543 : }
544 :
545 : /*
546 : * varcharsend - converts varchar to binary format
547 : */
548 : Datum
549 63338 : varcharsend(PG_FUNCTION_ARGS)
550 : {
551 : /* Exactly the same as textsend, so share code */
552 63338 : return textsend(fcinfo);
553 : }
554 :
555 :
556 : /*
557 : * varchar_support()
558 : *
559 : * Planner support function for the varchar() length coercion function.
560 : *
561 : * Currently, the only interesting thing we can do is flatten calls that set
562 : * the new maximum length >= the previous maximum length. We can ignore the
563 : * isExplicit argument, since that only affects truncation cases.
564 : */
565 : Datum
566 2122 : varchar_support(PG_FUNCTION_ARGS)
567 : {
568 2122 : Node *rawreq = (Node *) PG_GETARG_POINTER(0);
569 2122 : Node *ret = NULL;
570 :
571 2122 : if (IsA(rawreq, SupportRequestSimplify))
572 : {
573 878 : SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
574 878 : FuncExpr *expr = req->fcall;
575 : Node *typmod;
576 :
577 : Assert(list_length(expr->args) >= 2);
578 :
579 878 : typmod = (Node *) lsecond(expr->args);
580 :
581 878 : if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
582 : {
583 878 : Node *source = (Node *) linitial(expr->args);
584 878 : int32 old_typmod = exprTypmod(source);
585 878 : int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
586 878 : int32 old_max = old_typmod - VARHDRSZ;
587 878 : int32 new_max = new_typmod - VARHDRSZ;
588 :
589 878 : if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
590 30 : ret = relabel_to_typmod(source, new_typmod);
591 : }
592 : }
593 :
594 2122 : PG_RETURN_POINTER(ret);
595 : }
596 :
597 : /*
598 : * Converts a VARCHAR type to the specified size.
599 : *
600 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
601 : * isExplicit is true if this is for an explicit cast to varchar(N).
602 : *
603 : * Truncation rules: for an explicit cast, silently truncate to the given
604 : * length; for an implicit cast, raise error unless extra characters are
605 : * all spaces. (This is sort-of per SQL: the spec would actually have us
606 : * raise a "completion condition" for the explicit cast case, but Postgres
607 : * hasn't got such a concept.)
608 : */
609 : Datum
610 26788 : varchar(PG_FUNCTION_ARGS)
611 : {
612 26788 : VarChar *source = PG_GETARG_VARCHAR_PP(0);
613 26788 : int32 typmod = PG_GETARG_INT32(1);
614 26788 : bool isExplicit = PG_GETARG_BOOL(2);
615 : int32 len,
616 : maxlen;
617 : size_t maxmblen;
618 : int i;
619 : char *s_data;
620 :
621 26788 : len = VARSIZE_ANY_EXHDR(source);
622 26788 : s_data = VARDATA_ANY(source);
623 26788 : maxlen = typmod - VARHDRSZ;
624 :
625 : /* No work if typmod is invalid or supplied data fits it already */
626 26788 : if (maxlen < 0 || len <= maxlen)
627 26704 : PG_RETURN_VARCHAR_P(source);
628 :
629 : /* only reach here if string is too long... */
630 :
631 : /* truncate multibyte string preserving multibyte boundary */
632 84 : maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
633 :
634 84 : if (!isExplicit)
635 : {
636 114 : for (i = maxmblen; i < len; i++)
637 102 : if (s_data[i] != ' ')
638 42 : ereport(ERROR,
639 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
640 : errmsg("value too long for type character varying(%d)",
641 : maxlen)));
642 : }
643 :
644 42 : PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
645 : maxmblen));
646 : }
647 :
648 : Datum
649 1340 : varchartypmodin(PG_FUNCTION_ARGS)
650 : {
651 1340 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
652 :
653 1340 : PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
654 : }
655 :
656 : Datum
657 174 : varchartypmodout(PG_FUNCTION_ARGS)
658 : {
659 174 : int32 typmod = PG_GETARG_INT32(0);
660 :
661 174 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
662 : }
663 :
664 :
665 : /*****************************************************************************
666 : * Exported functions
667 : *****************************************************************************/
668 :
669 : /* "True" length (not counting trailing blanks) of a BpChar */
670 : static inline int
671 265146 : bcTruelen(BpChar *arg)
672 : {
673 265146 : return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
674 : }
675 :
/*
 * bpchartruelen -- length of the first len bytes of s once trailing blanks
 * are ignored.
 *
 * Returns the number of leading bytes that remain after stripping every
 * trailing ' ' byte; s is not modified.
 */
int
bpchartruelen(char *s, int len)
{
	int			truelen = len;

	/*
	 * Scanning byte-by-byte from the end is safe because ' ' is assumed to
	 * be a singleton unit on every supported multibyte server encoding.
	 */
	while (truelen > 0 && s[truelen - 1] == ' ')
		truelen--;

	return truelen;
}
692 :
693 : Datum
694 18 : bpcharlen(PG_FUNCTION_ARGS)
695 : {
696 18 : BpChar *arg = PG_GETARG_BPCHAR_PP(0);
697 : int len;
698 :
699 : /* get number of bytes, ignoring trailing spaces */
700 18 : len = bcTruelen(arg);
701 :
702 : /* in multibyte encoding, convert to number of characters */
703 18 : if (pg_database_encoding_max_length() != 1)
704 18 : len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
705 :
706 18 : PG_RETURN_INT32(len);
707 : }
708 :
709 : Datum
710 0 : bpcharoctetlen(PG_FUNCTION_ARGS)
711 : {
712 0 : Datum arg = PG_GETARG_DATUM(0);
713 :
714 : /* We need not detoast the input at all */
715 0 : PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
716 : }
717 :
718 :
719 : /*****************************************************************************
720 : * Comparison Functions used for bpchar
721 : *
722 : * Note: btree indexes need these routines not to leak memory; therefore,
723 : * be careful to free working copies of toasted datums. Most places don't
724 : * need to be so careful.
725 : *****************************************************************************/
726 :
727 : static void
728 25648 : check_collation_set(Oid collid)
729 : {
730 25648 : if (!OidIsValid(collid))
731 : {
732 : /*
733 : * This typically means that the parser could not resolve a conflict
734 : * of implicit collations, so report it that way.
735 : */
736 0 : ereport(ERROR,
737 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
738 : errmsg("could not determine which collation to use for string comparison"),
739 : errhint("Use the COLLATE clause to set the collation explicitly.")));
740 : }
741 25648 : }
742 :
743 : Datum
744 19240 : bpchareq(PG_FUNCTION_ARGS)
745 : {
746 19240 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
747 19240 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
748 : int len1,
749 : len2;
750 : bool result;
751 19240 : Oid collid = PG_GET_COLLATION();
752 19240 : bool locale_is_c = false;
753 19240 : pg_locale_t mylocale = 0;
754 :
755 19240 : check_collation_set(collid);
756 :
757 19240 : len1 = bcTruelen(arg1);
758 19240 : len2 = bcTruelen(arg2);
759 :
760 19240 : if (lc_collate_is_c(collid))
761 48 : locale_is_c = true;
762 : else
763 19192 : mylocale = pg_newlocale_from_collation(collid);
764 :
765 19240 : if (locale_is_c || pg_locale_deterministic(mylocale))
766 : {
767 : /*
768 : * Since we only care about equality or not-equality, we can avoid all
769 : * the expense of strcoll() here, and just do bitwise comparison.
770 : */
771 19096 : if (len1 != len2)
772 2490 : result = false;
773 : else
774 16606 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
775 : }
776 : else
777 : {
778 144 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
779 : collid) == 0);
780 : }
781 :
782 19240 : PG_FREE_IF_COPY(arg1, 0);
783 19240 : PG_FREE_IF_COPY(arg2, 1);
784 :
785 19240 : PG_RETURN_BOOL(result);
786 : }
787 :
788 : Datum
789 6408 : bpcharne(PG_FUNCTION_ARGS)
790 : {
791 6408 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
792 6408 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
793 : int len1,
794 : len2;
795 : bool result;
796 6408 : Oid collid = PG_GET_COLLATION();
797 6408 : bool locale_is_c = false;
798 6408 : pg_locale_t mylocale = 0;
799 :
800 6408 : check_collation_set(collid);
801 :
802 6408 : len1 = bcTruelen(arg1);
803 6408 : len2 = bcTruelen(arg2);
804 :
805 6408 : if (lc_collate_is_c(collid))
806 0 : locale_is_c = true;
807 : else
808 6408 : mylocale = pg_newlocale_from_collation(collid);
809 :
810 6408 : if (locale_is_c || pg_locale_deterministic(mylocale))
811 : {
812 : /*
813 : * Since we only care about equality or not-equality, we can avoid all
814 : * the expense of strcoll() here, and just do bitwise comparison.
815 : */
816 6384 : if (len1 != len2)
817 2022 : result = true;
818 : else
819 4362 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
820 : }
821 : else
822 : {
823 24 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
824 : collid) != 0);
825 : }
826 :
827 6408 : PG_FREE_IF_COPY(arg1, 0);
828 6408 : PG_FREE_IF_COPY(arg2, 1);
829 :
830 6408 : PG_RETURN_BOOL(result);
831 : }
832 :
833 : Datum
834 6022 : bpcharlt(PG_FUNCTION_ARGS)
835 : {
836 6022 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
837 6022 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
838 : int len1,
839 : len2;
840 : int cmp;
841 :
842 6022 : len1 = bcTruelen(arg1);
843 6022 : len2 = bcTruelen(arg2);
844 :
845 6022 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
846 : PG_GET_COLLATION());
847 :
848 6022 : PG_FREE_IF_COPY(arg1, 0);
849 6022 : PG_FREE_IF_COPY(arg2, 1);
850 :
851 6022 : PG_RETURN_BOOL(cmp < 0);
852 : }
853 :
854 : Datum
855 5546 : bpcharle(PG_FUNCTION_ARGS)
856 : {
857 5546 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
858 5546 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
859 : int len1,
860 : len2;
861 : int cmp;
862 :
863 5546 : len1 = bcTruelen(arg1);
864 5546 : len2 = bcTruelen(arg2);
865 :
866 5546 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
867 : PG_GET_COLLATION());
868 :
869 5546 : PG_FREE_IF_COPY(arg1, 0);
870 5546 : PG_FREE_IF_COPY(arg2, 1);
871 :
872 5546 : PG_RETURN_BOOL(cmp <= 0);
873 : }
874 :
875 : Datum
876 6252 : bpchargt(PG_FUNCTION_ARGS)
877 : {
878 6252 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
879 6252 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
880 : int len1,
881 : len2;
882 : int cmp;
883 :
884 6252 : len1 = bcTruelen(arg1);
885 6252 : len2 = bcTruelen(arg2);
886 :
887 6252 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
888 : PG_GET_COLLATION());
889 :
890 6252 : PG_FREE_IF_COPY(arg1, 0);
891 6252 : PG_FREE_IF_COPY(arg2, 1);
892 :
893 6252 : PG_RETURN_BOOL(cmp > 0);
894 : }
895 :
896 : Datum
897 5720 : bpcharge(PG_FUNCTION_ARGS)
898 : {
899 5720 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
900 5720 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
901 : int len1,
902 : len2;
903 : int cmp;
904 :
905 5720 : len1 = bcTruelen(arg1);
906 5720 : len2 = bcTruelen(arg2);
907 :
908 5720 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
909 : PG_GET_COLLATION());
910 :
911 5720 : PG_FREE_IF_COPY(arg1, 0);
912 5720 : PG_FREE_IF_COPY(arg2, 1);
913 :
914 5720 : PG_RETURN_BOOL(cmp >= 0);
915 : }
916 :
917 : Datum
918 81008 : bpcharcmp(PG_FUNCTION_ARGS)
919 : {
920 81008 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
921 81008 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
922 : int len1,
923 : len2;
924 : int cmp;
925 :
926 81008 : len1 = bcTruelen(arg1);
927 81008 : len2 = bcTruelen(arg2);
928 :
929 81008 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
930 : PG_GET_COLLATION());
931 :
932 81008 : PG_FREE_IF_COPY(arg1, 0);
933 81008 : PG_FREE_IF_COPY(arg2, 1);
934 :
935 81008 : PG_RETURN_INT32(cmp);
936 : }
937 :
938 : Datum
939 782 : bpchar_sortsupport(PG_FUNCTION_ARGS)
940 : {
941 782 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
942 782 : Oid collid = ssup->ssup_collation;
943 : MemoryContext oldcontext;
944 :
945 782 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
946 :
947 : /* Use generic string SortSupport */
948 782 : varstr_sortsupport(ssup, BPCHAROID, collid);
949 :
950 782 : MemoryContextSwitchTo(oldcontext);
951 :
952 782 : PG_RETURN_VOID();
953 : }
954 :
955 : Datum
956 0 : bpchar_larger(PG_FUNCTION_ARGS)
957 : {
958 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
959 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
960 : int len1,
961 : len2;
962 : int cmp;
963 :
964 0 : len1 = bcTruelen(arg1);
965 0 : len2 = bcTruelen(arg2);
966 :
967 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
968 : PG_GET_COLLATION());
969 :
970 0 : PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
971 : }
972 :
973 : Datum
974 0 : bpchar_smaller(PG_FUNCTION_ARGS)
975 : {
976 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
977 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
978 : int len1,
979 : len2;
980 : int cmp;
981 :
982 0 : len1 = bcTruelen(arg1);
983 0 : len2 = bcTruelen(arg2);
984 :
985 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
986 : PG_GET_COLLATION());
987 :
988 0 : PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
989 : }
990 :
991 :
992 : /*
993 : * bpchar needs a specialized hash function because we want to ignore
994 : * trailing blanks in comparisons.
995 : */
996 : Datum
997 4496 : hashbpchar(PG_FUNCTION_ARGS)
998 : {
999 4496 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
1000 4496 : Oid collid = PG_GET_COLLATION();
1001 : char *keydata;
1002 : int keylen;
1003 4496 : pg_locale_t mylocale = 0;
1004 : Datum result;
1005 :
1006 4496 : if (!collid)
1007 0 : ereport(ERROR,
1008 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1009 : errmsg("could not determine which collation to use for string hashing"),
1010 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1011 :
1012 4496 : keydata = VARDATA_ANY(key);
1013 4496 : keylen = bcTruelen(key);
1014 :
1015 4496 : if (!lc_collate_is_c(collid))
1016 4496 : mylocale = pg_newlocale_from_collation(collid);
1017 :
1018 4496 : if (pg_locale_deterministic(mylocale))
1019 : {
1020 4268 : result = hash_any((unsigned char *) keydata, keylen);
1021 : }
1022 : else
1023 : {
1024 : Size bsize,
1025 : rsize;
1026 : char *buf;
1027 :
1028 228 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
1029 228 : buf = palloc(bsize + 1);
1030 :
1031 228 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
1032 228 : if (rsize != bsize)
1033 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
1034 :
1035 : /*
1036 : * In principle, there's no reason to include the terminating NUL
1037 : * character in the hash, but it was done before and the behavior must
1038 : * be preserved.
1039 : */
1040 228 : result = hash_any((uint8_t *) buf, bsize + 1);
1041 :
1042 228 : pfree(buf);
1043 : }
1044 :
1045 : /* Avoid leaking memory for toasted inputs */
1046 4496 : PG_FREE_IF_COPY(key, 0);
1047 :
1048 4496 : return result;
1049 : }
1050 :
1051 : Datum
1052 84 : hashbpcharextended(PG_FUNCTION_ARGS)
1053 : {
1054 84 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
1055 84 : Oid collid = PG_GET_COLLATION();
1056 : char *keydata;
1057 : int keylen;
1058 84 : pg_locale_t mylocale = 0;
1059 : Datum result;
1060 :
1061 84 : if (!collid)
1062 0 : ereport(ERROR,
1063 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1064 : errmsg("could not determine which collation to use for string hashing"),
1065 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1066 :
1067 84 : keydata = VARDATA_ANY(key);
1068 84 : keylen = bcTruelen(key);
1069 :
1070 84 : if (!lc_collate_is_c(collid))
1071 84 : mylocale = pg_newlocale_from_collation(collid);
1072 :
1073 84 : if (pg_locale_deterministic(mylocale))
1074 : {
1075 72 : result = hash_any_extended((unsigned char *) keydata, keylen,
1076 72 : PG_GETARG_INT64(1));
1077 : }
1078 : else
1079 : {
1080 : Size bsize,
1081 : rsize;
1082 : char *buf;
1083 :
1084 12 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
1085 12 : buf = palloc(bsize + 1);
1086 :
1087 12 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
1088 12 : if (rsize != bsize)
1089 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
1090 :
1091 : /*
1092 : * In principle, there's no reason to include the terminating NUL
1093 : * character in the hash, but it was done before and the behavior must
1094 : * be preserved.
1095 : */
1096 12 : result = hash_any_extended((uint8_t *) buf, bsize + 1,
1097 12 : PG_GETARG_INT64(1));
1098 :
1099 12 : pfree(buf);
1100 : }
1101 :
1102 84 : PG_FREE_IF_COPY(key, 0);
1103 :
1104 84 : return result;
1105 : }
1106 :
1107 : /*
1108 : * The following operators support character-by-character comparison
1109 : * of bpchar datums, to allow building indexes suitable for LIKE clauses.
1110 : * Note that the regular bpchareq/bpcharne comparison operators, and
1111 : * regular support functions 1 and 2 with "C" collation are assumed to be
1112 : * compatible with these!
1113 : */
1114 :
1115 : static int
1116 78 : internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
1117 : {
1118 : int result;
1119 : int len1,
1120 : len2;
1121 :
1122 78 : len1 = bcTruelen(arg1);
1123 78 : len2 = bcTruelen(arg2);
1124 :
1125 78 : result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1126 78 : if (result != 0)
1127 48 : return result;
1128 30 : else if (len1 < len2)
1129 0 : return -1;
1130 30 : else if (len1 > len2)
1131 0 : return 1;
1132 : else
1133 30 : return 0;
1134 : }
1135 :
1136 :
1137 : Datum
1138 0 : bpchar_pattern_lt(PG_FUNCTION_ARGS)
1139 : {
1140 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1141 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1142 : int result;
1143 :
1144 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1145 :
1146 0 : PG_FREE_IF_COPY(arg1, 0);
1147 0 : PG_FREE_IF_COPY(arg2, 1);
1148 :
1149 0 : PG_RETURN_BOOL(result < 0);
1150 : }
1151 :
1152 :
1153 : Datum
1154 0 : bpchar_pattern_le(PG_FUNCTION_ARGS)
1155 : {
1156 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1157 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1158 : int result;
1159 :
1160 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1161 :
1162 0 : PG_FREE_IF_COPY(arg1, 0);
1163 0 : PG_FREE_IF_COPY(arg2, 1);
1164 :
1165 0 : PG_RETURN_BOOL(result <= 0);
1166 : }
1167 :
1168 :
1169 : Datum
1170 0 : bpchar_pattern_ge(PG_FUNCTION_ARGS)
1171 : {
1172 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1173 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1174 : int result;
1175 :
1176 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1177 :
1178 0 : PG_FREE_IF_COPY(arg1, 0);
1179 0 : PG_FREE_IF_COPY(arg2, 1);
1180 :
1181 0 : PG_RETURN_BOOL(result >= 0);
1182 : }
1183 :
1184 :
1185 : Datum
1186 0 : bpchar_pattern_gt(PG_FUNCTION_ARGS)
1187 : {
1188 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1189 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1190 : int result;
1191 :
1192 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1193 :
1194 0 : PG_FREE_IF_COPY(arg1, 0);
1195 0 : PG_FREE_IF_COPY(arg2, 1);
1196 :
1197 0 : PG_RETURN_BOOL(result > 0);
1198 : }
1199 :
1200 :
1201 : Datum
1202 78 : btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
1203 : {
1204 78 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1205 78 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1206 : int result;
1207 :
1208 78 : result = internal_bpchar_pattern_compare(arg1, arg2);
1209 :
1210 78 : PG_FREE_IF_COPY(arg1, 0);
1211 78 : PG_FREE_IF_COPY(arg2, 1);
1212 :
1213 78 : PG_RETURN_INT32(result);
1214 : }
1215 :
1216 :
1217 : Datum
1218 12 : btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
1219 : {
1220 12 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1221 : MemoryContext oldcontext;
1222 :
1223 12 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1224 :
1225 : /* Use generic string SortSupport, forcing "C" collation */
1226 12 : varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
1227 :
1228 12 : MemoryContextSwitchTo(oldcontext);
1229 :
1230 12 : PG_RETURN_VOID();
1231 : }
|