Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * varchar.c
4 : * Functions for the built-in types char(n) and varchar(n).
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/varchar.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/detoast.h"
18 : #include "access/htup_details.h"
19 : #include "catalog/pg_collation.h"
20 : #include "catalog/pg_type.h"
21 : #include "common/hashfn.h"
22 : #include "libpq/pqformat.h"
23 : #include "mb/pg_wchar.h"
24 : #include "nodes/nodeFuncs.h"
25 : #include "nodes/supportnodes.h"
26 : #include "utils/array.h"
27 : #include "utils/builtins.h"
28 : #include "utils/pg_locale.h"
29 : #include "utils/varlena.h"
30 :
31 : /* common code for bpchartypmodin and varchartypmodin */
32 : static int32
33 2488 : anychar_typmodin(ArrayType *ta, const char *typename)
34 : {
35 : int32 typmod;
36 : int32 *tl;
37 : int n;
38 :
39 2488 : tl = ArrayGetIntegerTypmods(ta, &n);
40 :
41 : /*
42 : * we're not too tense about good error message here because grammar
43 : * shouldn't allow wrong number of modifiers for CHAR
44 : */
45 2488 : if (n != 1)
46 0 : ereport(ERROR,
47 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
48 : errmsg("invalid type modifier")));
49 :
50 2488 : if (*tl < 1)
51 0 : ereport(ERROR,
52 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
53 : errmsg("length for type %s must be at least 1", typename)));
54 2488 : if (*tl > MaxAttrSize)
55 0 : ereport(ERROR,
56 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
57 : errmsg("length for type %s cannot exceed %d",
58 : typename, MaxAttrSize)));
59 :
60 : /*
61 : * For largely historical reasons, the typmod is VARHDRSZ plus the number
62 : * of characters; there is enough client-side code that knows about that
63 : * that we'd better not change it.
64 : */
65 2488 : typmod = VARHDRSZ + *tl;
66 :
67 2488 : return typmod;
68 : }
69 :
70 : /* common code for bpchartypmodout and varchartypmodout */
71 : static char *
72 655 : anychar_typmodout(int32 typmod)
73 : {
74 655 : char *res = (char *) palloc(64);
75 :
76 655 : if (typmod > VARHDRSZ)
77 655 : snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
78 : else
79 0 : *res = '\0';
80 :
81 655 : return res;
82 : }
83 :
84 :
85 : /*
86 : * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
87 : * is for a blank-padded string whose length is specified in CREATE TABLE.
88 : * VARCHAR is for storing a string whose length is at most the length specified
89 : * at CREATE TABLE time.
90 : *
91 : * It's hard to implement these types because we cannot figure out
92 : * the length of the type from the type itself. I changed (hopefully all) the
93 : * fmgr calls that invoke input functions of a data type to supply the
94 : * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
95 : * the length of the attributes and hence the exact length of the char() or
96 : * varchar(). We pass this to bpcharin() or varcharin().) In the case where
97 : * we cannot determine the length, we pass in -1 instead and the input
98 : * converter does not enforce any length check.
99 : *
100 : * We actually implement this as a varlena so that we don't have to pass in
101 : * the length for the comparison functions. (The difference between these
102 : * types and "text" is that we truncate and possibly blank-pad the string
103 : * at insertion time.)
104 : *
105 : * - ay 6/95
106 : */
107 :
108 :
109 : /*****************************************************************************
110 : * bpchar - char() *
111 : *****************************************************************************/
112 :
113 : /*
114 : * bpchar_input -- common guts of bpcharin and bpcharrecv
115 : *
116 : * s is the input text of length len (may not be null-terminated)
117 : * atttypmod is the typmod value to apply
118 : *
119 : * Note that atttypmod is measured in characters, which
120 : * is not necessarily the same as the number of bytes.
121 : *
122 : * If the input string is too long, raise an error, unless the extra
123 : * characters are spaces, in which case they're truncated. (per SQL)
124 : *
125 : * If escontext points to an ErrorSaveContext node, that is filled instead
126 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
127 : * to detect errors.
128 : */
129 : static BpChar *
130 212148 : bpchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
131 : {
132 : BpChar *result;
133 : char *r;
134 : size_t maxlen;
135 :
136 : /* If typmod is -1 (or invalid), use the actual string length */
137 212148 : if (atttypmod < (int32) VARHDRSZ)
138 5473 : maxlen = len;
139 : else
140 : {
141 : size_t charlen; /* number of CHARACTERS in the input */
142 :
143 206675 : maxlen = atttypmod - VARHDRSZ;
144 206675 : charlen = pg_mbstrlen_with_len(s, len);
145 206675 : if (charlen > maxlen)
146 : {
147 : /* Verify that extra characters are spaces, and clip them off */
148 132 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
149 : size_t j;
150 :
151 : /*
152 : * at this point, len is the actual BYTE length of the input
153 : * string, maxlen is the max number of CHARACTERS allowed for this
154 : * bpchar type, mbmaxlen is the length in BYTES of those chars.
155 : */
156 140 : for (j = mbmaxlen; j < len; j++)
157 : {
158 136 : if (s[j] != ' ')
159 128 : ereturn(escontext, NULL,
160 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
161 : errmsg("value too long for type character(%zu)",
162 : maxlen)));
163 : }
164 :
165 : /*
166 : * Now we set maxlen to the necessary byte length, not the number
167 : * of CHARACTERS!
168 : */
169 4 : maxlen = len = mbmaxlen;
170 : }
171 : else
172 : {
173 : /*
174 : * Now we set maxlen to the necessary byte length, not the number
175 : * of CHARACTERS!
176 : */
177 206543 : maxlen = len + (maxlen - charlen);
178 : }
179 : }
180 :
181 212020 : result = (BpChar *) palloc(maxlen + VARHDRSZ);
182 212020 : SET_VARSIZE(result, maxlen + VARHDRSZ);
183 212020 : r = VARDATA(result);
184 212020 : memcpy(r, s, len);
185 :
186 : /* blank pad the string if necessary */
187 212020 : if (maxlen > len)
188 201152 : memset(r + len, ' ', maxlen - len);
189 :
190 212020 : return result;
191 : }
192 :
193 : /*
194 : * Convert a C string to CHARACTER internal representation. atttypmod
195 : * is the declared length of the type plus VARHDRSZ.
196 : */
197 : Datum
198 212148 : bpcharin(PG_FUNCTION_ARGS)
199 : {
200 212148 : char *s = PG_GETARG_CSTRING(0);
201 : #ifdef NOT_USED
202 : Oid typelem = PG_GETARG_OID(1);
203 : #endif
204 212148 : int32 atttypmod = PG_GETARG_INT32(2);
205 : BpChar *result;
206 :
207 212148 : result = bpchar_input(s, strlen(s), atttypmod, fcinfo->context);
208 212100 : PG_RETURN_BPCHAR_P(result);
209 : }
210 :
211 :
212 : /*
213 : * Convert a CHARACTER value to a C string.
214 : *
215 : * Uses the text conversion functions, which is only appropriate if BpChar
216 : * and text are equivalent types.
217 : */
218 : Datum
219 24279 : bpcharout(PG_FUNCTION_ARGS)
220 : {
221 24279 : Datum txt = PG_GETARG_DATUM(0);
222 :
223 24279 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
224 : }
225 :
226 : /*
227 : * bpcharrecv - converts external binary format to bpchar
228 : */
229 : Datum
230 0 : bpcharrecv(PG_FUNCTION_ARGS)
231 : {
232 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
233 : #ifdef NOT_USED
234 : Oid typelem = PG_GETARG_OID(1);
235 : #endif
236 0 : int32 atttypmod = PG_GETARG_INT32(2);
237 : BpChar *result;
238 : char *str;
239 : int nbytes;
240 :
241 0 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
242 0 : result = bpchar_input(str, nbytes, atttypmod, NULL);
243 0 : pfree(str);
244 0 : PG_RETURN_BPCHAR_P(result);
245 : }
246 :
247 : /*
248 : * bpcharsend - converts bpchar to binary format
249 : */
250 : Datum
251 2 : bpcharsend(PG_FUNCTION_ARGS)
252 : {
253 : /* Exactly the same as textsend, so share code */
254 2 : return textsend(fcinfo);
255 : }
256 :
257 :
258 : /*
259 : * Converts a CHARACTER type to the specified size.
260 : *
261 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
262 : * isExplicit is true if this is for an explicit cast to char(N).
263 : *
264 : * Truncation rules: for an explicit cast, silently truncate to the given
265 : * length; for an implicit cast, raise error unless extra characters are
266 : * all spaces. (This is sort-of per SQL: the spec would actually have us
267 : * raise a "completion condition" for the explicit cast case, but Postgres
268 : * hasn't got such a concept.)
269 : */
270 : Datum
271 8543 : bpchar(PG_FUNCTION_ARGS)
272 : {
273 8543 : BpChar *source = PG_GETARG_BPCHAR_PP(0);
274 8543 : int32 maxlen = PG_GETARG_INT32(1);
275 8543 : bool isExplicit = PG_GETARG_BOOL(2);
276 : BpChar *result;
277 : int32 len;
278 : char *r;
279 : char *s;
280 : int i;
281 : int charlen; /* number of characters in the input string +
282 : * VARHDRSZ */
283 :
284 : /* No work if typmod is invalid */
285 8543 : if (maxlen < (int32) VARHDRSZ)
286 0 : PG_RETURN_BPCHAR_P(source);
287 :
288 8543 : maxlen -= VARHDRSZ;
289 :
290 8543 : len = VARSIZE_ANY_EXHDR(source);
291 8543 : s = VARDATA_ANY(source);
292 :
293 8543 : charlen = pg_mbstrlen_with_len(s, len);
294 :
295 : /* No work if supplied data matches typmod already */
296 8543 : if (charlen == maxlen)
297 3944 : PG_RETURN_BPCHAR_P(source);
298 :
299 4599 : if (charlen > maxlen)
300 : {
301 : /* Verify that extra characters are spaces, and clip them off */
302 : size_t maxmblen;
303 :
304 30 : maxmblen = pg_mbcharcliplen(s, len, maxlen);
305 :
306 30 : if (!isExplicit)
307 : {
308 71 : for (i = maxmblen; i < len; i++)
309 61 : if (s[i] != ' ')
310 16 : ereturn(fcinfo->context, (Datum) 0,
311 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
312 : errmsg("value too long for type character(%d)",
313 : maxlen)));
314 : }
315 :
316 14 : len = maxmblen;
317 :
318 : /*
319 : * At this point, maxlen is the necessary byte length, not the number
320 : * of CHARACTERS!
321 : */
322 14 : maxlen = len;
323 : }
324 : else
325 : {
326 : /*
327 : * At this point, maxlen is the necessary byte length, not the number
328 : * of CHARACTERS!
329 : */
330 4569 : maxlen = len + (maxlen - charlen);
331 : }
332 :
333 : Assert(maxlen >= len);
334 :
335 4583 : result = palloc(maxlen + VARHDRSZ);
336 4583 : SET_VARSIZE(result, maxlen + VARHDRSZ);
337 4583 : r = VARDATA(result);
338 :
339 4583 : memcpy(r, s, len);
340 :
341 : /* blank pad the string if necessary */
342 4583 : if (maxlen > len)
343 4569 : memset(r + len, ' ', maxlen - len);
344 :
345 4583 : PG_RETURN_BPCHAR_P(result);
346 : }
347 :
348 :
349 : /*
350 : * char_bpchar()
351 : * Convert char to bpchar(1).
352 : */
353 : Datum
354 0 : char_bpchar(PG_FUNCTION_ARGS)
355 : {
356 0 : char c = PG_GETARG_CHAR(0);
357 : BpChar *result;
358 :
359 0 : result = (BpChar *) palloc(VARHDRSZ + 1);
360 :
361 0 : SET_VARSIZE(result, VARHDRSZ + 1);
362 0 : *(VARDATA(result)) = c;
363 :
364 0 : PG_RETURN_BPCHAR_P(result);
365 : }
366 :
367 :
368 : /*
369 : * bpchar_name()
370 : * Converts a bpchar() type to a NameData type.
371 : */
372 : Datum
373 0 : bpchar_name(PG_FUNCTION_ARGS)
374 : {
375 0 : BpChar *s = PG_GETARG_BPCHAR_PP(0);
376 : char *s_data;
377 : Name result;
378 : int len;
379 :
380 0 : len = VARSIZE_ANY_EXHDR(s);
381 0 : s_data = VARDATA_ANY(s);
382 :
383 : /* Truncate oversize input */
384 0 : if (len >= NAMEDATALEN)
385 0 : len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
386 :
387 : /* Remove trailing blanks */
388 0 : while (len > 0)
389 : {
390 0 : if (s_data[len - 1] != ' ')
391 0 : break;
392 0 : len--;
393 : }
394 :
395 : /* We use palloc0 here to ensure result is zero-padded */
396 0 : result = (Name) palloc0(NAMEDATALEN);
397 0 : memcpy(NameStr(*result), s_data, len);
398 :
399 0 : PG_RETURN_NAME(result);
400 : }
401 :
402 : /*
403 : * name_bpchar()
404 : * Converts a NameData type to a bpchar type.
405 : *
406 : * Uses the text conversion functions, which is only appropriate if BpChar
407 : * and text are equivalent types.
408 : */
409 : Datum
410 5 : name_bpchar(PG_FUNCTION_ARGS)
411 : {
412 5 : Name s = PG_GETARG_NAME(0);
413 : BpChar *result;
414 :
415 5 : result = (BpChar *) cstring_to_text(NameStr(*s));
416 5 : PG_RETURN_BPCHAR_P(result);
417 : }
418 :
419 : Datum
420 1392 : bpchartypmodin(PG_FUNCTION_ARGS)
421 : {
422 1392 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
423 :
424 1392 : PG_RETURN_INT32(anychar_typmodin(ta, "char"));
425 : }
426 :
427 : Datum
428 442 : bpchartypmodout(PG_FUNCTION_ARGS)
429 : {
430 442 : int32 typmod = PG_GETARG_INT32(0);
431 :
432 442 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
433 : }
434 :
435 :
436 : /*****************************************************************************
437 : * varchar - varchar(n)
438 : *
439 : * Note: varchar piggybacks on type text for most operations, and so has no
440 : * C-coded functions except for I/O and typmod checking.
441 : *****************************************************************************/
442 :
443 : /*
444 : * varchar_input -- common guts of varcharin and varcharrecv
445 : *
446 : * s is the input text of length len (may not be null-terminated)
447 : * atttypmod is the typmod value to apply
448 : *
449 : * Note that atttypmod is measured in characters, which
450 : * is not necessarily the same as the number of bytes.
451 : *
452 : * If the input string is too long, raise an error, unless the extra
453 : * characters are spaces, in which case they're truncated. (per SQL)
454 : *
455 : * If escontext points to an ErrorSaveContext node, that is filled instead
456 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
457 : * to detect errors.
458 : */
459 : static VarChar *
460 375373 : varchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
461 : {
462 : VarChar *result;
463 : size_t maxlen;
464 :
465 375373 : maxlen = atttypmod - VARHDRSZ;
466 :
467 375373 : if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
468 : {
469 : /* Verify that extra characters are spaces, and clip them off */
470 52 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
471 : size_t j;
472 :
473 60 : for (j = mbmaxlen; j < len; j++)
474 : {
475 56 : if (s[j] != ' ')
476 48 : ereturn(escontext, NULL,
477 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
478 : errmsg("value too long for type character varying(%zu)",
479 : maxlen)));
480 : }
481 :
482 4 : len = mbmaxlen;
483 : }
484 :
485 : /*
486 : * We can use cstring_to_text_with_len because VarChar and text are
487 : * binary-compatible types.
488 : */
489 375325 : result = (VarChar *) cstring_to_text_with_len(s, len);
490 375325 : return result;
491 : }
492 :
493 : /*
494 : * Convert a C string to VARCHAR internal representation. atttypmod
495 : * is the declared length of the type plus VARHDRSZ.
496 : */
497 : Datum
498 375372 : varcharin(PG_FUNCTION_ARGS)
499 : {
500 375372 : char *s = PG_GETARG_CSTRING(0);
501 : #ifdef NOT_USED
502 : Oid typelem = PG_GETARG_OID(1);
503 : #endif
504 375372 : int32 atttypmod = PG_GETARG_INT32(2);
505 : VarChar *result;
506 :
507 375372 : result = varchar_input(s, strlen(s), atttypmod, fcinfo->context);
508 375356 : PG_RETURN_VARCHAR_P(result);
509 : }
510 :
511 :
512 : /*
513 : * Convert a VARCHAR value to a C string.
514 : *
515 : * Uses the text to C string conversion function, which is only appropriate
516 : * if VarChar and text are equivalent types.
517 : */
518 : Datum
519 111387 : varcharout(PG_FUNCTION_ARGS)
520 : {
521 111387 : Datum txt = PG_GETARG_DATUM(0);
522 :
523 111387 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
524 : }
525 :
526 : /*
527 : * varcharrecv - converts external binary format to varchar
528 : */
529 : Datum
530 1 : varcharrecv(PG_FUNCTION_ARGS)
531 : {
532 1 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
533 : #ifdef NOT_USED
534 : Oid typelem = PG_GETARG_OID(1);
535 : #endif
536 1 : int32 atttypmod = PG_GETARG_INT32(2);
537 : VarChar *result;
538 : char *str;
539 : int nbytes;
540 :
541 1 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
542 1 : result = varchar_input(str, nbytes, atttypmod, NULL);
543 1 : pfree(str);
544 1 : PG_RETURN_VARCHAR_P(result);
545 : }
546 :
547 : /*
548 : * varcharsend - converts varchar to binary format
549 : */
550 : Datum
551 1 : varcharsend(PG_FUNCTION_ARGS)
552 : {
553 : /* Exactly the same as textsend, so share code */
554 1 : return textsend(fcinfo);
555 : }
556 :
557 :
558 : /*
559 : * varchar_support()
560 : *
561 : * Planner support function for the varchar() length coercion function.
562 : *
563 : * Currently, the only interesting thing we can do is flatten calls that set
564 : * the new maximum length >= the previous maximum length. We can ignore the
565 : * isExplicit argument, since that only affects truncation cases.
566 : */
567 : Datum
568 2147 : varchar_support(PG_FUNCTION_ARGS)
569 : {
570 2147 : Node *rawreq = (Node *) PG_GETARG_POINTER(0);
571 2147 : Node *ret = NULL;
572 :
573 2147 : if (IsA(rawreq, SupportRequestSimplify))
574 : {
575 923 : SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
576 923 : FuncExpr *expr = req->fcall;
577 : Node *typmod;
578 :
579 : Assert(list_length(expr->args) >= 2);
580 :
581 923 : typmod = (Node *) lsecond(expr->args);
582 :
583 923 : if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
584 : {
585 923 : Node *source = (Node *) linitial(expr->args);
586 923 : int32 old_typmod = exprTypmod(source);
587 923 : int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
588 923 : int32 old_max = old_typmod - VARHDRSZ;
589 923 : int32 new_max = new_typmod - VARHDRSZ;
590 :
591 923 : if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
592 20 : ret = relabel_to_typmod(source, new_typmod);
593 : }
594 : }
595 :
596 2147 : PG_RETURN_POINTER(ret);
597 : }
598 :
599 : /*
600 : * Converts a VARCHAR type to the specified size.
601 : *
602 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
603 : * isExplicit is true if this is for an explicit cast to varchar(N).
604 : *
605 : * Truncation rules: for an explicit cast, silently truncate to the given
606 : * length; for an implicit cast, raise error unless extra characters are
607 : * all spaces. (This is sort-of per SQL: the spec would actually have us
608 : * raise a "completion condition" for the explicit cast case, but Postgres
609 : * hasn't got such a concept.)
610 : */
611 : Datum
612 17183 : varchar(PG_FUNCTION_ARGS)
613 : {
614 17183 : VarChar *source = PG_GETARG_VARCHAR_PP(0);
615 17183 : int32 typmod = PG_GETARG_INT32(1);
616 17183 : bool isExplicit = PG_GETARG_BOOL(2);
617 : int32 len,
618 : maxlen;
619 : size_t maxmblen;
620 : int i;
621 : char *s_data;
622 :
623 17183 : len = VARSIZE_ANY_EXHDR(source);
624 17183 : s_data = VARDATA_ANY(source);
625 17183 : maxlen = typmod - VARHDRSZ;
626 :
627 : /* No work if typmod is invalid or supplied data fits it already */
628 17183 : if (maxlen < 0 || len <= maxlen)
629 17093 : PG_RETURN_VARCHAR_P(source);
630 :
631 : /* only reach here if string is too long... */
632 :
633 : /* truncate multibyte string preserving multibyte boundary */
634 90 : maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
635 :
636 90 : if (!isExplicit)
637 : {
638 115 : for (i = maxmblen; i < len; i++)
639 105 : if (s_data[i] != ' ')
640 56 : ereturn(fcinfo->context, (Datum) 0,
641 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
642 : errmsg("value too long for type character varying(%d)",
643 : maxlen)));
644 : }
645 :
646 34 : PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
647 : maxmblen));
648 : }
649 :
650 : Datum
651 1096 : varchartypmodin(PG_FUNCTION_ARGS)
652 : {
653 1096 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
654 :
655 1096 : PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
656 : }
657 :
658 : Datum
659 213 : varchartypmodout(PG_FUNCTION_ARGS)
660 : {
661 213 : int32 typmod = PG_GETARG_INT32(0);
662 :
663 213 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
664 : }
665 :
666 :
667 : /*****************************************************************************
668 : * Exported functions
669 : *****************************************************************************/
670 :
671 : /* "True" length (not counting trailing blanks) of a BpChar */
672 : static inline int
673 194072 : bcTruelen(BpChar *arg)
674 : {
675 194072 : return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
676 : }
677 :
/*
 * bpchartruelen -- length of the first len bytes of s once trailing spaces
 * are discounted.
 *
 * Returns the number of leading bytes that remain after stripping every
 * trailing ' ' byte; the result is 0 when the buffer holds only spaces.
 */
int
bpchartruelen(char *s, int len)
{
	int			truelen = len;

	/*
	 * Scanning bytewise from the end relies on the assumption that ' ' is a
	 * singleton unit on every supported multibyte server encoding.
	 */
	while (truelen > 0 && s[truelen - 1] == ' ')
		truelen--;

	return truelen;
}
694 :
695 : Datum
696 10 : bpcharlen(PG_FUNCTION_ARGS)
697 : {
698 10 : BpChar *arg = PG_GETARG_BPCHAR_PP(0);
699 : int len;
700 :
701 : /* get number of bytes, ignoring trailing spaces */
702 10 : len = bcTruelen(arg);
703 :
704 : /* in multibyte encoding, convert to number of characters */
705 10 : if (pg_database_encoding_max_length() != 1)
706 10 : len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
707 :
708 10 : PG_RETURN_INT32(len);
709 : }
710 :
711 : Datum
712 0 : bpcharoctetlen(PG_FUNCTION_ARGS)
713 : {
714 0 : Datum arg = PG_GETARG_DATUM(0);
715 :
716 : /* We need not detoast the input at all */
717 0 : PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
718 : }
719 :
720 :
721 : /*****************************************************************************
722 : * Comparison Functions used for bpchar
723 : *
724 : * Note: btree indexes need these routines not to leak memory; therefore,
725 : * be careful to free working copies of toasted datums. Most places don't
726 : * need to be so careful.
727 : *****************************************************************************/
728 :
729 : static void
730 16720 : check_collation_set(Oid collid)
731 : {
732 16720 : if (!OidIsValid(collid))
733 : {
734 : /*
735 : * This typically means that the parser could not resolve a conflict
736 : * of implicit collations, so report it that way.
737 : */
738 0 : ereport(ERROR,
739 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
740 : errmsg("could not determine which collation to use for string comparison"),
741 : errhint("Use the COLLATE clause to set the collation explicitly.")));
742 : }
743 16720 : }
744 :
745 : Datum
746 12424 : bpchareq(PG_FUNCTION_ARGS)
747 : {
748 12424 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
749 12424 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
750 : int len1,
751 : len2;
752 : bool result;
753 12424 : Oid collid = PG_GET_COLLATION();
754 : pg_locale_t mylocale;
755 :
756 12424 : check_collation_set(collid);
757 :
758 12424 : len1 = bcTruelen(arg1);
759 12424 : len2 = bcTruelen(arg2);
760 :
761 12424 : mylocale = pg_newlocale_from_collation(collid);
762 :
763 12424 : if (mylocale->deterministic)
764 : {
765 : /*
766 : * Since we only care about equality or not-equality, we can avoid all
767 : * the expense of strcoll() here, and just do bitwise comparison.
768 : */
769 12312 : if (len1 != len2)
770 1406 : result = false;
771 : else
772 10906 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
773 : }
774 : else
775 : {
776 112 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
777 : collid) == 0);
778 : }
779 :
780 12424 : PG_FREE_IF_COPY(arg1, 0);
781 12424 : PG_FREE_IF_COPY(arg2, 1);
782 :
783 12424 : PG_RETURN_BOOL(result);
784 : }
785 :
786 : Datum
787 4296 : bpcharne(PG_FUNCTION_ARGS)
788 : {
789 4296 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
790 4296 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
791 : int len1,
792 : len2;
793 : bool result;
794 4296 : Oid collid = PG_GET_COLLATION();
795 : pg_locale_t mylocale;
796 :
797 4296 : check_collation_set(collid);
798 :
799 4296 : len1 = bcTruelen(arg1);
800 4296 : len2 = bcTruelen(arg2);
801 :
802 4296 : mylocale = pg_newlocale_from_collation(collid);
803 :
804 4296 : if (mylocale->deterministic)
805 : {
806 : /*
807 : * Since we only care about equality or not-equality, we can avoid all
808 : * the expense of strcoll() here, and just do bitwise comparison.
809 : */
810 4280 : if (len1 != len2)
811 1364 : result = true;
812 : else
813 2916 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
814 : }
815 : else
816 : {
817 16 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
818 : collid) != 0);
819 : }
820 :
821 4296 : PG_FREE_IF_COPY(arg1, 0);
822 4296 : PG_FREE_IF_COPY(arg2, 1);
823 :
824 4296 : PG_RETURN_BOOL(result);
825 : }
826 :
827 : Datum
828 3590 : bpcharlt(PG_FUNCTION_ARGS)
829 : {
830 3590 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
831 3590 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
832 : int len1,
833 : len2;
834 : int cmp;
835 :
836 3590 : len1 = bcTruelen(arg1);
837 3590 : len2 = bcTruelen(arg2);
838 :
839 3590 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
840 : PG_GET_COLLATION());
841 :
842 3590 : PG_FREE_IF_COPY(arg1, 0);
843 3590 : PG_FREE_IF_COPY(arg2, 1);
844 :
845 3590 : PG_RETURN_BOOL(cmp < 0);
846 : }
847 :
848 : Datum
849 3342 : bpcharle(PG_FUNCTION_ARGS)
850 : {
851 3342 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
852 3342 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
853 : int len1,
854 : len2;
855 : int cmp;
856 :
857 3342 : len1 = bcTruelen(arg1);
858 3342 : len2 = bcTruelen(arg2);
859 :
860 3342 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
861 : PG_GET_COLLATION());
862 :
863 3342 : PG_FREE_IF_COPY(arg1, 0);
864 3342 : PG_FREE_IF_COPY(arg2, 1);
865 :
866 3342 : PG_RETURN_BOOL(cmp <= 0);
867 : }
868 :
869 : Datum
870 3668 : bpchargt(PG_FUNCTION_ARGS)
871 : {
872 3668 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
873 3668 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
874 : int len1,
875 : len2;
876 : int cmp;
877 :
878 3668 : len1 = bcTruelen(arg1);
879 3668 : len2 = bcTruelen(arg2);
880 :
881 3668 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
882 : PG_GET_COLLATION());
883 :
884 3668 : PG_FREE_IF_COPY(arg1, 0);
885 3668 : PG_FREE_IF_COPY(arg2, 1);
886 :
887 3668 : PG_RETURN_BOOL(cmp > 0);
888 : }
889 :
890 : Datum
891 3338 : bpcharge(PG_FUNCTION_ARGS)
892 : {
893 3338 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
894 3338 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
895 : int len1,
896 : len2;
897 : int cmp;
898 :
899 3338 : len1 = bcTruelen(arg1);
900 3338 : len2 = bcTruelen(arg2);
901 :
902 3338 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
903 : PG_GET_COLLATION());
904 :
905 3338 : PG_FREE_IF_COPY(arg1, 0);
906 3338 : PG_FREE_IF_COPY(arg2, 1);
907 :
908 3338 : PG_RETURN_BOOL(cmp >= 0);
909 : }
910 :
911 : Datum
912 64810 : bpcharcmp(PG_FUNCTION_ARGS)
913 : {
914 64810 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
915 64810 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
916 : int len1,
917 : len2;
918 : int cmp;
919 :
920 64810 : len1 = bcTruelen(arg1);
921 64810 : len2 = bcTruelen(arg2);
922 :
923 64810 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
924 : PG_GET_COLLATION());
925 :
926 64810 : PG_FREE_IF_COPY(arg1, 0);
927 64810 : PG_FREE_IF_COPY(arg2, 1);
928 :
929 64810 : PG_RETURN_INT32(cmp);
930 : }
931 :
932 : Datum
933 593 : bpchar_sortsupport(PG_FUNCTION_ARGS)
934 : {
935 593 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
936 593 : Oid collid = ssup->ssup_collation;
937 : MemoryContext oldcontext;
938 :
939 593 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
940 :
941 : /* Use generic string SortSupport */
942 593 : varstr_sortsupport(ssup, BPCHAROID, collid);
943 :
944 593 : MemoryContextSwitchTo(oldcontext);
945 :
946 593 : PG_RETURN_VOID();
947 : }
948 :
949 : Datum
950 0 : bpchar_larger(PG_FUNCTION_ARGS)
951 : {
952 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
953 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
954 : int len1,
955 : len2;
956 : int cmp;
957 :
958 0 : len1 = bcTruelen(arg1);
959 0 : len2 = bcTruelen(arg2);
960 :
961 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
962 : PG_GET_COLLATION());
963 :
964 0 : PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
965 : }
966 :
967 : Datum
968 0 : bpchar_smaller(PG_FUNCTION_ARGS)
969 : {
970 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
971 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
972 : int len1,
973 : len2;
974 : int cmp;
975 :
976 0 : len1 = bcTruelen(arg1);
977 0 : len2 = bcTruelen(arg2);
978 :
979 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
980 : PG_GET_COLLATION());
981 :
982 0 : PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
983 : }
984 :
985 :
986 : /*
987 : * bpchar needs a specialized hash function because we want to ignore
988 : * trailing blanks in comparisons.
989 : */
990 : Datum
991 2918 : hashbpchar(PG_FUNCTION_ARGS)
992 : {
993 2918 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
994 2918 : Oid collid = PG_GET_COLLATION();
995 : char *keydata;
996 : int keylen;
997 : pg_locale_t mylocale;
998 : Datum result;
999 :
1000 2918 : if (!collid)
1001 0 : ereport(ERROR,
1002 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1003 : errmsg("could not determine which collation to use for string hashing"),
1004 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1005 :
1006 2918 : keydata = VARDATA_ANY(key);
1007 2918 : keylen = bcTruelen(key);
1008 :
1009 2918 : mylocale = pg_newlocale_from_collation(collid);
1010 :
1011 2918 : if (mylocale->deterministic)
1012 : {
1013 2806 : result = hash_any((unsigned char *) keydata, keylen);
1014 : }
1015 : else
1016 : {
1017 : Size bsize,
1018 : rsize;
1019 : char *buf;
1020 :
1021 112 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
1022 112 : buf = palloc(bsize + 1);
1023 :
1024 112 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
1025 :
1026 : /* the second call may return a smaller value than the first */
1027 112 : if (rsize > bsize)
1028 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
1029 :
1030 : /*
1031 : * In principle, there's no reason to include the terminating NUL
1032 : * character in the hash, but it was done before and the behavior must
1033 : * be preserved.
1034 : */
1035 112 : result = hash_any((uint8_t *) buf, bsize + 1);
1036 :
1037 112 : pfree(buf);
1038 : }
1039 :
1040 : /* Avoid leaking memory for toasted inputs */
1041 2918 : PG_FREE_IF_COPY(key, 0);
1042 :
1043 2918 : return result;
1044 : }
1045 :
1046 : Datum
1047 56 : hashbpcharextended(PG_FUNCTION_ARGS)
1048 : {
1049 56 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
1050 56 : Oid collid = PG_GET_COLLATION();
1051 : char *keydata;
1052 : int keylen;
1053 : pg_locale_t mylocale;
1054 : Datum result;
1055 :
1056 56 : if (!collid)
1057 0 : ereport(ERROR,
1058 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1059 : errmsg("could not determine which collation to use for string hashing"),
1060 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1061 :
1062 56 : keydata = VARDATA_ANY(key);
1063 56 : keylen = bcTruelen(key);
1064 :
1065 56 : mylocale = pg_newlocale_from_collation(collid);
1066 :
1067 56 : if (mylocale->deterministic)
1068 : {
1069 48 : result = hash_any_extended((unsigned char *) keydata, keylen,
1070 48 : PG_GETARG_INT64(1));
1071 : }
1072 : else
1073 : {
1074 : Size bsize,
1075 : rsize;
1076 : char *buf;
1077 :
1078 8 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
1079 8 : buf = palloc(bsize + 1);
1080 :
1081 8 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
1082 :
1083 : /* the second call may return a smaller value than the first */
1084 8 : if (rsize > bsize)
1085 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
1086 :
1087 : /*
1088 : * In principle, there's no reason to include the terminating NUL
1089 : * character in the hash, but it was done before and the behavior must
1090 : * be preserved.
1091 : */
1092 8 : result = hash_any_extended((uint8_t *) buf, bsize + 1,
1093 8 : PG_GETARG_INT64(1));
1094 :
1095 8 : pfree(buf);
1096 : }
1097 :
1098 56 : PG_FREE_IF_COPY(key, 0);
1099 :
1100 56 : return result;
1101 : }
1102 :
1103 : /*
1104 : * The following operators support character-by-character comparison
1105 : * of bpchar datums, to allow building indexes suitable for LIKE clauses.
1106 : * Note that the regular bpchareq/bpcharne comparison operators, and
1107 : * regular support functions 1 and 2 with "C" collation are assumed to be
1108 : * compatible with these!
1109 : */
1110 :
1111 : static int
1112 76 : internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
1113 : {
1114 : int result;
1115 : int len1,
1116 : len2;
1117 :
1118 76 : len1 = bcTruelen(arg1);
1119 76 : len2 = bcTruelen(arg2);
1120 :
1121 76 : result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1122 76 : if (result != 0)
1123 44 : return result;
1124 32 : else if (len1 < len2)
1125 0 : return -1;
1126 32 : else if (len1 > len2)
1127 0 : return 1;
1128 : else
1129 32 : return 0;
1130 : }
1131 :
1132 :
1133 : Datum
1134 0 : bpchar_pattern_lt(PG_FUNCTION_ARGS)
1135 : {
1136 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1137 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1138 : int result;
1139 :
1140 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1141 :
1142 0 : PG_FREE_IF_COPY(arg1, 0);
1143 0 : PG_FREE_IF_COPY(arg2, 1);
1144 :
1145 0 : PG_RETURN_BOOL(result < 0);
1146 : }
1147 :
1148 :
1149 : Datum
1150 0 : bpchar_pattern_le(PG_FUNCTION_ARGS)
1151 : {
1152 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1153 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1154 : int result;
1155 :
1156 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1157 :
1158 0 : PG_FREE_IF_COPY(arg1, 0);
1159 0 : PG_FREE_IF_COPY(arg2, 1);
1160 :
1161 0 : PG_RETURN_BOOL(result <= 0);
1162 : }
1163 :
1164 :
1165 : Datum
1166 0 : bpchar_pattern_ge(PG_FUNCTION_ARGS)
1167 : {
1168 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1169 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1170 : int result;
1171 :
1172 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1173 :
1174 0 : PG_FREE_IF_COPY(arg1, 0);
1175 0 : PG_FREE_IF_COPY(arg2, 1);
1176 :
1177 0 : PG_RETURN_BOOL(result >= 0);
1178 : }
1179 :
1180 :
1181 : Datum
1182 0 : bpchar_pattern_gt(PG_FUNCTION_ARGS)
1183 : {
1184 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1185 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1186 : int result;
1187 :
1188 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1189 :
1190 0 : PG_FREE_IF_COPY(arg1, 0);
1191 0 : PG_FREE_IF_COPY(arg2, 1);
1192 :
1193 0 : PG_RETURN_BOOL(result > 0);
1194 : }
1195 :
1196 :
1197 : Datum
1198 76 : btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
1199 : {
1200 76 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1201 76 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1202 : int result;
1203 :
1204 76 : result = internal_bpchar_pattern_compare(arg1, arg2);
1205 :
1206 76 : PG_FREE_IF_COPY(arg1, 0);
1207 76 : PG_FREE_IF_COPY(arg2, 1);
1208 :
1209 76 : PG_RETURN_INT32(result);
1210 : }
1211 :
1212 :
1213 : Datum
1214 8 : btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
1215 : {
1216 8 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1217 : MemoryContext oldcontext;
1218 :
1219 8 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1220 :
1221 : /* Use generic string SortSupport, forcing "C" collation */
1222 8 : varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
1223 :
1224 8 : MemoryContextSwitchTo(oldcontext);
1225 :
1226 8 : PG_RETURN_VOID();
1227 : }
|