Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * varchar.c
4 : * Functions for the built-in types char(n) and varchar(n).
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/varchar.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/detoast.h"
18 : #include "access/htup_details.h"
19 : #include "catalog/pg_collation.h"
20 : #include "catalog/pg_type.h"
21 : #include "common/hashfn.h"
22 : #include "libpq/pqformat.h"
23 : #include "mb/pg_wchar.h"
24 : #include "nodes/nodeFuncs.h"
25 : #include "nodes/supportnodes.h"
26 : #include "utils/array.h"
27 : #include "utils/builtins.h"
28 : #include "utils/pg_locale.h"
29 : #include "utils/varlena.h"
30 :
31 : /* common code for bpchartypmodin and varchartypmodin */
32 : static int32
33 3150 : anychar_typmodin(ArrayType *ta, const char *typename)
34 : {
35 : int32 typmod;
36 : int32 *tl;
37 : int n;
38 :
39 3150 : tl = ArrayGetIntegerTypmods(ta, &n);
40 :
41 : /*
42 : * we're not too tense about good error message here because grammar
43 : * shouldn't allow wrong number of modifiers for CHAR
44 : */
45 3150 : if (n != 1)
46 0 : ereport(ERROR,
47 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
48 : errmsg("invalid type modifier")));
49 :
50 3150 : if (*tl < 1)
51 0 : ereport(ERROR,
52 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
53 : errmsg("length for type %s must be at least 1", typename)));
54 3150 : if (*tl > MaxAttrSize)
55 0 : ereport(ERROR,
56 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
57 : errmsg("length for type %s cannot exceed %d",
58 : typename, MaxAttrSize)));
59 :
60 : /*
61 : * For largely historical reasons, the typmod is VARHDRSZ plus the number
62 : * of characters; there is enough client-side code that knows about that
63 : * that we'd better not change it.
64 : */
65 3150 : typmod = VARHDRSZ + *tl;
66 :
67 3150 : return typmod;
68 : }
69 :
70 : /* common code for bpchartypmodout and varchartypmodout */
71 : static char *
72 1028 : anychar_typmodout(int32 typmod)
73 : {
74 1028 : char *res = (char *) palloc(64);
75 :
76 1028 : if (typmod > VARHDRSZ)
77 1028 : snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
78 : else
79 0 : *res = '\0';
80 :
81 1028 : return res;
82 : }
83 :
84 :
85 : /*
86 : * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
87 : * is for blank-padded strings whose length is specified in CREATE TABLE.
88 : * VARCHAR is for storing strings whose length is at most the length specified
89 : * at CREATE TABLE time.
90 : *
91 : * It's hard to implement these types because we cannot figure out
92 : * the length of the type from the type itself. I changed (hopefully all) the
93 : * fmgr calls that invoke input functions of a data type to supply the
94 : * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
95 : * the length of the attributes and hence the exact length of the char() or
96 : * varchar(). We pass this to bpcharin() or varcharin().) In the case where
97 : * we cannot determine the length, we pass in -1 instead and the input
98 : * converter does not enforce any length check.
99 : *
100 : * We actually implement this as a varlena so that we don't have to pass in
101 : * the length for the comparison functions. (The difference between these
102 : * types and "text" is that we truncate and possibly blank-pad the string
103 : * at insertion time.)
104 : *
105 : * - ay 6/95
106 : */
107 :
108 :
109 : /*****************************************************************************
110 : * bpchar - char() *
111 : *****************************************************************************/
112 :
113 : /*
114 : * bpchar_input -- common guts of bpcharin and bpcharrecv
115 : *
116 : * s is the input text of length len (may not be null-terminated)
117 : * atttypmod is the typmod value to apply
118 : *
119 : * Note that atttypmod is measured in characters, which
120 : * is not necessarily the same as the number of bytes.
121 : *
122 : * If the input string is too long, raise an error, unless the extra
123 : * characters are spaces, in which case they're truncated. (per SQL)
124 : *
125 : * If escontext points to an ErrorSaveContext node, that is filled instead
126 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
127 : * to detect errors.
128 : */
129 : static BpChar *
130 421694 : bpchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
131 : {
132 : BpChar *result;
133 : char *r;
134 : size_t maxlen;
135 :
136 : /* If typmod is -1 (or invalid), use the actual string length */
137 421694 : if (atttypmod < (int32) VARHDRSZ)
138 8800 : maxlen = len;
139 : else
140 : {
141 : size_t charlen; /* number of CHARACTERS in the input */
142 :
143 412894 : maxlen = atttypmod - VARHDRSZ;
144 412894 : charlen = pg_mbstrlen_with_len(s, len);
145 412894 : if (charlen > maxlen)
146 : {
147 : /* Verify that extra characters are spaces, and clip them off */
148 198 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
149 : size_t j;
150 :
151 : /*
152 : * at this point, len is the actual BYTE length of the input
153 : * string, maxlen is the max number of CHARACTERS allowed for this
154 : * bpchar type, mbmaxlen is the length in BYTES of those chars.
155 : */
156 210 : for (j = mbmaxlen; j < len; j++)
157 : {
158 204 : if (s[j] != ' ')
159 192 : ereturn(escontext, NULL,
160 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
161 : errmsg("value too long for type character(%d)",
162 : (int) maxlen)));
163 : }
164 :
165 : /*
166 : * Now we set maxlen to the necessary byte length, not the number
167 : * of CHARACTERS!
168 : */
169 6 : maxlen = len = mbmaxlen;
170 : }
171 : else
172 : {
173 : /*
174 : * Now we set maxlen to the necessary byte length, not the number
175 : * of CHARACTERS!
176 : */
177 412696 : maxlen = len + (maxlen - charlen);
178 : }
179 : }
180 :
181 421502 : result = (BpChar *) palloc(maxlen + VARHDRSZ);
182 421502 : SET_VARSIZE(result, maxlen + VARHDRSZ);
183 421502 : r = VARDATA(result);
184 421502 : memcpy(r, s, len);
185 :
186 : /* blank pad the string if necessary */
187 421502 : if (maxlen > len)
188 402244 : memset(r + len, ' ', maxlen - len);
189 :
190 421502 : return result;
191 : }
192 :
193 : /*
194 : * Convert a C string to CHARACTER internal representation. atttypmod
195 : * is the declared length of the type plus VARHDRSZ.
196 : */
197 : Datum
198 421694 : bpcharin(PG_FUNCTION_ARGS)
199 : {
200 421694 : char *s = PG_GETARG_CSTRING(0);
201 : #ifdef NOT_USED
202 : Oid typelem = PG_GETARG_OID(1);
203 : #endif
204 421694 : int32 atttypmod = PG_GETARG_INT32(2);
205 : BpChar *result;
206 :
207 421694 : result = bpchar_input(s, strlen(s), atttypmod, fcinfo->context);
208 421622 : PG_RETURN_BPCHAR_P(result);
209 : }
210 :
211 :
212 : /*
213 : * Convert a CHARACTER value to a C string.
214 : *
215 : * Uses the text conversion functions, which is only appropriate if BpChar
216 : * and text are equivalent types.
217 : */
218 : Datum
219 45012 : bpcharout(PG_FUNCTION_ARGS)
220 : {
221 45012 : Datum txt = PG_GETARG_DATUM(0);
222 :
223 45012 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
224 : }
225 :
226 : /*
227 : * bpcharrecv - converts external binary format to bpchar
228 : */
229 : Datum
230 0 : bpcharrecv(PG_FUNCTION_ARGS)
231 : {
232 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
233 : #ifdef NOT_USED
234 : Oid typelem = PG_GETARG_OID(1);
235 : #endif
236 0 : int32 atttypmod = PG_GETARG_INT32(2);
237 : BpChar *result;
238 : char *str;
239 : int nbytes;
240 :
241 0 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
242 0 : result = bpchar_input(str, nbytes, atttypmod, NULL);
243 0 : pfree(str);
244 0 : PG_RETURN_BPCHAR_P(result);
245 : }
246 :
247 : /*
248 : * bpcharsend - converts bpchar to binary format
249 : */
250 : Datum
251 4 : bpcharsend(PG_FUNCTION_ARGS)
252 : {
253 : /* Exactly the same as textsend, so share code */
254 4 : return textsend(fcinfo);
255 : }
256 :
257 :
258 : /*
259 : * Converts a CHARACTER type to the specified size.
260 : *
261 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
262 : * isExplicit is true if this is for an explicit cast to char(N).
263 : *
264 : * Truncation rules: for an explicit cast, silently truncate to the given
265 : * length; for an implicit cast, raise error unless extra characters are
266 : * all spaces. (This is sort-of per SQL: the spec would actually have us
267 : * raise a "completion condition" for the explicit cast case, but Postgres
268 : * hasn't got such a concept.)
269 : */
270 : Datum
271 12096 : bpchar(PG_FUNCTION_ARGS)
272 : {
273 12096 : BpChar *source = PG_GETARG_BPCHAR_PP(0);
274 12096 : int32 maxlen = PG_GETARG_INT32(1);
275 12096 : bool isExplicit = PG_GETARG_BOOL(2);
276 : BpChar *result;
277 : int32 len;
278 : char *r;
279 : char *s;
280 : int i;
281 : int charlen; /* number of characters in the input string +
282 : * VARHDRSZ */
283 :
284 : /* No work if typmod is invalid */
285 12096 : if (maxlen < (int32) VARHDRSZ)
286 0 : PG_RETURN_BPCHAR_P(source);
287 :
288 12096 : maxlen -= VARHDRSZ;
289 :
290 12096 : len = VARSIZE_ANY_EXHDR(source);
291 12096 : s = VARDATA_ANY(source);
292 :
293 12096 : charlen = pg_mbstrlen_with_len(s, len);
294 :
295 : /* No work if supplied data matches typmod already */
296 12096 : if (charlen == maxlen)
297 5610 : PG_RETURN_BPCHAR_P(source);
298 :
299 6486 : if (charlen > maxlen)
300 : {
301 : /* Verify that extra characters are spaces, and clip them off */
302 : size_t maxmblen;
303 :
304 42 : maxmblen = pg_mbcharcliplen(s, len, maxlen);
305 :
306 42 : if (!isExplicit)
307 : {
308 90 : for (i = maxmblen; i < len; i++)
309 78 : if (s[i] != ' ')
310 24 : ereport(ERROR,
311 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
312 : errmsg("value too long for type character(%d)",
313 : maxlen)));
314 : }
315 :
316 18 : len = maxmblen;
317 :
318 : /*
319 : * At this point, maxlen is the necessary byte length, not the number
320 : * of CHARACTERS!
321 : */
322 18 : maxlen = len;
323 : }
324 : else
325 : {
326 : /*
327 : * At this point, maxlen is the necessary byte length, not the number
328 : * of CHARACTERS!
329 : */
330 6444 : maxlen = len + (maxlen - charlen);
331 : }
332 :
333 : Assert(maxlen >= len);
334 :
335 6462 : result = palloc(maxlen + VARHDRSZ);
336 6462 : SET_VARSIZE(result, maxlen + VARHDRSZ);
337 6462 : r = VARDATA(result);
338 :
339 6462 : memcpy(r, s, len);
340 :
341 : /* blank pad the string if necessary */
342 6462 : if (maxlen > len)
343 6444 : memset(r + len, ' ', maxlen - len);
344 :
345 6462 : PG_RETURN_BPCHAR_P(result);
346 : }
347 :
348 :
349 : /* char_bpchar()
350 : * Convert char to bpchar(1).
351 : */
352 : Datum
353 0 : char_bpchar(PG_FUNCTION_ARGS)
354 : {
355 0 : char c = PG_GETARG_CHAR(0);
356 : BpChar *result;
357 :
358 0 : result = (BpChar *) palloc(VARHDRSZ + 1);
359 :
360 0 : SET_VARSIZE(result, VARHDRSZ + 1);
361 0 : *(VARDATA(result)) = c;
362 :
363 0 : PG_RETURN_BPCHAR_P(result);
364 : }
365 :
366 :
367 : /* bpchar_name()
368 : * Converts a bpchar() type to a NameData type.
369 : */
370 : Datum
371 0 : bpchar_name(PG_FUNCTION_ARGS)
372 : {
373 0 : BpChar *s = PG_GETARG_BPCHAR_PP(0);
374 : char *s_data;
375 : Name result;
376 : int len;
377 :
378 0 : len = VARSIZE_ANY_EXHDR(s);
379 0 : s_data = VARDATA_ANY(s);
380 :
381 : /* Truncate oversize input */
382 0 : if (len >= NAMEDATALEN)
383 0 : len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
384 :
385 : /* Remove trailing blanks */
386 0 : while (len > 0)
387 : {
388 0 : if (s_data[len - 1] != ' ')
389 0 : break;
390 0 : len--;
391 : }
392 :
393 : /* We use palloc0 here to ensure result is zero-padded */
394 0 : result = (Name) palloc0(NAMEDATALEN);
395 0 : memcpy(NameStr(*result), s_data, len);
396 :
397 0 : PG_RETURN_NAME(result);
398 : }
399 :
400 : /* name_bpchar()
401 : * Converts a NameData type to a bpchar type.
402 : *
403 : * Uses the text conversion functions, which is only appropriate if BpChar
404 : * and text are equivalent types.
405 : */
406 : Datum
407 6 : name_bpchar(PG_FUNCTION_ARGS)
408 : {
409 6 : Name s = PG_GETARG_NAME(0);
410 : BpChar *result;
411 :
412 6 : result = (BpChar *) cstring_to_text(NameStr(*s));
413 6 : PG_RETURN_BPCHAR_P(result);
414 : }
415 :
416 : Datum
417 2238 : bpchartypmodin(PG_FUNCTION_ARGS)
418 : {
419 2238 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
420 :
421 2238 : PG_RETURN_INT32(anychar_typmodin(ta, "char"));
422 : }
423 :
424 : Datum
425 836 : bpchartypmodout(PG_FUNCTION_ARGS)
426 : {
427 836 : int32 typmod = PG_GETARG_INT32(0);
428 :
429 836 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
430 : }
431 :
432 :
433 : /*****************************************************************************
434 : * varchar - varchar(n)
435 : *
436 : * Note: varchar piggybacks on type text for most operations, and so has no
437 : * C-coded functions except for I/O and typmod checking.
438 : *****************************************************************************/
439 :
440 : /*
441 : * varchar_input -- common guts of varcharin and varcharrecv
442 : *
443 : * s is the input text of length len (may not be null-terminated)
444 : * atttypmod is the typmod value to apply
445 : *
446 : * Note that atttypmod is measured in characters, which
447 : * is not necessarily the same as the number of bytes.
448 : *
449 : * If the input string is too long, raise an error, unless the extra
450 : * characters are spaces, in which case they're truncated. (per SQL)
451 : *
452 : * If escontext points to an ErrorSaveContext node, that is filled instead
453 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
454 : * to detect errors.
455 : */
456 : static VarChar *
457 532250 : varchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
458 : {
459 : VarChar *result;
460 : size_t maxlen;
461 :
462 532250 : maxlen = atttypmod - VARHDRSZ;
463 :
464 532250 : if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
465 : {
466 : /* Verify that extra characters are spaces, and clip them off */
467 66 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
468 : size_t j;
469 :
470 78 : for (j = mbmaxlen; j < len; j++)
471 : {
472 72 : if (s[j] != ' ')
473 60 : ereturn(escontext, NULL,
474 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
475 : errmsg("value too long for type character varying(%d)",
476 : (int) maxlen)));
477 : }
478 :
479 6 : len = mbmaxlen;
480 : }
481 :
482 : /*
483 : * We can use cstring_to_text_with_len because VarChar and text are
484 : * binary-compatible types.
485 : */
486 532190 : result = (VarChar *) cstring_to_text_with_len(s, len);
487 532190 : return result;
488 : }
489 :
490 : /*
491 : * Convert a C string to VARCHAR internal representation. atttypmod
492 : * is the declared length of the type plus VARHDRSZ.
493 : */
494 : Datum
495 532248 : varcharin(PG_FUNCTION_ARGS)
496 : {
497 532248 : char *s = PG_GETARG_CSTRING(0);
498 : #ifdef NOT_USED
499 : Oid typelem = PG_GETARG_OID(1);
500 : #endif
501 532248 : int32 atttypmod = PG_GETARG_INT32(2);
502 : VarChar *result;
503 :
504 532248 : result = varchar_input(s, strlen(s), atttypmod, fcinfo->context);
505 532236 : PG_RETURN_VARCHAR_P(result);
506 : }
507 :
508 :
509 : /*
510 : * Convert a VARCHAR value to a C string.
511 : *
512 : * Uses the text to C string conversion function, which is only appropriate
513 : * if VarChar and text are equivalent types.
514 : */
515 : Datum
516 184144 : varcharout(PG_FUNCTION_ARGS)
517 : {
518 184144 : Datum txt = PG_GETARG_DATUM(0);
519 :
520 184144 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
521 : }
522 :
523 : /*
524 : * varcharrecv - converts external binary format to varchar
525 : */
526 : Datum
527 2 : varcharrecv(PG_FUNCTION_ARGS)
528 : {
529 2 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
530 : #ifdef NOT_USED
531 : Oid typelem = PG_GETARG_OID(1);
532 : #endif
533 2 : int32 atttypmod = PG_GETARG_INT32(2);
534 : VarChar *result;
535 : char *str;
536 : int nbytes;
537 :
538 2 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
539 2 : result = varchar_input(str, nbytes, atttypmod, NULL);
540 2 : pfree(str);
541 2 : PG_RETURN_VARCHAR_P(result);
542 : }
543 :
544 : /*
545 : * varcharsend - converts varchar to binary format
546 : */
547 : Datum
548 2 : varcharsend(PG_FUNCTION_ARGS)
549 : {
550 : /* Exactly the same as textsend, so share code */
551 2 : return textsend(fcinfo);
552 : }
553 :
554 :
555 : /*
556 : * varchar_support()
557 : *
558 : * Planner support function for the varchar() length coercion function.
559 : *
560 : * Currently, the only interesting thing we can do is flatten calls that set
561 : * the new maximum length >= the previous maximum length. We can ignore the
562 : * isExplicit argument, since that only affects truncation cases.
563 : */
564 : Datum
565 2220 : varchar_support(PG_FUNCTION_ARGS)
566 : {
567 2220 : Node *rawreq = (Node *) PG_GETARG_POINTER(0);
568 2220 : Node *ret = NULL;
569 :
570 2220 : if (IsA(rawreq, SupportRequestSimplify))
571 : {
572 922 : SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
573 922 : FuncExpr *expr = req->fcall;
574 : Node *typmod;
575 :
576 : Assert(list_length(expr->args) >= 2);
577 :
578 922 : typmod = (Node *) lsecond(expr->args);
579 :
580 922 : if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
581 : {
582 922 : Node *source = (Node *) linitial(expr->args);
583 922 : int32 old_typmod = exprTypmod(source);
584 922 : int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
585 922 : int32 old_max = old_typmod - VARHDRSZ;
586 922 : int32 new_max = new_typmod - VARHDRSZ;
587 :
588 922 : if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
589 30 : ret = relabel_to_typmod(source, new_typmod);
590 : }
591 : }
592 :
593 2220 : PG_RETURN_POINTER(ret);
594 : }
595 :
596 : /*
597 : * Converts a VARCHAR type to the specified size.
598 : *
599 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
600 : * isExplicit is true if this is for an explicit cast to varchar(N).
601 : *
602 : * Truncation rules: for an explicit cast, silently truncate to the given
603 : * length; for an implicit cast, raise error unless extra characters are
604 : * all spaces. (This is sort-of per SQL: the spec would actually have us
605 : * raise a "completion condition" for the explicit cast case, but Postgres
606 : * hasn't got such a concept.)
607 : */
608 : Datum
609 23294 : varchar(PG_FUNCTION_ARGS)
610 : {
611 23294 : VarChar *source = PG_GETARG_VARCHAR_PP(0);
612 23294 : int32 typmod = PG_GETARG_INT32(1);
613 23294 : bool isExplicit = PG_GETARG_BOOL(2);
614 : int32 len,
615 : maxlen;
616 : size_t maxmblen;
617 : int i;
618 : char *s_data;
619 :
620 23294 : len = VARSIZE_ANY_EXHDR(source);
621 23294 : s_data = VARDATA_ANY(source);
622 23294 : maxlen = typmod - VARHDRSZ;
623 :
624 : /* No work if typmod is invalid or supplied data fits it already */
625 23294 : if (maxlen < 0 || len <= maxlen)
626 23168 : PG_RETURN_VARCHAR_P(source);
627 :
628 : /* only reach here if string is too long... */
629 :
630 : /* truncate multibyte string preserving multibyte boundary */
631 126 : maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
632 :
633 126 : if (!isExplicit)
634 : {
635 156 : for (i = maxmblen; i < len; i++)
636 144 : if (s_data[i] != ' ')
637 84 : ereport(ERROR,
638 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
639 : errmsg("value too long for type character varying(%d)",
640 : maxlen)));
641 : }
642 :
643 42 : PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
644 : maxmblen));
645 : }
646 :
647 : Datum
648 912 : varchartypmodin(PG_FUNCTION_ARGS)
649 : {
650 912 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
651 :
652 912 : PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
653 : }
654 :
655 : Datum
656 192 : varchartypmodout(PG_FUNCTION_ARGS)
657 : {
658 192 : int32 typmod = PG_GETARG_INT32(0);
659 :
660 192 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
661 : }
662 :
663 :
664 : /*****************************************************************************
665 : * Exported functions
666 : *****************************************************************************/
667 :
668 : /* "True" length (not counting trailing blanks) of a BpChar */
669 : static inline int
670 263294 : bcTruelen(BpChar *arg)
671 : {
672 263294 : return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
673 : }
674 :
/*
 * bpchartruelen -- length of s[0..len) once trailing spaces are stripped
 *
 * Returns the number of leading bytes that remain; len itself is returned
 * when it is zero or negative.
 */
int
bpchartruelen(char *s, int len)
{
	int			remaining = len;

	/*
	 * Scanning bytewise from the end relies on ' ' being a single-byte unit
	 * in every supported multibyte server encoding.
	 */
	while (remaining > 0 && s[remaining - 1] == ' ')
		remaining--;

	return remaining;
}
691 :
692 : Datum
693 18 : bpcharlen(PG_FUNCTION_ARGS)
694 : {
695 18 : BpChar *arg = PG_GETARG_BPCHAR_PP(0);
696 : int len;
697 :
698 : /* get number of bytes, ignoring trailing spaces */
699 18 : len = bcTruelen(arg);
700 :
701 : /* in multibyte encoding, convert to number of characters */
702 18 : if (pg_database_encoding_max_length() != 1)
703 18 : len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
704 :
705 18 : PG_RETURN_INT32(len);
706 : }
707 :
708 : Datum
709 0 : bpcharoctetlen(PG_FUNCTION_ARGS)
710 : {
711 0 : Datum arg = PG_GETARG_DATUM(0);
712 :
713 : /* We need not detoast the input at all */
714 0 : PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
715 : }
716 :
717 :
718 : /*****************************************************************************
719 : * Comparison Functions used for bpchar
720 : *
721 : * Note: btree indexes need these routines not to leak memory; therefore,
722 : * be careful to free working copies of toasted datums. Most places don't
723 : * need to be so careful.
724 : *****************************************************************************/
725 :
726 : static void
727 25414 : check_collation_set(Oid collid)
728 : {
729 25414 : if (!OidIsValid(collid))
730 : {
731 : /*
732 : * This typically means that the parser could not resolve a conflict
733 : * of implicit collations, so report it that way.
734 : */
735 0 : ereport(ERROR,
736 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
737 : errmsg("could not determine which collation to use for string comparison"),
738 : errhint("Use the COLLATE clause to set the collation explicitly.")));
739 : }
740 25414 : }
741 :
742 : Datum
743 18988 : bpchareq(PG_FUNCTION_ARGS)
744 : {
745 18988 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
746 18988 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
747 : int len1,
748 : len2;
749 : bool result;
750 18988 : Oid collid = PG_GET_COLLATION();
751 : pg_locale_t mylocale;
752 :
753 18988 : check_collation_set(collid);
754 :
755 18988 : len1 = bcTruelen(arg1);
756 18988 : len2 = bcTruelen(arg2);
757 :
758 18988 : mylocale = pg_newlocale_from_collation(collid);
759 :
760 18988 : if (mylocale->deterministic)
761 : {
762 : /*
763 : * Since we only care about equality or not-equality, we can avoid all
764 : * the expense of strcoll() here, and just do bitwise comparison.
765 : */
766 18820 : if (len1 != len2)
767 2502 : result = false;
768 : else
769 16318 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
770 : }
771 : else
772 : {
773 168 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
774 : collid) == 0);
775 : }
776 :
777 18988 : PG_FREE_IF_COPY(arg1, 0);
778 18988 : PG_FREE_IF_COPY(arg2, 1);
779 :
780 18988 : PG_RETURN_BOOL(result);
781 : }
782 :
783 : Datum
784 6426 : bpcharne(PG_FUNCTION_ARGS)
785 : {
786 6426 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
787 6426 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
788 : int len1,
789 : len2;
790 : bool result;
791 6426 : Oid collid = PG_GET_COLLATION();
792 : pg_locale_t mylocale;
793 :
794 6426 : check_collation_set(collid);
795 :
796 6426 : len1 = bcTruelen(arg1);
797 6426 : len2 = bcTruelen(arg2);
798 :
799 6426 : mylocale = pg_newlocale_from_collation(collid);
800 :
801 6426 : if (mylocale->deterministic)
802 : {
803 : /*
804 : * Since we only care about equality or not-equality, we can avoid all
805 : * the expense of strcoll() here, and just do bitwise comparison.
806 : */
807 6402 : if (len1 != len2)
808 2034 : result = true;
809 : else
810 4368 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
811 : }
812 : else
813 : {
814 24 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
815 : collid) != 0);
816 : }
817 :
818 6426 : PG_FREE_IF_COPY(arg1, 0);
819 6426 : PG_FREE_IF_COPY(arg2, 1);
820 :
821 6426 : PG_RETURN_BOOL(result);
822 : }
823 :
824 : Datum
825 6022 : bpcharlt(PG_FUNCTION_ARGS)
826 : {
827 6022 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
828 6022 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
829 : int len1,
830 : len2;
831 : int cmp;
832 :
833 6022 : len1 = bcTruelen(arg1);
834 6022 : len2 = bcTruelen(arg2);
835 :
836 6022 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
837 : PG_GET_COLLATION());
838 :
839 6022 : PG_FREE_IF_COPY(arg1, 0);
840 6022 : PG_FREE_IF_COPY(arg2, 1);
841 :
842 6022 : PG_RETURN_BOOL(cmp < 0);
843 : }
844 :
845 : Datum
846 5546 : bpcharle(PG_FUNCTION_ARGS)
847 : {
848 5546 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
849 5546 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
850 : int len1,
851 : len2;
852 : int cmp;
853 :
854 5546 : len1 = bcTruelen(arg1);
855 5546 : len2 = bcTruelen(arg2);
856 :
857 5546 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
858 : PG_GET_COLLATION());
859 :
860 5546 : PG_FREE_IF_COPY(arg1, 0);
861 5546 : PG_FREE_IF_COPY(arg2, 1);
862 :
863 5546 : PG_RETURN_BOOL(cmp <= 0);
864 : }
865 :
866 : Datum
867 6252 : bpchargt(PG_FUNCTION_ARGS)
868 : {
869 6252 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
870 6252 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
871 : int len1,
872 : len2;
873 : int cmp;
874 :
875 6252 : len1 = bcTruelen(arg1);
876 6252 : len2 = bcTruelen(arg2);
877 :
878 6252 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
879 : PG_GET_COLLATION());
880 :
881 6252 : PG_FREE_IF_COPY(arg1, 0);
882 6252 : PG_FREE_IF_COPY(arg2, 1);
883 :
884 6252 : PG_RETURN_BOOL(cmp > 0);
885 : }
886 :
887 : Datum
888 5708 : bpcharge(PG_FUNCTION_ARGS)
889 : {
890 5708 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
891 5708 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
892 : int len1,
893 : len2;
894 : int cmp;
895 :
896 5708 : len1 = bcTruelen(arg1);
897 5708 : len2 = bcTruelen(arg2);
898 :
899 5708 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
900 : PG_GET_COLLATION());
901 :
902 5708 : PG_FREE_IF_COPY(arg1, 0);
903 5708 : PG_FREE_IF_COPY(arg2, 1);
904 :
905 5708 : PG_RETURN_BOOL(cmp >= 0);
906 : }
907 :
908 : Datum
909 80384 : bpcharcmp(PG_FUNCTION_ARGS)
910 : {
911 80384 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
912 80384 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
913 : int len1,
914 : len2;
915 : int cmp;
916 :
917 80384 : len1 = bcTruelen(arg1);
918 80384 : len2 = bcTruelen(arg2);
919 :
920 80384 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
921 : PG_GET_COLLATION());
922 :
923 80384 : PG_FREE_IF_COPY(arg1, 0);
924 80384 : PG_FREE_IF_COPY(arg2, 1);
925 :
926 80384 : PG_RETURN_INT32(cmp);
927 : }
928 :
929 : Datum
930 920 : bpchar_sortsupport(PG_FUNCTION_ARGS)
931 : {
932 920 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
933 920 : Oid collid = ssup->ssup_collation;
934 : MemoryContext oldcontext;
935 :
936 920 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
937 :
938 : /* Use generic string SortSupport */
939 920 : varstr_sortsupport(ssup, BPCHAROID, collid);
940 :
941 920 : MemoryContextSwitchTo(oldcontext);
942 :
943 920 : PG_RETURN_VOID();
944 : }
945 :
946 : Datum
947 0 : bpchar_larger(PG_FUNCTION_ARGS)
948 : {
949 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
950 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
951 : int len1,
952 : len2;
953 : int cmp;
954 :
955 0 : len1 = bcTruelen(arg1);
956 0 : len2 = bcTruelen(arg2);
957 :
958 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
959 : PG_GET_COLLATION());
960 :
961 0 : PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
962 : }
963 :
964 : Datum
965 0 : bpchar_smaller(PG_FUNCTION_ARGS)
966 : {
967 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
968 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
969 : int len1,
970 : len2;
971 : int cmp;
972 :
973 0 : len1 = bcTruelen(arg1);
974 0 : len2 = bcTruelen(arg2);
975 :
976 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
977 : PG_GET_COLLATION());
978 :
979 0 : PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
980 : }
981 :
982 :
983 : /*
984 : * bpchar needs a specialized hash function because we want to ignore
985 : * trailing blanks in comparisons.
986 : */
987 : Datum
988 4384 : hashbpchar(PG_FUNCTION_ARGS)
989 : {
990 4384 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
991 4384 : Oid collid = PG_GET_COLLATION();
992 : char *keydata;
993 : int keylen;
994 : pg_locale_t mylocale;
995 : Datum result;
996 :
997 4384 : if (!collid)
998 0 : ereport(ERROR,
999 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1000 : errmsg("could not determine which collation to use for string hashing"),
1001 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1002 :
1003 4384 : keydata = VARDATA_ANY(key);
1004 4384 : keylen = bcTruelen(key);
1005 :
1006 4384 : mylocale = pg_newlocale_from_collation(collid);
1007 :
1008 4384 : if (mylocale->deterministic)
1009 : {
1010 4216 : result = hash_any((unsigned char *) keydata, keylen);
1011 : }
1012 : else
1013 : {
1014 : Size bsize,
1015 : rsize;
1016 : char *buf;
1017 :
1018 168 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
1019 168 : buf = palloc(bsize + 1);
1020 :
1021 168 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
1022 :
1023 : /* the second call may return a smaller value than the first */
1024 168 : if (rsize > bsize)
1025 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
1026 :
1027 : /*
1028 : * In principle, there's no reason to include the terminating NUL
1029 : * character in the hash, but it was done before and the behavior must
1030 : * be preserved.
1031 : */
1032 168 : result = hash_any((uint8_t *) buf, bsize + 1);
1033 :
1034 168 : pfree(buf);
1035 : }
1036 :
1037 : /* Avoid leaking memory for toasted inputs */
1038 4384 : PG_FREE_IF_COPY(key, 0);
1039 :
1040 4384 : return result;
1041 : }
1042 :
1043 : Datum
1044 84 : hashbpcharextended(PG_FUNCTION_ARGS)
1045 : {
1046 84 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
1047 84 : Oid collid = PG_GET_COLLATION();
1048 : char *keydata;
1049 : int keylen;
1050 : pg_locale_t mylocale;
1051 : Datum result;
1052 :
1053 84 : if (!collid)
1054 0 : ereport(ERROR,
1055 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1056 : errmsg("could not determine which collation to use for string hashing"),
1057 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1058 :
1059 84 : keydata = VARDATA_ANY(key);
1060 84 : keylen = bcTruelen(key);
1061 :
1062 84 : mylocale = pg_newlocale_from_collation(collid);
1063 :
1064 84 : if (mylocale->deterministic)
1065 : {
1066 72 : result = hash_any_extended((unsigned char *) keydata, keylen,
1067 72 : PG_GETARG_INT64(1));
1068 : }
1069 : else
1070 : {
1071 : Size bsize,
1072 : rsize;
1073 : char *buf;
1074 :
1075 12 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
1076 12 : buf = palloc(bsize + 1);
1077 :
1078 12 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
1079 :
1080 : /* the second call may return a smaller value than the first */
1081 12 : if (rsize > bsize)
1082 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
1083 :
1084 : /*
1085 : * In principle, there's no reason to include the terminating NUL
1086 : * character in the hash, but it was done before and the behavior must
1087 : * be preserved.
1088 : */
1089 12 : result = hash_any_extended((uint8_t *) buf, bsize + 1,
1090 12 : PG_GETARG_INT64(1));
1091 :
1092 12 : pfree(buf);
1093 : }
1094 :
1095 84 : PG_FREE_IF_COPY(key, 0);
1096 :
1097 84 : return result;
1098 : }
1099 :
1100 : /*
1101 : * The following operators support character-by-character comparison
1102 : * of bpchar datums, to allow building indexes suitable for LIKE clauses.
1103 : * Note that the regular bpchareq/bpcharne comparison operators, and
1104 : * regular support functions 1 and 2 with "C" collation are assumed to be
1105 : * compatible with these!
1106 : */
1107 :
1108 : static int
1109 78 : internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
1110 : {
1111 : int result;
1112 : int len1,
1113 : len2;
1114 :
1115 78 : len1 = bcTruelen(arg1);
1116 78 : len2 = bcTruelen(arg2);
1117 :
1118 78 : result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1119 78 : if (result != 0)
1120 48 : return result;
1121 30 : else if (len1 < len2)
1122 0 : return -1;
1123 30 : else if (len1 > len2)
1124 0 : return 1;
1125 : else
1126 30 : return 0;
1127 : }
1128 :
1129 :
1130 : Datum
1131 0 : bpchar_pattern_lt(PG_FUNCTION_ARGS)
1132 : {
1133 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1134 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1135 : int result;
1136 :
1137 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1138 :
1139 0 : PG_FREE_IF_COPY(arg1, 0);
1140 0 : PG_FREE_IF_COPY(arg2, 1);
1141 :
1142 0 : PG_RETURN_BOOL(result < 0);
1143 : }
1144 :
1145 :
1146 : Datum
1147 0 : bpchar_pattern_le(PG_FUNCTION_ARGS)
1148 : {
1149 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1150 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1151 : int result;
1152 :
1153 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1154 :
1155 0 : PG_FREE_IF_COPY(arg1, 0);
1156 0 : PG_FREE_IF_COPY(arg2, 1);
1157 :
1158 0 : PG_RETURN_BOOL(result <= 0);
1159 : }
1160 :
1161 :
1162 : Datum
1163 0 : bpchar_pattern_ge(PG_FUNCTION_ARGS)
1164 : {
1165 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1166 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1167 : int result;
1168 :
1169 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1170 :
1171 0 : PG_FREE_IF_COPY(arg1, 0);
1172 0 : PG_FREE_IF_COPY(arg2, 1);
1173 :
1174 0 : PG_RETURN_BOOL(result >= 0);
1175 : }
1176 :
1177 :
1178 : Datum
1179 0 : bpchar_pattern_gt(PG_FUNCTION_ARGS)
1180 : {
1181 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1182 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1183 : int result;
1184 :
1185 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1186 :
1187 0 : PG_FREE_IF_COPY(arg1, 0);
1188 0 : PG_FREE_IF_COPY(arg2, 1);
1189 :
1190 0 : PG_RETURN_BOOL(result > 0);
1191 : }
1192 :
1193 :
1194 : Datum
1195 78 : btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
1196 : {
1197 78 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1198 78 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1199 : int result;
1200 :
1201 78 : result = internal_bpchar_pattern_compare(arg1, arg2);
1202 :
1203 78 : PG_FREE_IF_COPY(arg1, 0);
1204 78 : PG_FREE_IF_COPY(arg2, 1);
1205 :
1206 78 : PG_RETURN_INT32(result);
1207 : }
1208 :
1209 :
1210 : Datum
1211 12 : btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
1212 : {
1213 12 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1214 : MemoryContext oldcontext;
1215 :
1216 12 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1217 :
1218 : /* Use generic string SortSupport, forcing "C" collation */
1219 12 : varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
1220 :
1221 12 : MemoryContextSwitchTo(oldcontext);
1222 :
1223 12 : PG_RETURN_VOID();
1224 : }
|