Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * varchar.c
4 : * Functions for the built-in types char(n) and varchar(n).
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/varchar.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/detoast.h"
18 : #include "access/htup_details.h"
19 : #include "catalog/pg_collation.h"
20 : #include "catalog/pg_type.h"
21 : #include "common/hashfn.h"
22 : #include "libpq/pqformat.h"
23 : #include "mb/pg_wchar.h"
24 : #include "nodes/nodeFuncs.h"
25 : #include "nodes/supportnodes.h"
26 : #include "utils/array.h"
27 : #include "utils/builtins.h"
28 : #include "utils/pg_locale.h"
29 : #include "utils/varlena.h"
30 :
31 : /* common code for bpchartypmodin and varchartypmodin */
32 : static int32
33 3620 : anychar_typmodin(ArrayType *ta, const char *typename)
34 : {
35 : int32 typmod;
36 : int32 *tl;
37 : int n;
38 :
39 3620 : tl = ArrayGetIntegerTypmods(ta, &n);
40 :
41 : /*
42 : * we're not too tense about good error message here because grammar
43 : * shouldn't allow wrong number of modifiers for CHAR
44 : */
45 3620 : if (n != 1)
46 0 : ereport(ERROR,
47 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
48 : errmsg("invalid type modifier")));
49 :
50 3620 : if (*tl < 1)
51 0 : ereport(ERROR,
52 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
53 : errmsg("length for type %s must be at least 1", typename)));
54 3620 : if (*tl > MaxAttrSize)
55 0 : ereport(ERROR,
56 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
57 : errmsg("length for type %s cannot exceed %d",
58 : typename, MaxAttrSize)));
59 :
60 : /*
61 : * For largely historical reasons, the typmod is VARHDRSZ plus the number
62 : * of characters; there is enough client-side code that knows about that
63 : * that we'd better not change it.
64 : */
65 3620 : typmod = VARHDRSZ + *tl;
66 :
67 3620 : return typmod;
68 : }
69 :
70 : /* common code for bpchartypmodout and varchartypmodout */
71 : static char *
72 1006 : anychar_typmodout(int32 typmod)
73 : {
74 1006 : char *res = (char *) palloc(64);
75 :
76 1006 : if (typmod > VARHDRSZ)
77 1006 : snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
78 : else
79 0 : *res = '\0';
80 :
81 1006 : return res;
82 : }
83 :
84 :
85 : /*
86 : * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
87 : * is for blank-padded strings whose length is specified in CREATE TABLE.
88 : * VARCHAR is for storing strings whose length is at most the length
89 : * specified at CREATE TABLE time.
90 : *
91 : * It's hard to implement these types because we cannot figure out
92 : * the length of the type from the type itself. I changed (hopefully all) the
93 : * fmgr calls that invoke input functions of a data type to supply the
94 : * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
95 : * the length of the attributes and hence the exact length of the char() or
96 : * varchar(). We pass this to bpcharin() or varcharin().) In the case where
97 : * we cannot determine the length, we pass in -1 instead and the input
98 : * converter does not enforce any length check.
99 : *
100 : * We actually implement this as a varlena so that we don't have to pass in
101 : * the length for the comparison functions. (The difference between these
102 : * types and "text" is that we truncate and possibly blank-pad the string
103 : * at insertion time.)
104 : *
105 : * - ay 6/95
106 : */
107 :
108 :
109 : /*****************************************************************************
110 : * bpchar - char() *
111 : *****************************************************************************/
112 :
113 : /*
114 : * bpchar_input -- common guts of bpcharin and bpcharrecv
115 : *
116 : * s is the input text of length len (may not be null-terminated)
117 : * atttypmod is the typmod value to apply
118 : *
119 : * Note that atttypmod is measured in characters, which
120 : * is not necessarily the same as the number of bytes.
121 : *
122 : * If the input string is too long, raise an error, unless the extra
123 : * characters are spaces, in which case they're truncated. (per SQL)
124 : *
125 : * If escontext points to an ErrorSaveContext node, that is filled instead
126 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
127 : * to detect errors.
128 : */
129 : static BpChar *
130 421440 : bpchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
131 : {
132 : BpChar *result;
133 : char *r;
134 : size_t maxlen;
135 :
136 : /* If typmod is -1 (or invalid), use the actual string length */
137 421440 : if (atttypmod < (int32) VARHDRSZ)
138 8896 : maxlen = len;
139 : else
140 : {
141 : size_t charlen; /* number of CHARACTERS in the input */
142 :
143 412544 : maxlen = atttypmod - VARHDRSZ;
144 412544 : charlen = pg_mbstrlen_with_len(s, len);
145 412544 : if (charlen > maxlen)
146 : {
147 : /* Verify that extra characters are spaces, and clip them off */
148 54 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
149 : size_t j;
150 :
151 : /*
152 : * at this point, len is the actual BYTE length of the input
153 : * string, maxlen is the max number of CHARACTERS allowed for this
154 : * bpchar type, mbmaxlen is the length in BYTES of those chars.
155 : */
156 66 : for (j = mbmaxlen; j < len; j++)
157 : {
158 60 : if (s[j] != ' ')
159 48 : ereturn(escontext, NULL,
160 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
161 : errmsg("value too long for type character(%d)",
162 : (int) maxlen)));
163 : }
164 :
165 : /*
166 : * Now we set maxlen to the necessary byte length, not the number
167 : * of CHARACTERS!
168 : */
169 6 : maxlen = len = mbmaxlen;
170 : }
171 : else
172 : {
173 : /*
174 : * Now we set maxlen to the necessary byte length, not the number
175 : * of CHARACTERS!
176 : */
177 412490 : maxlen = len + (maxlen - charlen);
178 : }
179 : }
180 :
181 421392 : result = (BpChar *) palloc(maxlen + VARHDRSZ);
182 421392 : SET_VARSIZE(result, maxlen + VARHDRSZ);
183 421392 : r = VARDATA(result);
184 421392 : memcpy(r, s, len);
185 :
186 : /* blank pad the string if necessary */
187 421392 : if (maxlen > len)
188 402172 : memset(r + len, ' ', maxlen - len);
189 :
190 421392 : return result;
191 : }
192 :
193 : /*
194 : * Convert a C string to CHARACTER internal representation. atttypmod
195 : * is the declared length of the type plus VARHDRSZ.
196 : */
197 : Datum
198 421440 : bpcharin(PG_FUNCTION_ARGS)
199 : {
200 421440 : char *s = PG_GETARG_CSTRING(0);
201 : #ifdef NOT_USED
202 : Oid typelem = PG_GETARG_OID(1);
203 : #endif
204 421440 : int32 atttypmod = PG_GETARG_INT32(2);
205 : BpChar *result;
206 :
207 421440 : result = bpchar_input(s, strlen(s), atttypmod, fcinfo->context);
208 421410 : PG_RETURN_BPCHAR_P(result);
209 : }
210 :
211 :
212 : /*
213 : * Convert a CHARACTER value to a C string.
214 : *
215 : * Uses the text conversion functions, which is only appropriate if BpChar
216 : * and text are equivalent types.
217 : */
218 : Datum
219 44672 : bpcharout(PG_FUNCTION_ARGS)
220 : {
221 44672 : Datum txt = PG_GETARG_DATUM(0);
222 :
223 44672 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
224 : }
225 :
226 : /*
227 : * bpcharrecv - converts external binary format to bpchar
228 : */
229 : Datum
230 0 : bpcharrecv(PG_FUNCTION_ARGS)
231 : {
232 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
233 : #ifdef NOT_USED
234 : Oid typelem = PG_GETARG_OID(1);
235 : #endif
236 0 : int32 atttypmod = PG_GETARG_INT32(2);
237 : BpChar *result;
238 : char *str;
239 : int nbytes;
240 :
241 0 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
242 0 : result = bpchar_input(str, nbytes, atttypmod, NULL);
243 0 : pfree(str);
244 0 : PG_RETURN_BPCHAR_P(result);
245 : }
246 :
247 : /*
248 : * bpcharsend - converts bpchar to binary format
249 : */
250 : Datum
251 4 : bpcharsend(PG_FUNCTION_ARGS)
252 : {
253 : /* Exactly the same as textsend, so share code */
254 4 : return textsend(fcinfo);
255 : }
256 :
257 :
258 : /*
259 : * Converts a CHARACTER type to the specified size.
260 : *
261 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
262 : * isExplicit is true if this is for an explicit cast to char(N).
263 : *
264 : * Truncation rules: for an explicit cast, silently truncate to the given
265 : * length; for an implicit cast, raise error unless extra characters are
266 : * all spaces. (This is sort-of per SQL: the spec would actually have us
267 : * raise a "completion condition" for the explicit cast case, but Postgres
268 : * hasn't got such a concept.)
269 : */
270 : Datum
271 12298 : bpchar(PG_FUNCTION_ARGS)
272 : {
273 12298 : BpChar *source = PG_GETARG_BPCHAR_PP(0);
274 12298 : int32 maxlen = PG_GETARG_INT32(1);
275 12298 : bool isExplicit = PG_GETARG_BOOL(2);
276 : BpChar *result;
277 : int32 len;
278 : char *r;
279 : char *s;
280 : int i;
281 : int charlen; /* number of characters in the input string +
282 : * VARHDRSZ */
283 :
284 : /* No work if typmod is invalid */
285 12298 : if (maxlen < (int32) VARHDRSZ)
286 0 : PG_RETURN_BPCHAR_P(source);
287 :
288 12298 : maxlen -= VARHDRSZ;
289 :
290 12298 : len = VARSIZE_ANY_EXHDR(source);
291 12298 : s = VARDATA_ANY(source);
292 :
293 12298 : charlen = pg_mbstrlen_with_len(s, len);
294 :
295 : /* No work if supplied data matches typmod already */
296 12298 : if (charlen == maxlen)
297 5622 : PG_RETURN_BPCHAR_P(source);
298 :
299 6676 : if (charlen > maxlen)
300 : {
301 : /* Verify that extra characters are spaces, and clip them off */
302 : size_t maxmblen;
303 :
304 138 : maxmblen = pg_mbcharcliplen(s, len, maxlen);
305 :
306 138 : if (!isExplicit)
307 : {
308 84 : for (i = maxmblen; i < len; i++)
309 72 : if (s[i] != ' ')
310 18 : ereport(ERROR,
311 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
312 : errmsg("value too long for type character(%d)",
313 : maxlen)));
314 : }
315 :
316 120 : len = maxmblen;
317 :
318 : /*
319 : * At this point, maxlen is the necessary byte length, not the number
320 : * of CHARACTERS!
321 : */
322 120 : maxlen = len;
323 : }
324 : else
325 : {
326 : /*
327 : * At this point, maxlen is the necessary byte length, not the number
328 : * of CHARACTERS!
329 : */
330 6538 : maxlen = len + (maxlen - charlen);
331 : }
332 :
333 : Assert(maxlen >= len);
334 :
335 6658 : result = palloc(maxlen + VARHDRSZ);
336 6658 : SET_VARSIZE(result, maxlen + VARHDRSZ);
337 6658 : r = VARDATA(result);
338 :
339 6658 : memcpy(r, s, len);
340 :
341 : /* blank pad the string if necessary */
342 6658 : if (maxlen > len)
343 6538 : memset(r + len, ' ', maxlen - len);
344 :
345 6658 : PG_RETURN_BPCHAR_P(result);
346 : }
347 :
348 :
349 : /* char_bpchar()
350 : * Convert char to bpchar(1).
351 : */
352 : Datum
353 0 : char_bpchar(PG_FUNCTION_ARGS)
354 : {
355 0 : char c = PG_GETARG_CHAR(0);
356 : BpChar *result;
357 :
358 0 : result = (BpChar *) palloc(VARHDRSZ + 1);
359 :
360 0 : SET_VARSIZE(result, VARHDRSZ + 1);
361 0 : *(VARDATA(result)) = c;
362 :
363 0 : PG_RETURN_BPCHAR_P(result);
364 : }
365 :
366 :
367 : /* bpchar_name()
368 : * Converts a bpchar() type to a NameData type.
369 : */
370 : Datum
371 0 : bpchar_name(PG_FUNCTION_ARGS)
372 : {
373 0 : BpChar *s = PG_GETARG_BPCHAR_PP(0);
374 : char *s_data;
375 : Name result;
376 : int len;
377 :
378 0 : len = VARSIZE_ANY_EXHDR(s);
379 0 : s_data = VARDATA_ANY(s);
380 :
381 : /* Truncate oversize input */
382 0 : if (len >= NAMEDATALEN)
383 0 : len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
384 :
385 : /* Remove trailing blanks */
386 0 : while (len > 0)
387 : {
388 0 : if (s_data[len - 1] != ' ')
389 0 : break;
390 0 : len--;
391 : }
392 :
393 : /* We use palloc0 here to ensure result is zero-padded */
394 0 : result = (Name) palloc0(NAMEDATALEN);
395 0 : memcpy(NameStr(*result), s_data, len);
396 :
397 0 : PG_RETURN_NAME(result);
398 : }
399 :
400 : /* name_bpchar()
401 : * Converts a NameData type to a bpchar type.
402 : *
403 : * Uses the text conversion functions, which is only appropriate if BpChar
404 : * and text are equivalent types.
405 : */
406 : Datum
407 6 : name_bpchar(PG_FUNCTION_ARGS)
408 : {
409 6 : Name s = PG_GETARG_NAME(0);
410 : BpChar *result;
411 :
412 6 : result = (BpChar *) cstring_to_text(NameStr(*s));
413 6 : PG_RETURN_BPCHAR_P(result);
414 : }
415 :
416 : Datum
417 2176 : bpchartypmodin(PG_FUNCTION_ARGS)
418 : {
419 2176 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
420 :
421 2176 : PG_RETURN_INT32(anychar_typmodin(ta, "char"));
422 : }
423 :
424 : Datum
425 814 : bpchartypmodout(PG_FUNCTION_ARGS)
426 : {
427 814 : int32 typmod = PG_GETARG_INT32(0);
428 :
429 814 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
430 : }
431 :
432 :
433 : /*****************************************************************************
434 : * varchar - varchar(n)
435 : *
436 : * Note: varchar piggybacks on type text for most operations, and so has no
437 : * C-coded functions except for I/O and typmod checking.
438 : *****************************************************************************/
439 :
440 : /*
441 : * varchar_input -- common guts of varcharin and varcharrecv
442 : *
443 : * s is the input text of length len (may not be null-terminated)
444 : * atttypmod is the typmod value to apply
445 : *
446 : * Note that atttypmod is measured in characters, which
447 : * is not necessarily the same as the number of bytes.
448 : *
449 : * If the input string is too long, raise an error, unless the extra
450 : * characters are spaces, in which case they're truncated. (per SQL)
451 : *
452 : * If escontext points to an ErrorSaveContext node, that is filled instead
453 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
454 : * to detect errors.
455 : */
456 : static VarChar *
457 487866 : varchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
458 : {
459 : VarChar *result;
460 : size_t maxlen;
461 :
462 487866 : maxlen = atttypmod - VARHDRSZ;
463 :
464 487866 : if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
465 : {
466 : /* Verify that extra characters are spaces, and clip them off */
467 30 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
468 : size_t j;
469 :
470 42 : for (j = mbmaxlen; j < len; j++)
471 : {
472 36 : if (s[j] != ' ')
473 24 : ereturn(escontext, NULL,
474 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
475 : errmsg("value too long for type character varying(%d)",
476 : (int) maxlen)));
477 : }
478 :
479 6 : len = mbmaxlen;
480 : }
481 :
482 : /*
483 : * We can use cstring_to_text_with_len because VarChar and text are
484 : * binary-compatible types.
485 : */
486 487842 : result = (VarChar *) cstring_to_text_with_len(s, len);
487 487842 : return result;
488 : }
489 :
490 : /*
491 : * Convert a C string to VARCHAR internal representation. atttypmod
492 : * is the declared length of the type plus VARHDRSZ.
493 : */
494 : Datum
495 487864 : varcharin(PG_FUNCTION_ARGS)
496 : {
497 487864 : char *s = PG_GETARG_CSTRING(0);
498 : #ifdef NOT_USED
499 : Oid typelem = PG_GETARG_OID(1);
500 : #endif
501 487864 : int32 atttypmod = PG_GETARG_INT32(2);
502 : VarChar *result;
503 :
504 487864 : result = varchar_input(s, strlen(s), atttypmod, fcinfo->context);
505 487852 : PG_RETURN_VARCHAR_P(result);
506 : }
507 :
508 :
509 : /*
510 : * Convert a VARCHAR value to a C string.
511 : *
512 : * Uses the text to C string conversion function, which is only appropriate
513 : * if VarChar and text are equivalent types.
514 : */
515 : Datum
516 182342 : varcharout(PG_FUNCTION_ARGS)
517 : {
518 182342 : Datum txt = PG_GETARG_DATUM(0);
519 :
520 182342 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
521 : }
522 :
523 : /*
524 : * varcharrecv - converts external binary format to varchar
525 : */
526 : Datum
527 2 : varcharrecv(PG_FUNCTION_ARGS)
528 : {
529 2 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
530 : #ifdef NOT_USED
531 : Oid typelem = PG_GETARG_OID(1);
532 : #endif
533 2 : int32 atttypmod = PG_GETARG_INT32(2);
534 : VarChar *result;
535 : char *str;
536 : int nbytes;
537 :
538 2 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
539 2 : result = varchar_input(str, nbytes, atttypmod, NULL);
540 2 : pfree(str);
541 2 : PG_RETURN_VARCHAR_P(result);
542 : }
543 :
544 : /*
545 : * varcharsend - converts varchar to binary format
546 : */
547 : Datum
548 2 : varcharsend(PG_FUNCTION_ARGS)
549 : {
550 : /* Exactly the same as textsend, so share code */
551 2 : return textsend(fcinfo);
552 : }
553 :
554 :
555 : /*
556 : * varchar_support()
557 : *
558 : * Planner support function for the varchar() length coercion function.
559 : *
560 : * Currently, the only interesting thing we can do is flatten calls that set
561 : * the new maximum length >= the previous maximum length. We can ignore the
562 : * isExplicit argument, since that only affects truncation cases.
563 : */
564 : Datum
565 2314 : varchar_support(PG_FUNCTION_ARGS)
566 : {
567 2314 : Node *rawreq = (Node *) PG_GETARG_POINTER(0);
568 2314 : Node *ret = NULL;
569 :
570 2314 : if (IsA(rawreq, SupportRequestSimplify))
571 : {
572 1034 : SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
573 1034 : FuncExpr *expr = req->fcall;
574 : Node *typmod;
575 :
576 : Assert(list_length(expr->args) >= 2);
577 :
578 1034 : typmod = (Node *) lsecond(expr->args);
579 :
580 1034 : if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
581 : {
582 1034 : Node *source = (Node *) linitial(expr->args);
583 1034 : int32 old_typmod = exprTypmod(source);
584 1034 : int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
585 1034 : int32 old_max = old_typmod - VARHDRSZ;
586 1034 : int32 new_max = new_typmod - VARHDRSZ;
587 :
588 1034 : if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
589 30 : ret = relabel_to_typmod(source, new_typmod);
590 : }
591 : }
592 :
593 2314 : PG_RETURN_POINTER(ret);
594 : }
595 :
596 : /*
597 : * Converts a VARCHAR type to the specified size.
598 : *
599 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
600 : * isExplicit is true if this is for an explicit cast to varchar(N).
601 : *
602 : * Truncation rules: for an explicit cast, silently truncate to the given
603 : * length; for an implicit cast, raise error unless extra characters are
604 : * all spaces. (This is sort-of per SQL: the spec would actually have us
605 : * raise a "completion condition" for the explicit cast case, but Postgres
606 : * hasn't got such a concept.)
607 : */
608 : Datum
609 23620 : varchar(PG_FUNCTION_ARGS)
610 : {
611 23620 : VarChar *source = PG_GETARG_VARCHAR_PP(0);
612 23620 : int32 typmod = PG_GETARG_INT32(1);
613 23620 : bool isExplicit = PG_GETARG_BOOL(2);
614 : int32 len,
615 : maxlen;
616 : size_t maxmblen;
617 : int i;
618 : char *s_data;
619 :
620 23620 : len = VARSIZE_ANY_EXHDR(source);
621 23620 : s_data = VARDATA_ANY(source);
622 23620 : maxlen = typmod - VARHDRSZ;
623 :
624 : /* No work if typmod is invalid or supplied data fits it already */
625 23620 : if (maxlen < 0 || len <= maxlen)
626 23488 : PG_RETURN_VARCHAR_P(source);
627 :
628 : /* only reach here if string is too long... */
629 :
630 : /* truncate multibyte string preserving multibyte boundary */
631 132 : maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
632 :
633 132 : if (!isExplicit)
634 : {
635 126 : for (i = maxmblen; i < len; i++)
636 114 : if (s_data[i] != ' ')
637 54 : ereport(ERROR,
638 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
639 : errmsg("value too long for type character varying(%d)",
640 : maxlen)));
641 : }
642 :
643 78 : PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
644 : maxmblen));
645 : }
646 :
647 : Datum
648 1444 : varchartypmodin(PG_FUNCTION_ARGS)
649 : {
650 1444 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
651 :
652 1444 : PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
653 : }
654 :
655 : Datum
656 192 : varchartypmodout(PG_FUNCTION_ARGS)
657 : {
658 192 : int32 typmod = PG_GETARG_INT32(0);
659 :
660 192 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
661 : }
662 :
663 :
664 : /*****************************************************************************
665 : * Exported functions
666 : *****************************************************************************/
667 :
668 : /* "True" length (not counting trailing blanks) of a BpChar */
669 : static inline int
670 263950 : bcTruelen(BpChar *arg)
671 : {
672 263950 : return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
673 : }
674 :
/*
 * bpchartruelen -- length of the first len bytes of s once trailing spaces
 * are ignored.
 *
 * Scans backwards from the end and stops at the first byte that is not a
 * space.  Returns 0 if all len bytes are spaces.
 */
int
bpchartruelen(char *s, int len)
{
	int			truelen = len;

	/*
	 * This relies on ' ' being a single-byte unit in every supported
	 * multibyte server encoding.
	 */
	while (truelen > 0 && s[truelen - 1] == ' ')
		truelen--;

	return truelen;
}
691 :
692 : Datum
693 18 : bpcharlen(PG_FUNCTION_ARGS)
694 : {
695 18 : BpChar *arg = PG_GETARG_BPCHAR_PP(0);
696 : int len;
697 :
698 : /* get number of bytes, ignoring trailing spaces */
699 18 : len = bcTruelen(arg);
700 :
701 : /* in multibyte encoding, convert to number of characters */
702 18 : if (pg_database_encoding_max_length() != 1)
703 18 : len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
704 :
705 18 : PG_RETURN_INT32(len);
706 : }
707 :
708 : Datum
709 0 : bpcharoctetlen(PG_FUNCTION_ARGS)
710 : {
711 0 : Datum arg = PG_GETARG_DATUM(0);
712 :
713 : /* We need not detoast the input at all */
714 0 : PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
715 : }
716 :
717 :
718 : /*****************************************************************************
719 : * Comparison Functions used for bpchar
720 : *
721 : * Note: btree indexes need these routines not to leak memory; therefore,
722 : * be careful to free working copies of toasted datums. Most places don't
723 : * need to be so careful.
724 : *****************************************************************************/
725 :
726 : static void
727 25530 : check_collation_set(Oid collid)
728 : {
729 25530 : if (!OidIsValid(collid))
730 : {
731 : /*
732 : * This typically means that the parser could not resolve a conflict
733 : * of implicit collations, so report it that way.
734 : */
735 0 : ereport(ERROR,
736 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
737 : errmsg("could not determine which collation to use for string comparison"),
738 : errhint("Use the COLLATE clause to set the collation explicitly.")));
739 : }
740 25530 : }
741 :
742 : Datum
743 19122 : bpchareq(PG_FUNCTION_ARGS)
744 : {
745 19122 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
746 19122 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
747 : int len1,
748 : len2;
749 : bool result;
750 19122 : Oid collid = PG_GET_COLLATION();
751 19122 : bool locale_is_c = false;
752 19122 : pg_locale_t mylocale = 0;
753 :
754 19122 : check_collation_set(collid);
755 :
756 19122 : len1 = bcTruelen(arg1);
757 19122 : len2 = bcTruelen(arg2);
758 :
759 19122 : if (lc_collate_is_c(collid))
760 5652 : locale_is_c = true;
761 : else
762 13470 : mylocale = pg_newlocale_from_collation(collid);
763 :
764 19122 : if (locale_is_c || pg_locale_deterministic(mylocale))
765 : {
766 : /*
767 : * Since we only care about equality or not-equality, we can avoid all
768 : * the expense of strcoll() here, and just do bitwise comparison.
769 : */
770 18954 : if (len1 != len2)
771 2502 : result = false;
772 : else
773 16452 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
774 : }
775 : else
776 : {
777 168 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
778 : collid) == 0);
779 : }
780 :
781 19122 : PG_FREE_IF_COPY(arg1, 0);
782 19122 : PG_FREE_IF_COPY(arg2, 1);
783 :
784 19122 : PG_RETURN_BOOL(result);
785 : }
786 :
787 : Datum
788 6408 : bpcharne(PG_FUNCTION_ARGS)
789 : {
790 6408 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
791 6408 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
792 : int len1,
793 : len2;
794 : bool result;
795 6408 : Oid collid = PG_GET_COLLATION();
796 6408 : bool locale_is_c = false;
797 6408 : pg_locale_t mylocale = 0;
798 :
799 6408 : check_collation_set(collid);
800 :
801 6408 : len1 = bcTruelen(arg1);
802 6408 : len2 = bcTruelen(arg2);
803 :
804 6408 : if (lc_collate_is_c(collid))
805 2128 : locale_is_c = true;
806 : else
807 4280 : mylocale = pg_newlocale_from_collation(collid);
808 :
809 6408 : if (locale_is_c || pg_locale_deterministic(mylocale))
810 : {
811 : /*
812 : * Since we only care about equality or not-equality, we can avoid all
813 : * the expense of strcoll() here, and just do bitwise comparison.
814 : */
815 6384 : if (len1 != len2)
816 2022 : result = true;
817 : else
818 4362 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
819 : }
820 : else
821 : {
822 24 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
823 : collid) != 0);
824 : }
825 :
826 6408 : PG_FREE_IF_COPY(arg1, 0);
827 6408 : PG_FREE_IF_COPY(arg2, 1);
828 :
829 6408 : PG_RETURN_BOOL(result);
830 : }
831 :
832 : Datum
833 6024 : bpcharlt(PG_FUNCTION_ARGS)
834 : {
835 6024 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
836 6024 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
837 : int len1,
838 : len2;
839 : int cmp;
840 :
841 6024 : len1 = bcTruelen(arg1);
842 6024 : len2 = bcTruelen(arg2);
843 :
844 6024 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
845 : PG_GET_COLLATION());
846 :
847 6024 : PG_FREE_IF_COPY(arg1, 0);
848 6024 : PG_FREE_IF_COPY(arg2, 1);
849 :
850 6024 : PG_RETURN_BOOL(cmp < 0);
851 : }
852 :
853 : Datum
854 5548 : bpcharle(PG_FUNCTION_ARGS)
855 : {
856 5548 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
857 5548 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
858 : int len1,
859 : len2;
860 : int cmp;
861 :
862 5548 : len1 = bcTruelen(arg1);
863 5548 : len2 = bcTruelen(arg2);
864 :
865 5548 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
866 : PG_GET_COLLATION());
867 :
868 5548 : PG_FREE_IF_COPY(arg1, 0);
869 5548 : PG_FREE_IF_COPY(arg2, 1);
870 :
871 5548 : PG_RETURN_BOOL(cmp <= 0);
872 : }
873 :
874 : Datum
875 6256 : bpchargt(PG_FUNCTION_ARGS)
876 : {
877 6256 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
878 6256 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
879 : int len1,
880 : len2;
881 : int cmp;
882 :
883 6256 : len1 = bcTruelen(arg1);
884 6256 : len2 = bcTruelen(arg2);
885 :
886 6256 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
887 : PG_GET_COLLATION());
888 :
889 6256 : PG_FREE_IF_COPY(arg1, 0);
890 6256 : PG_FREE_IF_COPY(arg2, 1);
891 :
892 6256 : PG_RETURN_BOOL(cmp > 0);
893 : }
894 :
895 : Datum
896 5704 : bpcharge(PG_FUNCTION_ARGS)
897 : {
898 5704 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
899 5704 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
900 : int len1,
901 : len2;
902 : int cmp;
903 :
904 5704 : len1 = bcTruelen(arg1);
905 5704 : len2 = bcTruelen(arg2);
906 :
907 5704 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
908 : PG_GET_COLLATION());
909 :
910 5704 : PG_FREE_IF_COPY(arg1, 0);
911 5704 : PG_FREE_IF_COPY(arg2, 1);
912 :
913 5704 : PG_RETURN_BOOL(cmp >= 0);
914 : }
915 :
916 : Datum
917 80594 : bpcharcmp(PG_FUNCTION_ARGS)
918 : {
919 80594 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
920 80594 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
921 : int len1,
922 : len2;
923 : int cmp;
924 :
925 80594 : len1 = bcTruelen(arg1);
926 80594 : len2 = bcTruelen(arg2);
927 :
928 80594 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
929 : PG_GET_COLLATION());
930 :
931 80594 : PG_FREE_IF_COPY(arg1, 0);
932 80594 : PG_FREE_IF_COPY(arg2, 1);
933 :
934 80594 : PG_RETURN_INT32(cmp);
935 : }
936 :
937 : Datum
938 814 : bpchar_sortsupport(PG_FUNCTION_ARGS)
939 : {
940 814 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
941 814 : Oid collid = ssup->ssup_collation;
942 : MemoryContext oldcontext;
943 :
944 814 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
945 :
946 : /* Use generic string SortSupport */
947 814 : varstr_sortsupport(ssup, BPCHAROID, collid);
948 :
949 814 : MemoryContextSwitchTo(oldcontext);
950 :
951 814 : PG_RETURN_VOID();
952 : }
953 :
954 : Datum
955 0 : bpchar_larger(PG_FUNCTION_ARGS)
956 : {
957 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
958 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
959 : int len1,
960 : len2;
961 : int cmp;
962 :
963 0 : len1 = bcTruelen(arg1);
964 0 : len2 = bcTruelen(arg2);
965 :
966 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
967 : PG_GET_COLLATION());
968 :
969 0 : PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
970 : }
971 :
972 : Datum
973 0 : bpchar_smaller(PG_FUNCTION_ARGS)
974 : {
975 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
976 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
977 : int len1,
978 : len2;
979 : int cmp;
980 :
981 0 : len1 = bcTruelen(arg1);
982 0 : len2 = bcTruelen(arg2);
983 :
984 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
985 : PG_GET_COLLATION());
986 :
987 0 : PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
988 : }
989 :
990 :
991 : /*
992 : * bpchar needs a specialized hash function because we want to ignore
993 : * trailing blanks in comparisons.
994 : */
995 : Datum
996 4380 : hashbpchar(PG_FUNCTION_ARGS)
997 : {
998 4380 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
999 4380 : Oid collid = PG_GET_COLLATION();
1000 : char *keydata;
1001 : int keylen;
1002 4380 : pg_locale_t mylocale = 0;
1003 : Datum result;
1004 :
1005 4380 : if (!collid)
1006 0 : ereport(ERROR,
1007 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1008 : errmsg("could not determine which collation to use for string hashing"),
1009 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1010 :
1011 4380 : keydata = VARDATA_ANY(key);
1012 4380 : keylen = bcTruelen(key);
1013 :
1014 4380 : if (!lc_collate_is_c(collid))
1015 2976 : mylocale = pg_newlocale_from_collation(collid);
1016 :
1017 4380 : if (pg_locale_deterministic(mylocale))
1018 : {
1019 4212 : result = hash_any((unsigned char *) keydata, keylen);
1020 : }
1021 : else
1022 : {
1023 : Size bsize,
1024 : rsize;
1025 : char *buf;
1026 :
1027 168 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
1028 168 : buf = palloc(bsize + 1);
1029 :
1030 168 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
1031 168 : if (rsize != bsize)
1032 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
1033 :
1034 : /*
1035 : * In principle, there's no reason to include the terminating NUL
1036 : * character in the hash, but it was done before and the behavior must
1037 : * be preserved.
1038 : */
1039 168 : result = hash_any((uint8_t *) buf, bsize + 1);
1040 :
1041 168 : pfree(buf);
1042 : }
1043 :
1044 : /* Avoid leaking memory for toasted inputs */
1045 4380 : PG_FREE_IF_COPY(key, 0);
1046 :
1047 4380 : return result;
1048 : }
1049 :
1050 : Datum
1051 84 : hashbpcharextended(PG_FUNCTION_ARGS)
1052 : {
1053 84 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
1054 84 : Oid collid = PG_GET_COLLATION();
1055 : char *keydata;
1056 : int keylen;
1057 84 : pg_locale_t mylocale = 0;
1058 : Datum result;
1059 :
1060 84 : if (!collid)
1061 0 : ereport(ERROR,
1062 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1063 : errmsg("could not determine which collation to use for string hashing"),
1064 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1065 :
1066 84 : keydata = VARDATA_ANY(key);
1067 84 : keylen = bcTruelen(key);
1068 :
1069 84 : if (!lc_collate_is_c(collid))
1070 64 : mylocale = pg_newlocale_from_collation(collid);
1071 :
1072 84 : if (pg_locale_deterministic(mylocale))
1073 : {
1074 72 : result = hash_any_extended((unsigned char *) keydata, keylen,
1075 72 : PG_GETARG_INT64(1));
1076 : }
1077 : else
1078 : {
1079 : Size bsize,
1080 : rsize;
1081 : char *buf;
1082 :
1083 12 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
1084 12 : buf = palloc(bsize + 1);
1085 :
1086 12 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
1087 12 : if (rsize != bsize)
1088 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
1089 :
1090 : /*
1091 : * In principle, there's no reason to include the terminating NUL
1092 : * character in the hash, but it was done before and the behavior must
1093 : * be preserved.
1094 : */
1095 12 : result = hash_any_extended((uint8_t *) buf, bsize + 1,
1096 12 : PG_GETARG_INT64(1));
1097 :
1098 12 : pfree(buf);
1099 : }
1100 :
1101 84 : PG_FREE_IF_COPY(key, 0);
1102 :
1103 84 : return result;
1104 : }
1105 :
1106 : /*
1107 : * The following operators support character-by-character comparison
1108 : * of bpchar datums, to allow building indexes suitable for LIKE clauses.
1109 : * Note that the regular bpchareq/bpcharne comparison operators, and
1110 : * regular support functions 1 and 2 with "C" collation are assumed to be
1111 : * compatible with these!
1112 : */
1113 :
1114 : static int
1115 78 : internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
1116 : {
1117 : int result;
1118 : int len1,
1119 : len2;
1120 :
1121 78 : len1 = bcTruelen(arg1);
1122 78 : len2 = bcTruelen(arg2);
1123 :
1124 78 : result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1125 78 : if (result != 0)
1126 48 : return result;
1127 30 : else if (len1 < len2)
1128 0 : return -1;
1129 30 : else if (len1 > len2)
1130 0 : return 1;
1131 : else
1132 30 : return 0;
1133 : }
1134 :
1135 :
1136 : Datum
1137 0 : bpchar_pattern_lt(PG_FUNCTION_ARGS)
1138 : {
1139 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1140 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1141 : int result;
1142 :
1143 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1144 :
1145 0 : PG_FREE_IF_COPY(arg1, 0);
1146 0 : PG_FREE_IF_COPY(arg2, 1);
1147 :
1148 0 : PG_RETURN_BOOL(result < 0);
1149 : }
1150 :
1151 :
1152 : Datum
1153 0 : bpchar_pattern_le(PG_FUNCTION_ARGS)
1154 : {
1155 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1156 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1157 : int result;
1158 :
1159 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1160 :
1161 0 : PG_FREE_IF_COPY(arg1, 0);
1162 0 : PG_FREE_IF_COPY(arg2, 1);
1163 :
1164 0 : PG_RETURN_BOOL(result <= 0);
1165 : }
1166 :
1167 :
1168 : Datum
1169 0 : bpchar_pattern_ge(PG_FUNCTION_ARGS)
1170 : {
1171 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1172 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1173 : int result;
1174 :
1175 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1176 :
1177 0 : PG_FREE_IF_COPY(arg1, 0);
1178 0 : PG_FREE_IF_COPY(arg2, 1);
1179 :
1180 0 : PG_RETURN_BOOL(result >= 0);
1181 : }
1182 :
1183 :
1184 : Datum
1185 0 : bpchar_pattern_gt(PG_FUNCTION_ARGS)
1186 : {
1187 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1188 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1189 : int result;
1190 :
1191 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1192 :
1193 0 : PG_FREE_IF_COPY(arg1, 0);
1194 0 : PG_FREE_IF_COPY(arg2, 1);
1195 :
1196 0 : PG_RETURN_BOOL(result > 0);
1197 : }
1198 :
1199 :
1200 : Datum
1201 78 : btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
1202 : {
1203 78 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1204 78 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1205 : int result;
1206 :
1207 78 : result = internal_bpchar_pattern_compare(arg1, arg2);
1208 :
1209 78 : PG_FREE_IF_COPY(arg1, 0);
1210 78 : PG_FREE_IF_COPY(arg2, 1);
1211 :
1212 78 : PG_RETURN_INT32(result);
1213 : }
1214 :
1215 :
1216 : Datum
1217 12 : btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
1218 : {
1219 12 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1220 : MemoryContext oldcontext;
1221 :
1222 12 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1223 :
1224 : /* Use generic string SortSupport, forcing "C" collation */
1225 12 : varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
1226 :
1227 12 : MemoryContextSwitchTo(oldcontext);
1228 :
1229 12 : PG_RETURN_VOID();
1230 : }
|