Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * varchar.c
4 : * Functions for the built-in types char(n) and varchar(n).
5 : *
6 : * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/varchar.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/detoast.h"
18 : #include "catalog/pg_collation.h"
19 : #include "catalog/pg_type.h"
20 : #include "common/hashfn.h"
21 : #include "libpq/pqformat.h"
22 : #include "mb/pg_wchar.h"
23 : #include "nodes/nodeFuncs.h"
24 : #include "nodes/supportnodes.h"
25 : #include "utils/array.h"
26 : #include "utils/builtins.h"
27 : #include "utils/lsyscache.h"
28 : #include "utils/pg_locale.h"
29 : #include "utils/varlena.h"
30 :
31 : /* common code for bpchartypmodin and varchartypmodin */
32 : static int32
33 2392 : anychar_typmodin(ArrayType *ta, const char *typename)
34 : {
35 : int32 typmod;
36 : int32 *tl;
37 : int n;
38 :
39 2392 : tl = ArrayGetIntegerTypmods(ta, &n);
40 :
41 : /*
42 : * we're not too tense about good error message here because grammar
43 : * shouldn't allow wrong number of modifiers for CHAR
44 : */
45 2392 : if (n != 1)
46 0 : ereport(ERROR,
47 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
48 : errmsg("invalid type modifier")));
49 :
50 2392 : if (*tl < 1)
51 0 : ereport(ERROR,
52 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
53 : errmsg("length for type %s must be at least 1", typename)));
54 2392 : if (*tl > MaxAttrSize)
55 0 : ereport(ERROR,
56 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
57 : errmsg("length for type %s cannot exceed %d",
58 : typename, MaxAttrSize)));
59 :
60 : /*
61 : * For largely historical reasons, the typmod is VARHDRSZ plus the number
62 : * of characters; there is enough client-side code that knows about that
63 : * that we'd better not change it.
64 : */
65 2392 : typmod = VARHDRSZ + *tl;
66 :
67 2392 : return typmod;
68 : }
69 :
70 : /* common code for bpchartypmodout and varchartypmodout */
71 : static char *
72 560 : anychar_typmodout(int32 typmod)
73 : {
74 560 : char *res = (char *) palloc(64);
75 :
76 560 : if (typmod > VARHDRSZ)
77 560 : snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
78 : else
79 0 : *res = '\0';
80 :
81 560 : return res;
82 : }
83 :
84 :
85 : /*
86 : * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
87 : * is for blank-padded string whose length is specified in CREATE TABLE.
88 : * VARCHAR is for storing string whose length is at most the length specified
89 : * at CREATE TABLE time.
90 : *
91 : * It's hard to implement these types because we cannot figure out
92 : * the length of the type from the type itself. I changed (hopefully all) the
93 : * fmgr calls that invoke input functions of a data type to supply the
94 : * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
95 : * the length of the attributes and hence the exact length of the char() or
96 : * varchar(). We pass this to bpcharin() or varcharin().) In the case where
97 : * we cannot determine the length, we pass in -1 instead and the input
98 : * converter does not enforce any length check.
99 : *
100 : * We actually implement this as a varlena so that we don't have to pass in
101 : * the length for the comparison functions. (The difference between these
102 : * types and "text" is that we truncate and possibly blank-pad the string
103 : * at insertion time.)
104 : *
105 : * - ay 6/95
106 : */
107 :
108 :
109 : /*****************************************************************************
110 : * bpchar - char() *
111 : *****************************************************************************/
112 :
113 : /*
114 : * bpchar_input -- common guts of bpcharin and bpcharrecv
115 : *
116 : * s is the input text of length len (may not be null-terminated)
117 : * atttypmod is the typmod value to apply
118 : *
119 : * Note that atttypmod is measured in characters, which
120 : * is not necessarily the same as the number of bytes.
121 : *
122 : * If the input string is too long, raise an error, unless the extra
123 : * characters are spaces, in which case they're truncated. (per SQL)
124 : */
125 : static BpChar *
126 420996 : bpchar_input(const char *s, size_t len, int32 atttypmod)
127 : {
128 : BpChar *result;
129 : char *r;
130 : size_t maxlen;
131 :
132 : /* If typmod is -1 (or invalid), use the actual string length */
133 420996 : if (atttypmod < (int32) VARHDRSZ)
134 6654 : maxlen = len;
135 : else
136 : {
137 : size_t charlen; /* number of CHARACTERS in the input */
138 :
139 414342 : maxlen = atttypmod - VARHDRSZ;
140 414342 : charlen = pg_mbstrlen_with_len(s, len);
141 414342 : if (charlen > maxlen)
142 : {
143 : /* Verify that extra characters are spaces, and clip them off */
144 16 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
145 : size_t j;
146 :
147 : /*
148 : * at this point, len is the actual BYTE length of the input
149 : * string, maxlen is the max number of CHARACTERS allowed for this
150 : * bpchar type, mbmaxlen is the length in BYTES of those chars.
151 : */
152 16 : for (j = mbmaxlen; j < len; j++)
153 : {
154 16 : if (s[j] != ' ')
155 16 : ereport(ERROR,
156 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
157 : errmsg("value too long for type character(%d)",
158 : (int) maxlen)));
159 : }
160 :
161 : /*
162 : * Now we set maxlen to the necessary byte length, not the number
163 : * of CHARACTERS!
164 : */
165 0 : maxlen = len = mbmaxlen;
166 : }
167 : else
168 : {
169 : /*
170 : * Now we set maxlen to the necessary byte length, not the number
171 : * of CHARACTERS!
172 : */
173 414326 : maxlen = len + (maxlen - charlen);
174 : }
175 : }
176 :
177 420980 : result = (BpChar *) palloc(maxlen + VARHDRSZ);
178 420980 : SET_VARSIZE(result, maxlen + VARHDRSZ);
179 420980 : r = VARDATA(result);
180 420980 : memcpy(r, s, len);
181 :
182 : /* blank pad the string if necessary */
183 420980 : if (maxlen > len)
184 402148 : memset(r + len, ' ', maxlen - len);
185 :
186 420980 : return result;
187 : }
188 :
189 : /*
190 : * Convert a C string to CHARACTER internal representation. atttypmod
191 : * is the declared length of the type plus VARHDRSZ.
192 : */
193 : Datum
194 420996 : bpcharin(PG_FUNCTION_ARGS)
195 : {
196 420996 : char *s = PG_GETARG_CSTRING(0);
197 :
198 : #ifdef NOT_USED
199 : Oid typelem = PG_GETARG_OID(1);
200 : #endif
201 420996 : int32 atttypmod = PG_GETARG_INT32(2);
202 : BpChar *result;
203 :
204 420996 : result = bpchar_input(s, strlen(s), atttypmod);
205 420980 : PG_RETURN_BPCHAR_P(result);
206 : }
207 :
208 :
209 : /*
210 : * Convert a CHARACTER value to a C string.
211 : *
212 : * Uses the text conversion functions, which is only appropriate if BpChar
213 : * and text are equivalent types.
214 : */
215 : Datum
216 39096 : bpcharout(PG_FUNCTION_ARGS)
217 : {
218 39096 : Datum txt = PG_GETARG_DATUM(0);
219 :
220 39096 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
221 : }
222 :
223 : /*
224 : * bpcharrecv - converts external binary format to bpchar
225 : */
226 : Datum
227 0 : bpcharrecv(PG_FUNCTION_ARGS)
228 : {
229 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
230 :
231 : #ifdef NOT_USED
232 : Oid typelem = PG_GETARG_OID(1);
233 : #endif
234 0 : int32 atttypmod = PG_GETARG_INT32(2);
235 : BpChar *result;
236 : char *str;
237 : int nbytes;
238 :
239 0 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
240 0 : result = bpchar_input(str, nbytes, atttypmod);
241 0 : pfree(str);
242 0 : PG_RETURN_BPCHAR_P(result);
243 : }
244 :
245 : /*
246 : * bpcharsend - converts bpchar to binary format
247 : */
248 : Datum
249 4 : bpcharsend(PG_FUNCTION_ARGS)
250 : {
251 : /* Exactly the same as textsend, so share code */
252 4 : return textsend(fcinfo);
253 : }
254 :
255 :
256 : /*
257 : * Converts a CHARACTER type to the specified size.
258 : *
259 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
260 : * isExplicit is true if this is for an explicit cast to char(N).
261 : *
262 : * Truncation rules: for an explicit cast, silently truncate to the given
263 : * length; for an implicit cast, raise error unless extra characters are
264 : * all spaces. (This is sort-of per SQL: the spec would actually have us
265 : * raise a "completion condition" for the explicit cast case, but Postgres
266 : * hasn't got such a concept.)
267 : */
268 : Datum
269 8406 : bpchar(PG_FUNCTION_ARGS)
270 : {
271 8406 : BpChar *source = PG_GETARG_BPCHAR_PP(0);
272 8406 : int32 maxlen = PG_GETARG_INT32(1);
273 8406 : bool isExplicit = PG_GETARG_BOOL(2);
274 : BpChar *result;
275 : int32 len;
276 : char *r;
277 : char *s;
278 : int i;
279 : int charlen; /* number of characters in the input string +
280 : * VARHDRSZ */
281 :
282 : /* No work if typmod is invalid */
283 8406 : if (maxlen < (int32) VARHDRSZ)
284 0 : PG_RETURN_BPCHAR_P(source);
285 :
286 8406 : maxlen -= VARHDRSZ;
287 :
288 8406 : len = VARSIZE_ANY_EXHDR(source);
289 8406 : s = VARDATA_ANY(source);
290 :
291 8406 : charlen = pg_mbstrlen_with_len(s, len);
292 :
293 : /* No work if supplied data matches typmod already */
294 8406 : if (charlen == maxlen)
295 3352 : PG_RETURN_BPCHAR_P(source);
296 :
297 5054 : if (charlen > maxlen)
298 : {
299 : /* Verify that extra characters are spaces, and clip them off */
300 : size_t maxmblen;
301 :
302 24 : maxmblen = pg_mbcharcliplen(s, len, maxlen);
303 :
304 24 : if (!isExplicit)
305 : {
306 56 : for (i = maxmblen; i < len; i++)
307 48 : if (s[i] != ' ')
308 12 : ereport(ERROR,
309 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
310 : errmsg("value too long for type character(%d)",
311 : maxlen)));
312 : }
313 :
314 12 : len = maxmblen;
315 :
316 : /*
317 : * At this point, maxlen is the necessary byte length, not the number
318 : * of CHARACTERS!
319 : */
320 12 : maxlen = len;
321 : }
322 : else
323 : {
324 : /*
325 : * At this point, maxlen is the necessary byte length, not the number
326 : * of CHARACTERS!
327 : */
328 5030 : maxlen = len + (maxlen - charlen);
329 : }
330 :
331 : Assert(maxlen >= len);
332 :
333 5042 : result = palloc(maxlen + VARHDRSZ);
334 5042 : SET_VARSIZE(result, maxlen + VARHDRSZ);
335 5042 : r = VARDATA(result);
336 :
337 5042 : memcpy(r, s, len);
338 :
339 : /* blank pad the string if necessary */
340 5042 : if (maxlen > len)
341 5030 : memset(r + len, ' ', maxlen - len);
342 :
343 5042 : PG_RETURN_BPCHAR_P(result);
344 : }
345 :
346 :
347 : /* char_bpchar()
348 : * Convert char to bpchar(1).
349 : */
350 : Datum
351 0 : char_bpchar(PG_FUNCTION_ARGS)
352 : {
353 0 : char c = PG_GETARG_CHAR(0);
354 : BpChar *result;
355 :
356 0 : result = (BpChar *) palloc(VARHDRSZ + 1);
357 :
358 0 : SET_VARSIZE(result, VARHDRSZ + 1);
359 0 : *(VARDATA(result)) = c;
360 :
361 0 : PG_RETURN_BPCHAR_P(result);
362 : }
363 :
364 :
365 : /* bpchar_name()
366 : * Converts a bpchar() type to a NameData type.
367 : */
368 : Datum
369 0 : bpchar_name(PG_FUNCTION_ARGS)
370 : {
371 0 : BpChar *s = PG_GETARG_BPCHAR_PP(0);
372 : char *s_data;
373 : Name result;
374 : int len;
375 :
376 0 : len = VARSIZE_ANY_EXHDR(s);
377 0 : s_data = VARDATA_ANY(s);
378 :
379 : /* Truncate oversize input */
380 0 : if (len >= NAMEDATALEN)
381 0 : len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
382 :
383 : /* Remove trailing blanks */
384 0 : while (len > 0)
385 : {
386 0 : if (s_data[len - 1] != ' ')
387 0 : break;
388 0 : len--;
389 : }
390 :
391 : /* We use palloc0 here to ensure result is zero-padded */
392 0 : result = (Name) palloc0(NAMEDATALEN);
393 0 : memcpy(NameStr(*result), s_data, len);
394 :
395 0 : PG_RETURN_NAME(result);
396 : }
397 :
398 : /* name_bpchar()
399 : * Converts a NameData type to a bpchar type.
400 : *
401 : * Uses the text conversion functions, which is only appropriate if BpChar
402 : * and text are equivalent types.
403 : */
404 : Datum
405 4 : name_bpchar(PG_FUNCTION_ARGS)
406 : {
407 4 : Name s = PG_GETARG_NAME(0);
408 : BpChar *result;
409 :
410 4 : result = (BpChar *) cstring_to_text(NameStr(*s));
411 4 : PG_RETURN_BPCHAR_P(result);
412 : }
413 :
414 : Datum
415 1476 : bpchartypmodin(PG_FUNCTION_ARGS)
416 : {
417 1476 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
418 :
419 1476 : PG_RETURN_INT32(anychar_typmodin(ta, "char"));
420 : }
421 :
422 : Datum
423 444 : bpchartypmodout(PG_FUNCTION_ARGS)
424 : {
425 444 : int32 typmod = PG_GETARG_INT32(0);
426 :
427 444 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
428 : }
429 :
430 :
431 : /*****************************************************************************
432 : * varchar - varchar(n)
433 : *
434 : * Note: varchar piggybacks on type text for most operations, and so has no
435 : * C-coded functions except for I/O and typmod checking.
436 : *****************************************************************************/
437 :
438 : /*
439 : * varchar_input -- common guts of varcharin and varcharrecv
440 : *
441 : * s is the input text of length len (may not be null-terminated)
442 : * atttypmod is the typmod value to apply
443 : *
444 : * Note that atttypmod is measured in characters, which
445 : * is not necessarily the same as the number of bytes.
446 : *
447 : * If the input string is too long, raise an error, unless the extra
448 : * characters are spaces, in which case they're truncated. (per SQL)
449 : *
450 : * Uses the C string to text conversion function, which is only appropriate
451 : * if VarChar and text are equivalent types.
452 : */
453 : static VarChar *
454 1860632 : varchar_input(const char *s, size_t len, int32 atttypmod)
455 : {
456 : VarChar *result;
457 : size_t maxlen;
458 :
459 1860632 : maxlen = atttypmod - VARHDRSZ;
460 :
461 1860632 : if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
462 : {
463 : /* Verify that extra characters are spaces, and clip them off */
464 8 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
465 : size_t j;
466 :
467 8 : for (j = mbmaxlen; j < len; j++)
468 : {
469 8 : if (s[j] != ' ')
470 8 : ereport(ERROR,
471 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
472 : errmsg("value too long for type character varying(%d)",
473 : (int) maxlen)));
474 : }
475 :
476 0 : len = mbmaxlen;
477 : }
478 :
479 1860624 : result = (VarChar *) cstring_to_text_with_len(s, len);
480 1860624 : return result;
481 : }
482 :
483 : /*
484 : * Convert a C string to VARCHAR internal representation. atttypmod
485 : * is the declared length of the type plus VARHDRSZ.
486 : */
487 : Datum
488 1860632 : varcharin(PG_FUNCTION_ARGS)
489 : {
490 1860632 : char *s = PG_GETARG_CSTRING(0);
491 :
492 : #ifdef NOT_USED
493 : Oid typelem = PG_GETARG_OID(1);
494 : #endif
495 1860632 : int32 atttypmod = PG_GETARG_INT32(2);
496 : VarChar *result;
497 :
498 1860632 : result = varchar_input(s, strlen(s), atttypmod);
499 1860624 : PG_RETURN_VARCHAR_P(result);
500 : }
501 :
502 :
503 : /*
504 : * Convert a VARCHAR value to a C string.
505 : *
506 : * Uses the text to C string conversion function, which is only appropriate
507 : * if VarChar and text are equivalent types.
508 : */
509 : Datum
510 127792 : varcharout(PG_FUNCTION_ARGS)
511 : {
512 127792 : Datum txt = PG_GETARG_DATUM(0);
513 :
514 127792 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
515 : }
516 :
517 : /*
518 : * varcharrecv - converts external binary format to varchar
519 : */
520 : Datum
521 0 : varcharrecv(PG_FUNCTION_ARGS)
522 : {
523 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
524 :
525 : #ifdef NOT_USED
526 : Oid typelem = PG_GETARG_OID(1);
527 : #endif
528 0 : int32 atttypmod = PG_GETARG_INT32(2);
529 : VarChar *result;
530 : char *str;
531 : int nbytes;
532 :
533 0 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
534 0 : result = varchar_input(str, nbytes, atttypmod);
535 0 : pfree(str);
536 0 : PG_RETURN_VARCHAR_P(result);
537 : }
538 :
539 : /*
540 : * varcharsend - converts varchar to binary format
541 : */
542 : Datum
543 31668 : varcharsend(PG_FUNCTION_ARGS)
544 : {
545 : /* Exactly the same as textsend, so share code */
546 31668 : return textsend(fcinfo);
547 : }
548 :
549 :
550 : /*
551 : * varchar_support()
552 : *
553 : * Planner support function for the varchar() length coercion function.
554 : *
555 : * Currently, the only interesting thing we can do is flatten calls that set
556 : * the new maximum length >= the previous maximum length. We can ignore the
557 : * isExplicit argument, since that only affects truncation cases.
558 : */
559 : Datum
560 1384 : varchar_support(PG_FUNCTION_ARGS)
561 : {
562 1384 : Node *rawreq = (Node *) PG_GETARG_POINTER(0);
563 1384 : Node *ret = NULL;
564 :
565 1384 : if (IsA(rawreq, SupportRequestSimplify))
566 : {
567 572 : SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
568 572 : FuncExpr *expr = req->fcall;
569 : Node *typmod;
570 :
571 : Assert(list_length(expr->args) >= 2);
572 :
573 572 : typmod = (Node *) lsecond(expr->args);
574 :
575 572 : if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
576 : {
577 572 : Node *source = (Node *) linitial(expr->args);
578 572 : int32 old_typmod = exprTypmod(source);
579 572 : int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
580 572 : int32 old_max = old_typmod - VARHDRSZ;
581 572 : int32 new_max = new_typmod - VARHDRSZ;
582 :
583 572 : if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
584 20 : ret = relabel_to_typmod(source, new_typmod);
585 : }
586 : }
587 :
588 1384 : PG_RETURN_POINTER(ret);
589 : }
590 :
591 : /*
592 : * Converts a VARCHAR type to the specified size.
593 : *
594 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
595 : * isExplicit is true if this is for an explicit cast to varchar(N).
596 : *
597 : * Truncation rules: for an explicit cast, silently truncate to the given
598 : * length; for an implicit cast, raise error unless extra characters are
599 : * all spaces. (This is sort-of per SQL: the spec would actually have us
600 : * raise a "completion condition" for the explicit cast case, but Postgres
601 : * hasn't got such a concept.)
602 : */
603 : Datum
604 18108 : varchar(PG_FUNCTION_ARGS)
605 : {
606 18108 : VarChar *source = PG_GETARG_VARCHAR_PP(0);
607 18108 : int32 typmod = PG_GETARG_INT32(1);
608 18108 : bool isExplicit = PG_GETARG_BOOL(2);
609 : int32 len,
610 : maxlen;
611 : size_t maxmblen;
612 : int i;
613 : char *s_data;
614 :
615 18108 : len = VARSIZE_ANY_EXHDR(source);
616 18108 : s_data = VARDATA_ANY(source);
617 18108 : maxlen = typmod - VARHDRSZ;
618 :
619 : /* No work if typmod is invalid or supplied data fits it already */
620 18108 : if (maxlen < 0 || len <= maxlen)
621 18052 : PG_RETURN_VARCHAR_P(source);
622 :
623 : /* only reach here if string is too long... */
624 :
625 : /* truncate multibyte string preserving multibyte boundary */
626 56 : maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
627 :
628 56 : if (!isExplicit)
629 : {
630 76 : for (i = maxmblen; i < len; i++)
631 68 : if (s_data[i] != ' ')
632 28 : ereport(ERROR,
633 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
634 : errmsg("value too long for type character varying(%d)",
635 : maxlen)));
636 : }
637 :
638 28 : PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
639 : maxmblen));
640 : }
641 :
642 : Datum
643 916 : varchartypmodin(PG_FUNCTION_ARGS)
644 : {
645 916 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
646 :
647 916 : PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
648 : }
649 :
650 : Datum
651 116 : varchartypmodout(PG_FUNCTION_ARGS)
652 : {
653 116 : int32 typmod = PG_GETARG_INT32(0);
654 :
655 116 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
656 : }
657 :
658 :
659 : /*****************************************************************************
660 : * Exported functions
661 : *****************************************************************************/
662 :
663 : /* "True" length (not counting trailing blanks) of a BpChar */
664 : static inline int
665 171164 : bcTruelen(BpChar *arg)
666 : {
667 171164 : return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
668 : }
669 :
/*
 * bpchartruelen -- length of s[0..len) in bytes once trailing blanks are
 * dropped.
 *
 * s need not be null-terminated; only the first len bytes are examined.
 * When len is not positive it is returned unchanged.
 */
int
bpchartruelen(char *s, int len)
{
	int			n = len;

	/*
	 * Note that we rely on the assumption that ' ' is a singleton unit on
	 * every supported multibyte server encoding, so scanning backwards one
	 * byte at a time is safe.
	 */
	while (n > 0 && s[n - 1] == ' ')
		n--;

	return n;
}
686 :
687 : Datum
688 4 : bpcharlen(PG_FUNCTION_ARGS)
689 : {
690 4 : BpChar *arg = PG_GETARG_BPCHAR_PP(0);
691 : int len;
692 :
693 : /* get number of bytes, ignoring trailing spaces */
694 4 : len = bcTruelen(arg);
695 :
696 : /* in multibyte encoding, convert to number of characters */
697 4 : if (pg_database_encoding_max_length() != 1)
698 4 : len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
699 :
700 4 : PG_RETURN_INT32(len);
701 : }
702 :
703 : Datum
704 0 : bpcharoctetlen(PG_FUNCTION_ARGS)
705 : {
706 0 : Datum arg = PG_GETARG_DATUM(0);
707 :
708 : /* We need not detoast the input at all */
709 0 : PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
710 : }
711 :
712 :
713 : /*****************************************************************************
714 : * Comparison Functions used for bpchar
715 : *
716 : * Note: btree indexes need these routines not to leak memory; therefore,
717 : * be careful to free working copies of toasted datums. Most places don't
718 : * need to be so careful.
719 : *****************************************************************************/
720 :
721 : static void
722 16754 : check_collation_set(Oid collid)
723 : {
724 16754 : if (!OidIsValid(collid))
725 : {
726 : /*
727 : * This typically means that the parser could not resolve a conflict
728 : * of implicit collations, so report it that way.
729 : */
730 0 : ereport(ERROR,
731 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
732 : errmsg("could not determine which collation to use for string comparison"),
733 : errhint("Use the COLLATE clause to set the collation explicitly.")));
734 : }
735 16754 : }
736 :
737 : Datum
738 12498 : bpchareq(PG_FUNCTION_ARGS)
739 : {
740 12498 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
741 12498 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
742 : int len1,
743 : len2;
744 : bool result;
745 12498 : Oid collid = PG_GET_COLLATION();
746 :
747 12498 : check_collation_set(collid);
748 :
749 12498 : len1 = bcTruelen(arg1);
750 12498 : len2 = bcTruelen(arg2);
751 :
752 12498 : if (lc_collate_is_c(collid) ||
753 0 : collid == DEFAULT_COLLATION_OID ||
754 0 : pg_newlocale_from_collation(collid)->deterministic)
755 : {
756 : /*
757 : * Since we only care about equality or not-equality, we can avoid all
758 : * the expense of strcoll() here, and just do bitwise comparison.
759 : */
760 24996 : if (len1 != len2)
761 2224 : result = false;
762 : else
763 10274 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
764 : }
765 : else
766 : {
767 0 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
768 : collid) == 0);
769 : }
770 :
771 12498 : PG_FREE_IF_COPY(arg1, 0);
772 12498 : PG_FREE_IF_COPY(arg2, 1);
773 :
774 12498 : PG_RETURN_BOOL(result);
775 : }
776 :
777 : Datum
778 4256 : bpcharne(PG_FUNCTION_ARGS)
779 : {
780 4256 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
781 4256 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
782 : int len1,
783 : len2;
784 : bool result;
785 4256 : Oid collid = PG_GET_COLLATION();
786 :
787 4256 : check_collation_set(collid);
788 :
789 4256 : len1 = bcTruelen(arg1);
790 4256 : len2 = bcTruelen(arg2);
791 :
792 4256 : if (lc_collate_is_c(collid) ||
793 0 : collid == DEFAULT_COLLATION_OID ||
794 0 : pg_newlocale_from_collation(collid)->deterministic)
795 : {
796 : /*
797 : * Since we only care about equality or not-equality, we can avoid all
798 : * the expense of strcoll() here, and just do bitwise comparison.
799 : */
800 8512 : if (len1 != len2)
801 1348 : result = true;
802 : else
803 2908 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
804 : }
805 : else
806 : {
807 0 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
808 : collid) != 0);
809 : }
810 :
811 4256 : PG_FREE_IF_COPY(arg1, 0);
812 4256 : PG_FREE_IF_COPY(arg2, 1);
813 :
814 4256 : PG_RETURN_BOOL(result);
815 : }
816 :
817 : Datum
818 4064 : bpcharlt(PG_FUNCTION_ARGS)
819 : {
820 4064 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
821 4064 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
822 : int len1,
823 : len2;
824 : int cmp;
825 :
826 4064 : len1 = bcTruelen(arg1);
827 4064 : len2 = bcTruelen(arg2);
828 :
829 4064 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
830 : PG_GET_COLLATION());
831 :
832 4064 : PG_FREE_IF_COPY(arg1, 0);
833 4064 : PG_FREE_IF_COPY(arg2, 1);
834 :
835 4064 : PG_RETURN_BOOL(cmp < 0);
836 : }
837 :
838 : Datum
839 3644 : bpcharle(PG_FUNCTION_ARGS)
840 : {
841 3644 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
842 3644 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
843 : int len1,
844 : len2;
845 : int cmp;
846 :
847 3644 : len1 = bcTruelen(arg1);
848 3644 : len2 = bcTruelen(arg2);
849 :
850 3644 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
851 : PG_GET_COLLATION());
852 :
853 3644 : PG_FREE_IF_COPY(arg1, 0);
854 3644 : PG_FREE_IF_COPY(arg2, 1);
855 :
856 3644 : PG_RETURN_BOOL(cmp <= 0);
857 : }
858 :
859 : Datum
860 4040 : bpchargt(PG_FUNCTION_ARGS)
861 : {
862 4040 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
863 4040 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
864 : int len1,
865 : len2;
866 : int cmp;
867 :
868 4040 : len1 = bcTruelen(arg1);
869 4040 : len2 = bcTruelen(arg2);
870 :
871 4040 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
872 : PG_GET_COLLATION());
873 :
874 4040 : PG_FREE_IF_COPY(arg1, 0);
875 4040 : PG_FREE_IF_COPY(arg2, 1);
876 :
877 4040 : PG_RETURN_BOOL(cmp > 0);
878 : }
879 :
880 : Datum
881 3686 : bpcharge(PG_FUNCTION_ARGS)
882 : {
883 3686 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
884 3686 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
885 : int len1,
886 : len2;
887 : int cmp;
888 :
889 3686 : len1 = bcTruelen(arg1);
890 3686 : len2 = bcTruelen(arg2);
891 :
892 3686 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
893 : PG_GET_COLLATION());
894 :
895 3686 : PG_FREE_IF_COPY(arg1, 0);
896 3686 : PG_FREE_IF_COPY(arg2, 1);
897 :
898 3686 : PG_RETURN_BOOL(cmp >= 0);
899 : }
900 :
901 : Datum
902 52634 : bpcharcmp(PG_FUNCTION_ARGS)
903 : {
904 52634 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
905 52634 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
906 : int len1,
907 : len2;
908 : int cmp;
909 :
910 52634 : len1 = bcTruelen(arg1);
911 52634 : len2 = bcTruelen(arg2);
912 :
913 52634 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
914 : PG_GET_COLLATION());
915 :
916 52634 : PG_FREE_IF_COPY(arg1, 0);
917 52634 : PG_FREE_IF_COPY(arg2, 1);
918 :
919 52634 : PG_RETURN_INT32(cmp);
920 : }
921 :
922 : Datum
923 484 : bpchar_sortsupport(PG_FUNCTION_ARGS)
924 : {
925 484 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
926 484 : Oid collid = ssup->ssup_collation;
927 : MemoryContext oldcontext;
928 :
929 484 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
930 :
931 : /* Use generic string SortSupport */
932 484 : varstr_sortsupport(ssup, BPCHAROID, collid);
933 :
934 484 : MemoryContextSwitchTo(oldcontext);
935 :
936 484 : PG_RETURN_VOID();
937 : }
938 :
939 : Datum
940 0 : bpchar_larger(PG_FUNCTION_ARGS)
941 : {
942 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
943 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
944 : int len1,
945 : len2;
946 : int cmp;
947 :
948 0 : len1 = bcTruelen(arg1);
949 0 : len2 = bcTruelen(arg2);
950 :
951 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
952 : PG_GET_COLLATION());
953 :
954 0 : PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
955 : }
956 :
957 : Datum
958 0 : bpchar_smaller(PG_FUNCTION_ARGS)
959 : {
960 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
961 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
962 : int len1,
963 : len2;
964 : int cmp;
965 :
966 0 : len1 = bcTruelen(arg1);
967 0 : len2 = bcTruelen(arg2);
968 :
969 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
970 : PG_GET_COLLATION());
971 :
972 0 : PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
973 : }
974 :
975 :
976 : /*
977 : * bpchar needs a specialized hash function because we want to ignore
978 : * trailing blanks in comparisons.
979 : */
980 : Datum
981 1372 : hashbpchar(PG_FUNCTION_ARGS)
982 : {
983 1372 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
984 1372 : Oid collid = PG_GET_COLLATION();
985 : char *keydata;
986 : int keylen;
987 1372 : pg_locale_t mylocale = 0;
988 : Datum result;
989 :
990 1372 : if (!collid)
991 0 : ereport(ERROR,
992 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
993 : errmsg("could not determine which collation to use for string hashing"),
994 : errhint("Use the COLLATE clause to set the collation explicitly.")));
995 :
996 1372 : keydata = VARDATA_ANY(key);
997 1372 : keylen = bcTruelen(key);
998 :
999 1372 : if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1000 0 : mylocale = pg_newlocale_from_collation(collid);
1001 :
1002 1372 : if (!mylocale || mylocale->deterministic)
1003 : {
1004 1372 : result = hash_any((unsigned char *) keydata, keylen);
1005 : }
1006 : else
1007 : {
1008 : #ifdef USE_ICU
1009 : if (mylocale->provider == COLLPROVIDER_ICU)
1010 : {
1011 : int32_t ulen = -1;
1012 : UChar *uchar = NULL;
1013 : Size bsize;
1014 : uint8_t *buf;
1015 :
1016 : ulen = icu_to_uchar(&uchar, keydata, keylen);
1017 :
1018 : bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1019 : uchar, ulen, NULL, 0);
1020 : buf = palloc(bsize);
1021 : ucol_getSortKey(mylocale->info.icu.ucol,
1022 : uchar, ulen, buf, bsize);
1023 :
1024 : result = hash_any(buf, bsize);
1025 :
1026 : pfree(buf);
1027 : }
1028 : else
1029 : #endif
1030 : /* shouldn't happen */
1031 0 : elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1032 : }
1033 :
1034 : /* Avoid leaking memory for toasted inputs */
1035 1372 : PG_FREE_IF_COPY(key, 0);
1036 :
1037 1372 : return result;
1038 : }
1039 :
1040 : Datum
1041 40 : hashbpcharextended(PG_FUNCTION_ARGS)
1042 : {
1043 40 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
1044 40 : Oid collid = PG_GET_COLLATION();
1045 : char *keydata;
1046 : int keylen;
1047 40 : pg_locale_t mylocale = 0;
1048 : Datum result;
1049 :
1050 40 : if (!collid)
1051 0 : ereport(ERROR,
1052 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1053 : errmsg("could not determine which collation to use for string hashing"),
1054 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1055 :
1056 40 : keydata = VARDATA_ANY(key);
1057 40 : keylen = bcTruelen(key);
1058 :
1059 40 : if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1060 0 : mylocale = pg_newlocale_from_collation(collid);
1061 :
1062 40 : if (!mylocale || mylocale->deterministic)
1063 : {
1064 40 : result = hash_any_extended((unsigned char *) keydata, keylen,
1065 40 : PG_GETARG_INT64(1));
1066 : }
1067 : else
1068 : {
1069 : #ifdef USE_ICU
1070 : if (mylocale->provider == COLLPROVIDER_ICU)
1071 : {
1072 : int32_t ulen = -1;
1073 : UChar *uchar = NULL;
1074 : Size bsize;
1075 : uint8_t *buf;
1076 :
1077 : ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
1078 :
1079 : bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1080 : uchar, ulen, NULL, 0);
1081 : buf = palloc(bsize);
1082 : ucol_getSortKey(mylocale->info.icu.ucol,
1083 : uchar, ulen, buf, bsize);
1084 :
1085 : result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
1086 :
1087 : pfree(buf);
1088 : }
1089 : else
1090 : #endif
1091 : /* shouldn't happen */
1092 0 : elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1093 : }
1094 :
1095 40 : PG_FREE_IF_COPY(key, 0);
1096 :
1097 40 : return result;
1098 : }
1099 :
1100 : /*
1101 : * The following operators support character-by-character comparison
1102 : * of bpchar datums, to allow building indexes suitable for LIKE clauses.
1103 : * Note that the regular bpchareq/bpcharne comparison operators, and
1104 : * regular support functions 1 and 2 with "C" collation are assumed to be
1105 : * compatible with these!
1106 : */
1107 :
1108 : static int
1109 52 : internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
1110 : {
1111 : int result;
1112 : int len1,
1113 : len2;
1114 :
1115 52 : len1 = bcTruelen(arg1);
1116 52 : len2 = bcTruelen(arg2);
1117 :
1118 52 : result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1119 52 : if (result != 0)
1120 32 : return result;
1121 20 : else if (len1 < len2)
1122 0 : return -1;
1123 20 : else if (len1 > len2)
1124 0 : return 1;
1125 : else
1126 20 : return 0;
1127 : }
1128 :
1129 :
1130 : Datum
1131 0 : bpchar_pattern_lt(PG_FUNCTION_ARGS)
1132 : {
1133 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1134 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1135 : int result;
1136 :
1137 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1138 :
1139 0 : PG_FREE_IF_COPY(arg1, 0);
1140 0 : PG_FREE_IF_COPY(arg2, 1);
1141 :
1142 0 : PG_RETURN_BOOL(result < 0);
1143 : }
1144 :
1145 :
1146 : Datum
1147 0 : bpchar_pattern_le(PG_FUNCTION_ARGS)
1148 : {
1149 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1150 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1151 : int result;
1152 :
1153 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1154 :
1155 0 : PG_FREE_IF_COPY(arg1, 0);
1156 0 : PG_FREE_IF_COPY(arg2, 1);
1157 :
1158 0 : PG_RETURN_BOOL(result <= 0);
1159 : }
1160 :
1161 :
1162 : Datum
1163 0 : bpchar_pattern_ge(PG_FUNCTION_ARGS)
1164 : {
1165 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1166 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1167 : int result;
1168 :
1169 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1170 :
1171 0 : PG_FREE_IF_COPY(arg1, 0);
1172 0 : PG_FREE_IF_COPY(arg2, 1);
1173 :
1174 0 : PG_RETURN_BOOL(result >= 0);
1175 : }
1176 :
1177 :
1178 : Datum
1179 0 : bpchar_pattern_gt(PG_FUNCTION_ARGS)
1180 : {
1181 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1182 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1183 : int result;
1184 :
1185 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1186 :
1187 0 : PG_FREE_IF_COPY(arg1, 0);
1188 0 : PG_FREE_IF_COPY(arg2, 1);
1189 :
1190 0 : PG_RETURN_BOOL(result > 0);
1191 : }
1192 :
1193 :
1194 : Datum
1195 52 : btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
1196 : {
1197 52 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1198 52 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1199 : int result;
1200 :
1201 52 : result = internal_bpchar_pattern_compare(arg1, arg2);
1202 :
1203 52 : PG_FREE_IF_COPY(arg1, 0);
1204 52 : PG_FREE_IF_COPY(arg2, 1);
1205 :
1206 52 : PG_RETURN_INT32(result);
1207 : }
1208 :
1209 :
1210 : Datum
1211 8 : btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
1212 : {
1213 8 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1214 : MemoryContext oldcontext;
1215 :
1216 8 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1217 :
1218 : /* Use generic string SortSupport, forcing "C" collation */
1219 8 : varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
1220 :
1221 8 : MemoryContextSwitchTo(oldcontext);
1222 :
1223 8 : PG_RETURN_VOID();
1224 : }
|