Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * varchar.c
4 : * Functions for the built-in types char(n) and varchar(n).
5 : *
6 : * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/varchar.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/detoast.h"
18 : #include "catalog/pg_collation.h"
19 : #include "catalog/pg_type.h"
20 : #include "common/hashfn.h"
21 : #include "libpq/pqformat.h"
22 : #include "mb/pg_wchar.h"
23 : #include "nodes/nodeFuncs.h"
24 : #include "nodes/supportnodes.h"
25 : #include "utils/array.h"
26 : #include "utils/builtins.h"
27 : #include "utils/lsyscache.h"
28 : #include "utils/pg_locale.h"
29 : #include "utils/varlena.h"
30 :
31 : /* common code for bpchartypmodin and varchartypmodin */
32 : static int32
33 3274 : anychar_typmodin(ArrayType *ta, const char *typename)
34 : {
35 : int32 typmod;
36 : int32 *tl;
37 : int n;
38 :
39 3274 : tl = ArrayGetIntegerTypmods(ta, &n);
40 :
41 : /*
42 : * we're not too tense about good error message here because grammar
43 : * shouldn't allow wrong number of modifiers for CHAR
44 : */
45 3274 : if (n != 1)
46 0 : ereport(ERROR,
47 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
48 : errmsg("invalid type modifier")));
49 :
50 3274 : if (*tl < 1)
51 0 : ereport(ERROR,
52 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
53 : errmsg("length for type %s must be at least 1", typename)));
54 3274 : if (*tl > MaxAttrSize)
55 0 : ereport(ERROR,
56 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
57 : errmsg("length for type %s cannot exceed %d",
58 : typename, MaxAttrSize)));
59 :
60 : /*
61 : * For largely historical reasons, the typmod is VARHDRSZ plus the number
62 : * of characters; there is enough client-side code that knows about that
63 : * that we'd better not change it.
64 : */
65 3274 : typmod = VARHDRSZ + *tl;
66 :
67 3274 : return typmod;
68 : }
69 :
70 : /* common code for bpchartypmodout and varchartypmodout */
71 : static char *
72 1000 : anychar_typmodout(int32 typmod)
73 : {
74 1000 : char *res = (char *) palloc(64);
75 :
76 1000 : if (typmod > VARHDRSZ)
77 1000 : snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
78 : else
79 0 : *res = '\0';
80 :
81 1000 : return res;
82 : }
83 :
84 :
85 : /*
86 : * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
87 : * is for blank-padded string whose length is specified in CREATE TABLE.
88 : * VARCHAR is for storing string whose length is at most the length specified
89 : * at CREATE TABLE time.
90 : *
91 : * It's hard to implement these types because we cannot figure out
92 : * the length of the type from the type itself. I changed (hopefully all) the
93 : * fmgr calls that invoke input functions of a data type to supply the
94 : * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
95 : * the length of the attributes and hence the exact length of the char() or
96 : * varchar(). We pass this to bpcharin() or varcharin().) In the case where
97 : * we cannot determine the length, we pass in -1 instead and the input
98 : * converter does not enforce any length check.
99 : *
100 : * We actually implement this as a varlena so that we don't have to pass in
101 : * the length for the comparison functions. (The difference between these
102 : * types and "text" is that we truncate and possibly blank-pad the string
103 : * at insertion time.)
104 : *
105 : * - ay 6/95
106 : */
107 :
108 :
109 : /*****************************************************************************
110 : * bpchar - char() *
111 : *****************************************************************************/
112 :
113 : /*
114 : * bpchar_input -- common guts of bpcharin and bpcharrecv
115 : *
116 : * s is the input text of length len (may not be null-terminated)
117 : * atttypmod is the typmod value to apply
118 : *
119 : * Note that atttypmod is measured in characters, which
120 : * is not necessarily the same as the number of bytes.
121 : *
122 : * If the input string is too long, raise an error, unless the extra
123 : * characters are spaces, in which case they're truncated. (per SQL)
124 : */
125 : static BpChar *
126 423124 : bpchar_input(const char *s, size_t len, int32 atttypmod)
127 : {
128 : BpChar *result;
129 : char *r;
130 : size_t maxlen;
131 :
132 : /* If typmod is -1 (or invalid), use the actual string length */
133 423124 : if (atttypmod < (int32) VARHDRSZ)
134 8648 : maxlen = len;
135 : else
136 : {
137 : size_t charlen; /* number of CHARACTERS in the input */
138 :
139 414476 : maxlen = atttypmod - VARHDRSZ;
140 414476 : charlen = pg_mbstrlen_with_len(s, len);
141 414476 : if (charlen > maxlen)
142 : {
143 : /* Verify that extra characters are spaces, and clip them off */
144 24 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
145 : size_t j;
146 :
147 : /*
148 : * at this point, len is the actual BYTE length of the input
149 : * string, maxlen is the max number of CHARACTERS allowed for this
150 : * bpchar type, mbmaxlen is the length in BYTES of those chars.
151 : */
152 24 : for (j = mbmaxlen; j < len; j++)
153 : {
154 24 : if (s[j] != ' ')
155 24 : ereport(ERROR,
156 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
157 : errmsg("value too long for type character(%d)",
158 : (int) maxlen)));
159 : }
160 :
161 : /*
162 : * Now we set maxlen to the necessary byte length, not the number
163 : * of CHARACTERS!
164 : */
165 0 : maxlen = len = mbmaxlen;
166 : }
167 : else
168 : {
169 : /*
170 : * Now we set maxlen to the necessary byte length, not the number
171 : * of CHARACTERS!
172 : */
173 414452 : maxlen = len + (maxlen - charlen);
174 : }
175 : }
176 :
177 423100 : result = (BpChar *) palloc(maxlen + VARHDRSZ);
178 423100 : SET_VARSIZE(result, maxlen + VARHDRSZ);
179 423100 : r = VARDATA(result);
180 423100 : memcpy(r, s, len);
181 :
182 : /* blank pad the string if necessary */
183 423100 : if (maxlen > len)
184 402190 : memset(r + len, ' ', maxlen - len);
185 :
186 423100 : return result;
187 : }
188 :
189 : /*
190 : * Convert a C string to CHARACTER internal representation. atttypmod
191 : * is the declared length of the type plus VARHDRSZ.
192 : */
193 : Datum
194 423124 : bpcharin(PG_FUNCTION_ARGS)
195 : {
196 423124 : char *s = PG_GETARG_CSTRING(0);
197 :
198 : #ifdef NOT_USED
199 : Oid typelem = PG_GETARG_OID(1);
200 : #endif
201 423124 : int32 atttypmod = PG_GETARG_INT32(2);
202 : BpChar *result;
203 :
204 423124 : result = bpchar_input(s, strlen(s), atttypmod);
205 423100 : PG_RETURN_BPCHAR_P(result);
206 : }
207 :
208 :
209 : /*
210 : * Convert a CHARACTER value to a C string.
211 : *
212 : * Uses the text conversion functions, which is only appropriate if BpChar
213 : * and text are equivalent types.
214 : */
215 : Datum
216 46284 : bpcharout(PG_FUNCTION_ARGS)
217 : {
218 46284 : Datum txt = PG_GETARG_DATUM(0);
219 :
220 46284 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
221 : }
222 :
223 : /*
224 : * bpcharrecv - converts external binary format to bpchar
225 : */
226 : Datum
227 0 : bpcharrecv(PG_FUNCTION_ARGS)
228 : {
229 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
230 :
231 : #ifdef NOT_USED
232 : Oid typelem = PG_GETARG_OID(1);
233 : #endif
234 0 : int32 atttypmod = PG_GETARG_INT32(2);
235 : BpChar *result;
236 : char *str;
237 : int nbytes;
238 :
239 0 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
240 0 : result = bpchar_input(str, nbytes, atttypmod);
241 0 : pfree(str);
242 0 : PG_RETURN_BPCHAR_P(result);
243 : }
244 :
245 : /*
246 : * bpcharsend - converts bpchar to binary format
247 : */
248 : Datum
249 4 : bpcharsend(PG_FUNCTION_ARGS)
250 : {
251 : /* Exactly the same as textsend, so share code */
252 4 : return textsend(fcinfo);
253 : }
254 :
255 :
256 : /*
257 : * Converts a CHARACTER type to the specified size.
258 : *
259 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
260 : * isExplicit is true if this is for an explicit cast to char(N).
261 : *
262 : * Truncation rules: for an explicit cast, silently truncate to the given
263 : * length; for an implicit cast, raise error unless extra characters are
264 : * all spaces. (This is sort-of per SQL: the spec would actually have us
265 : * raise a "completion condition" for the explicit cast case, but Postgres
266 : * hasn't got such a concept.)
267 : */
268 : Datum
269 12024 : bpchar(PG_FUNCTION_ARGS)
270 : {
271 12024 : BpChar *source = PG_GETARG_BPCHAR_PP(0);
272 12024 : int32 maxlen = PG_GETARG_INT32(1);
273 12024 : bool isExplicit = PG_GETARG_BOOL(2);
274 : BpChar *result;
275 : int32 len;
276 : char *r;
277 : char *s;
278 : int i;
279 : int charlen; /* number of characters in the input string +
280 : * VARHDRSZ */
281 :
282 : /* No work if typmod is invalid */
283 12024 : if (maxlen < (int32) VARHDRSZ)
284 0 : PG_RETURN_BPCHAR_P(source);
285 :
286 12024 : maxlen -= VARHDRSZ;
287 :
288 12024 : len = VARSIZE_ANY_EXHDR(source);
289 12024 : s = VARDATA_ANY(source);
290 :
291 12024 : charlen = pg_mbstrlen_with_len(s, len);
292 :
293 : /* No work if supplied data matches typmod already */
294 12024 : if (charlen == maxlen)
295 5478 : PG_RETURN_BPCHAR_P(source);
296 :
297 6546 : if (charlen > maxlen)
298 : {
299 : /* Verify that extra characters are spaces, and clip them off */
300 : size_t maxmblen;
301 :
302 114 : maxmblen = pg_mbcharcliplen(s, len, maxlen);
303 :
304 114 : if (!isExplicit)
305 : {
306 84 : for (i = maxmblen; i < len; i++)
307 72 : if (s[i] != ' ')
308 18 : ereport(ERROR,
309 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
310 : errmsg("value too long for type character(%d)",
311 : maxlen)));
312 : }
313 :
314 96 : len = maxmblen;
315 :
316 : /*
317 : * At this point, maxlen is the necessary byte length, not the number
318 : * of CHARACTERS!
319 : */
320 96 : maxlen = len;
321 : }
322 : else
323 : {
324 : /*
325 : * At this point, maxlen is the necessary byte length, not the number
326 : * of CHARACTERS!
327 : */
328 6432 : maxlen = len + (maxlen - charlen);
329 : }
330 :
331 : Assert(maxlen >= len);
332 :
333 6528 : result = palloc(maxlen + VARHDRSZ);
334 6528 : SET_VARSIZE(result, maxlen + VARHDRSZ);
335 6528 : r = VARDATA(result);
336 :
337 6528 : memcpy(r, s, len);
338 :
339 : /* blank pad the string if necessary */
340 6528 : if (maxlen > len)
341 6432 : memset(r + len, ' ', maxlen - len);
342 :
343 6528 : PG_RETURN_BPCHAR_P(result);
344 : }
345 :
346 :
347 : /* char_bpchar()
348 : * Convert char to bpchar(1).
349 : */
350 : Datum
351 0 : char_bpchar(PG_FUNCTION_ARGS)
352 : {
353 0 : char c = PG_GETARG_CHAR(0);
354 : BpChar *result;
355 :
356 0 : result = (BpChar *) palloc(VARHDRSZ + 1);
357 :
358 0 : SET_VARSIZE(result, VARHDRSZ + 1);
359 0 : *(VARDATA(result)) = c;
360 :
361 0 : PG_RETURN_BPCHAR_P(result);
362 : }
363 :
364 :
365 : /* bpchar_name()
366 : * Converts a bpchar() type to a NameData type.
367 : */
368 : Datum
369 0 : bpchar_name(PG_FUNCTION_ARGS)
370 : {
371 0 : BpChar *s = PG_GETARG_BPCHAR_PP(0);
372 : char *s_data;
373 : Name result;
374 : int len;
375 :
376 0 : len = VARSIZE_ANY_EXHDR(s);
377 0 : s_data = VARDATA_ANY(s);
378 :
379 : /* Truncate oversize input */
380 0 : if (len >= NAMEDATALEN)
381 0 : len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
382 :
383 : /* Remove trailing blanks */
384 0 : while (len > 0)
385 : {
386 0 : if (s_data[len - 1] != ' ')
387 0 : break;
388 0 : len--;
389 : }
390 :
391 : /* We use palloc0 here to ensure result is zero-padded */
392 0 : result = (Name) palloc0(NAMEDATALEN);
393 0 : memcpy(NameStr(*result), s_data, len);
394 :
395 0 : PG_RETURN_NAME(result);
396 : }
397 :
398 : /* name_bpchar()
399 : * Converts a NameData type to a bpchar type.
400 : *
401 : * Uses the text conversion functions, which is only appropriate if BpChar
402 : * and text are equivalent types.
403 : */
404 : Datum
405 6 : name_bpchar(PG_FUNCTION_ARGS)
406 : {
407 6 : Name s = PG_GETARG_NAME(0);
408 : BpChar *result;
409 :
410 6 : result = (BpChar *) cstring_to_text(NameStr(*s));
411 6 : PG_RETURN_BPCHAR_P(result);
412 : }
413 :
414 : Datum
415 2018 : bpchartypmodin(PG_FUNCTION_ARGS)
416 : {
417 2018 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
418 :
419 2018 : PG_RETURN_INT32(anychar_typmodin(ta, "char"));
420 : }
421 :
422 : Datum
423 814 : bpchartypmodout(PG_FUNCTION_ARGS)
424 : {
425 814 : int32 typmod = PG_GETARG_INT32(0);
426 :
427 814 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
428 : }
429 :
430 :
431 : /*****************************************************************************
432 : * varchar - varchar(n)
433 : *
434 : * Note: varchar piggybacks on type text for most operations, and so has no
435 : * C-coded functions except for I/O and typmod checking.
436 : *****************************************************************************/
437 :
438 : /*
439 : * varchar_input -- common guts of varcharin and varcharrecv
440 : *
441 : * s is the input text of length len (may not be null-terminated)
442 : * atttypmod is the typmod value to apply
443 : *
444 : * Note that atttypmod is measured in characters, which
445 : * is not necessarily the same as the number of bytes.
446 : *
447 : * If the input string is too long, raise an error, unless the extra
448 : * characters are spaces, in which case they're truncated. (per SQL)
449 : *
450 : * Uses the C string to text conversion function, which is only appropriate
451 : * if VarChar and text are equivalent types.
452 : */
453 : static VarChar *
454 2473922 : varchar_input(const char *s, size_t len, int32 atttypmod)
455 : {
456 : VarChar *result;
457 : size_t maxlen;
458 :
459 2473922 : maxlen = atttypmod - VARHDRSZ;
460 :
461 2473922 : if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
462 : {
463 : /* Verify that extra characters are spaces, and clip them off */
464 12 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
465 : size_t j;
466 :
467 12 : for (j = mbmaxlen; j < len; j++)
468 : {
469 12 : if (s[j] != ' ')
470 12 : ereport(ERROR,
471 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
472 : errmsg("value too long for type character varying(%d)",
473 : (int) maxlen)));
474 : }
475 :
476 0 : len = mbmaxlen;
477 : }
478 :
479 2473910 : result = (VarChar *) cstring_to_text_with_len(s, len);
480 2473910 : return result;
481 : }
482 :
483 : /*
484 : * Convert a C string to VARCHAR internal representation. atttypmod
485 : * is the declared length of the type plus VARHDRSZ.
486 : */
487 : Datum
488 2473922 : varcharin(PG_FUNCTION_ARGS)
489 : {
490 2473922 : char *s = PG_GETARG_CSTRING(0);
491 :
492 : #ifdef NOT_USED
493 : Oid typelem = PG_GETARG_OID(1);
494 : #endif
495 2473922 : int32 atttypmod = PG_GETARG_INT32(2);
496 : VarChar *result;
497 :
498 2473922 : result = varchar_input(s, strlen(s), atttypmod);
499 2473910 : PG_RETURN_VARCHAR_P(result);
500 : }
501 :
502 :
503 : /*
504 : * Convert a VARCHAR value to a C string.
505 : *
506 : * Uses the text to C string conversion function, which is only appropriate
507 : * if VarChar and text are equivalent types.
508 : */
509 : Datum
510 346706 : varcharout(PG_FUNCTION_ARGS)
511 : {
512 346706 : Datum txt = PG_GETARG_DATUM(0);
513 :
514 346706 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
515 : }
516 :
517 : /*
518 : * varcharrecv - converts external binary format to varchar
519 : */
520 : Datum
521 0 : varcharrecv(PG_FUNCTION_ARGS)
522 : {
523 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
524 :
525 : #ifdef NOT_USED
526 : Oid typelem = PG_GETARG_OID(1);
527 : #endif
528 0 : int32 atttypmod = PG_GETARG_INT32(2);
529 : VarChar *result;
530 : char *str;
531 : int nbytes;
532 :
533 0 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
534 0 : result = varchar_input(str, nbytes, atttypmod);
535 0 : pfree(str);
536 0 : PG_RETURN_VARCHAR_P(result);
537 : }
538 :
539 : /*
540 : * varcharsend - converts varchar to binary format
541 : */
542 : Datum
543 31668 : varcharsend(PG_FUNCTION_ARGS)
544 : {
545 : /* Exactly the same as textsend, so share code */
546 31668 : return textsend(fcinfo);
547 : }
548 :
549 :
550 : /*
551 : * varchar_support()
552 : *
553 : * Planner support function for the varchar() length coercion function.
554 : *
555 : * Currently, the only interesting thing we can do is flatten calls that set
556 : * the new maximum length >= the previous maximum length. We can ignore the
557 : * isExplicit argument, since that only affects truncation cases.
558 : */
559 : Datum
560 2122 : varchar_support(PG_FUNCTION_ARGS)
561 : {
562 2122 : Node *rawreq = (Node *) PG_GETARG_POINTER(0);
563 2122 : Node *ret = NULL;
564 :
565 2122 : if (IsA(rawreq, SupportRequestSimplify))
566 : {
567 878 : SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
568 878 : FuncExpr *expr = req->fcall;
569 : Node *typmod;
570 :
571 : Assert(list_length(expr->args) >= 2);
572 :
573 878 : typmod = (Node *) lsecond(expr->args);
574 :
575 878 : if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
576 : {
577 878 : Node *source = (Node *) linitial(expr->args);
578 878 : int32 old_typmod = exprTypmod(source);
579 878 : int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
580 878 : int32 old_max = old_typmod - VARHDRSZ;
581 878 : int32 new_max = new_typmod - VARHDRSZ;
582 :
583 878 : if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
584 30 : ret = relabel_to_typmod(source, new_typmod);
585 : }
586 : }
587 :
588 2122 : PG_RETURN_POINTER(ret);
589 : }
590 :
591 : /*
592 : * Converts a VARCHAR type to the specified size.
593 : *
594 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
595 : * isExplicit is true if this is for an explicit cast to varchar(N).
596 : *
597 : * Truncation rules: for an explicit cast, silently truncate to the given
598 : * length; for an implicit cast, raise error unless extra characters are
599 : * all spaces. (This is sort-of per SQL: the spec would actually have us
600 : * raise a "completion condition" for the explicit cast case, but Postgres
601 : * hasn't got such a concept.)
602 : */
603 : Datum
604 25548 : varchar(PG_FUNCTION_ARGS)
605 : {
606 25548 : VarChar *source = PG_GETARG_VARCHAR_PP(0);
607 25548 : int32 typmod = PG_GETARG_INT32(1);
608 25548 : bool isExplicit = PG_GETARG_BOOL(2);
609 : int32 len,
610 : maxlen;
611 : size_t maxmblen;
612 : int i;
613 : char *s_data;
614 :
615 25548 : len = VARSIZE_ANY_EXHDR(source);
616 25548 : s_data = VARDATA_ANY(source);
617 25548 : maxlen = typmod - VARHDRSZ;
618 :
619 : /* No work if typmod is invalid or supplied data fits it already */
620 25548 : if (maxlen < 0 || len <= maxlen)
621 25428 : PG_RETURN_VARCHAR_P(source);
622 :
623 : /* only reach here if string is too long... */
624 :
625 : /* truncate multibyte string preserving multibyte boundary */
626 120 : maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
627 :
628 120 : if (!isExplicit)
629 : {
630 114 : for (i = maxmblen; i < len; i++)
631 102 : if (s_data[i] != ' ')
632 42 : ereport(ERROR,
633 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
634 : errmsg("value too long for type character varying(%d)",
635 : maxlen)));
636 : }
637 :
638 78 : PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
639 : maxmblen));
640 : }
641 :
642 : Datum
643 1256 : varchartypmodin(PG_FUNCTION_ARGS)
644 : {
645 1256 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
646 :
647 1256 : PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
648 : }
649 :
650 : Datum
651 186 : varchartypmodout(PG_FUNCTION_ARGS)
652 : {
653 186 : int32 typmod = PG_GETARG_INT32(0);
654 :
655 186 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
656 : }
657 :
658 :
659 : /*****************************************************************************
660 : * Exported functions
661 : *****************************************************************************/
662 :
663 : /* "True" length (not counting trailing blanks) of a BpChar */
664 : static inline int
665 258502 : bcTruelen(BpChar *arg)
666 : {
667 258502 : return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
668 : }
669 :
/*
 * bpchartruelen -- length of a byte string, not counting trailing blanks
 *
 * s points at len bytes.  Returns the number of leading bytes that remain
 * after all trailing ' ' bytes are stripped (0 if the string is all blanks).
 */
int
bpchartruelen(char *s, int len)
{
	int			n = len;

	/*
	 * Scanning byte-wise is safe because we rely on the assumption that ' '
	 * is a singleton unit on every supported multibyte server encoding.
	 */
	while (n > 0 && s[n - 1] == ' ')
		n--;

	return n;
}
686 :
687 : Datum
688 6 : bpcharlen(PG_FUNCTION_ARGS)
689 : {
690 6 : BpChar *arg = PG_GETARG_BPCHAR_PP(0);
691 : int len;
692 :
693 : /* get number of bytes, ignoring trailing spaces */
694 6 : len = bcTruelen(arg);
695 :
696 : /* in multibyte encoding, convert to number of characters */
697 6 : if (pg_database_encoding_max_length() != 1)
698 6 : len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
699 :
700 6 : PG_RETURN_INT32(len);
701 : }
702 :
703 : Datum
704 0 : bpcharoctetlen(PG_FUNCTION_ARGS)
705 : {
706 0 : Datum arg = PG_GETARG_DATUM(0);
707 :
708 : /* We need not detoast the input at all */
709 0 : PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
710 : }
711 :
712 :
713 : /*****************************************************************************
714 : * Comparison Functions used for bpchar
715 : *
716 : * Note: btree indexes need these routines not to leak memory; therefore,
717 : * be careful to free working copies of toasted datums. Most places don't
718 : * need to be so careful.
719 : *****************************************************************************/
720 :
721 : static void
722 25546 : check_collation_set(Oid collid)
723 : {
724 25546 : if (!OidIsValid(collid))
725 : {
726 : /*
727 : * This typically means that the parser could not resolve a conflict
728 : * of implicit collations, so report it that way.
729 : */
730 0 : ereport(ERROR,
731 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
732 : errmsg("could not determine which collation to use for string comparison"),
733 : errhint("Use the COLLATE clause to set the collation explicitly.")));
734 : }
735 25546 : }
736 :
737 : Datum
738 19162 : bpchareq(PG_FUNCTION_ARGS)
739 : {
740 19162 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
741 19162 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
742 : int len1,
743 : len2;
744 : bool result;
745 19162 : Oid collid = PG_GET_COLLATION();
746 19162 : bool locale_is_c = false;
747 19162 : pg_locale_t mylocale = 0;
748 :
749 19162 : check_collation_set(collid);
750 :
751 19162 : len1 = bcTruelen(arg1);
752 19162 : len2 = bcTruelen(arg2);
753 :
754 19162 : if (lc_collate_is_c(collid))
755 48 : locale_is_c = true;
756 : else
757 19114 : mylocale = pg_newlocale_from_collation(collid);
758 :
759 19162 : if (locale_is_c || !mylocale || mylocale->deterministic)
760 : {
761 : /*
762 : * Since we only care about equality or not-equality, we can avoid all
763 : * the expense of strcoll() here, and just do bitwise comparison.
764 : */
765 19162 : if (len1 != len2)
766 2490 : result = false;
767 : else
768 16672 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
769 : }
770 : else
771 : {
772 0 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
773 : collid) == 0);
774 : }
775 :
776 19162 : PG_FREE_IF_COPY(arg1, 0);
777 19162 : PG_FREE_IF_COPY(arg2, 1);
778 :
779 19162 : PG_RETURN_BOOL(result);
780 : }
781 :
782 : Datum
783 6384 : bpcharne(PG_FUNCTION_ARGS)
784 : {
785 6384 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
786 6384 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
787 : int len1,
788 : len2;
789 : bool result;
790 6384 : Oid collid = PG_GET_COLLATION();
791 6384 : bool locale_is_c = false;
792 6384 : pg_locale_t mylocale = 0;
793 :
794 6384 : check_collation_set(collid);
795 :
796 6384 : len1 = bcTruelen(arg1);
797 6384 : len2 = bcTruelen(arg2);
798 :
799 6384 : if (lc_collate_is_c(collid))
800 0 : locale_is_c = true;
801 : else
802 6384 : mylocale = pg_newlocale_from_collation(collid);
803 :
804 6384 : if (locale_is_c || !mylocale || mylocale->deterministic)
805 : {
806 : /*
807 : * Since we only care about equality or not-equality, we can avoid all
808 : * the expense of strcoll() here, and just do bitwise comparison.
809 : */
810 6384 : if (len1 != len2)
811 2022 : result = true;
812 : else
813 4362 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
814 : }
815 : else
816 : {
817 0 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
818 : collid) != 0);
819 : }
820 :
821 6384 : PG_FREE_IF_COPY(arg1, 0);
822 6384 : PG_FREE_IF_COPY(arg2, 1);
823 :
824 6384 : PG_RETURN_BOOL(result);
825 : }
826 :
827 : Datum
828 4762 : bpcharlt(PG_FUNCTION_ARGS)
829 : {
830 4762 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
831 4762 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
832 : int len1,
833 : len2;
834 : int cmp;
835 :
836 4762 : len1 = bcTruelen(arg1);
837 4762 : len2 = bcTruelen(arg2);
838 :
839 4762 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
840 : PG_GET_COLLATION());
841 :
842 4762 : PG_FREE_IF_COPY(arg1, 0);
843 4762 : PG_FREE_IF_COPY(arg2, 1);
844 :
845 4762 : PG_RETURN_BOOL(cmp < 0);
846 : }
847 :
848 : Datum
849 5546 : bpcharle(PG_FUNCTION_ARGS)
850 : {
851 5546 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
852 5546 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
853 : int len1,
854 : len2;
855 : int cmp;
856 :
857 5546 : len1 = bcTruelen(arg1);
858 5546 : len2 = bcTruelen(arg2);
859 :
860 5546 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
861 : PG_GET_COLLATION());
862 :
863 5546 : PG_FREE_IF_COPY(arg1, 0);
864 5546 : PG_FREE_IF_COPY(arg2, 1);
865 :
866 5546 : PG_RETURN_BOOL(cmp <= 0);
867 : }
868 :
869 : Datum
870 4998 : bpchargt(PG_FUNCTION_ARGS)
871 : {
872 4998 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
873 4998 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
874 : int len1,
875 : len2;
876 : int cmp;
877 :
878 4998 : len1 = bcTruelen(arg1);
879 4998 : len2 = bcTruelen(arg2);
880 :
881 4998 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
882 : PG_GET_COLLATION());
883 :
884 4998 : PG_FREE_IF_COPY(arg1, 0);
885 4998 : PG_FREE_IF_COPY(arg2, 1);
886 :
887 4998 : PG_RETURN_BOOL(cmp > 0);
888 : }
889 :
890 : Datum
891 5720 : bpcharge(PG_FUNCTION_ARGS)
892 : {
893 5720 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
894 5720 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
895 : int len1,
896 : len2;
897 : int cmp;
898 :
899 5720 : len1 = bcTruelen(arg1);
900 5720 : len2 = bcTruelen(arg2);
901 :
902 5720 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
903 : PG_GET_COLLATION());
904 :
905 5720 : PG_FREE_IF_COPY(arg1, 0);
906 5720 : PG_FREE_IF_COPY(arg2, 1);
907 :
908 5720 : PG_RETURN_BOOL(cmp >= 0);
909 : }
910 :
911 : Datum
912 81012 : bpcharcmp(PG_FUNCTION_ARGS)
913 : {
914 81012 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
915 81012 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
916 : int len1,
917 : len2;
918 : int cmp;
919 :
920 81012 : len1 = bcTruelen(arg1);
921 81012 : len2 = bcTruelen(arg2);
922 :
923 81012 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
924 : PG_GET_COLLATION());
925 :
926 81012 : PG_FREE_IF_COPY(arg1, 0);
927 81012 : PG_FREE_IF_COPY(arg2, 1);
928 :
929 81012 : PG_RETURN_INT32(cmp);
930 : }
931 :
932 : Datum
933 672 : bpchar_sortsupport(PG_FUNCTION_ARGS)
934 : {
935 672 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
936 672 : Oid collid = ssup->ssup_collation;
937 : MemoryContext oldcontext;
938 :
939 672 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
940 :
941 : /* Use generic string SortSupport */
942 672 : varstr_sortsupport(ssup, BPCHAROID, collid);
943 :
944 672 : MemoryContextSwitchTo(oldcontext);
945 :
946 672 : PG_RETURN_VOID();
947 : }
948 :
949 : Datum
950 0 : bpchar_larger(PG_FUNCTION_ARGS)
951 : {
952 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
953 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
954 : int len1,
955 : len2;
956 : int cmp;
957 :
958 0 : len1 = bcTruelen(arg1);
959 0 : len2 = bcTruelen(arg2);
960 :
961 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
962 : PG_GET_COLLATION());
963 :
964 0 : PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
965 : }
966 :
967 : Datum
968 0 : bpchar_smaller(PG_FUNCTION_ARGS)
969 : {
970 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
971 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
972 : int len1,
973 : len2;
974 : int cmp;
975 :
976 0 : len1 = bcTruelen(arg1);
977 0 : len2 = bcTruelen(arg2);
978 :
979 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
980 : PG_GET_COLLATION());
981 :
982 0 : PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
983 : }
984 :
985 :
986 : /*
987 : * bpchar needs a specialized hash function because we want to ignore
988 : * trailing blanks in comparisons.
989 : */
990 : Datum
991 3112 : hashbpchar(PG_FUNCTION_ARGS)
992 : {
993 3112 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
994 3112 : Oid collid = PG_GET_COLLATION();
995 : char *keydata;
996 : int keylen;
997 3112 : pg_locale_t mylocale = 0;
998 : Datum result;
999 :
1000 3112 : if (!collid)
1001 0 : ereport(ERROR,
1002 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1003 : errmsg("could not determine which collation to use for string hashing"),
1004 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1005 :
1006 3112 : keydata = VARDATA_ANY(key);
1007 3112 : keylen = bcTruelen(key);
1008 :
1009 3112 : if (!lc_collate_is_c(collid))
1010 3112 : mylocale = pg_newlocale_from_collation(collid);
1011 :
1012 3112 : if (!mylocale || mylocale->deterministic)
1013 : {
1014 3112 : result = hash_any((unsigned char *) keydata, keylen);
1015 : }
1016 : else
1017 : {
1018 : #ifdef USE_ICU
1019 : if (mylocale->provider == COLLPROVIDER_ICU)
1020 : {
1021 : int32_t ulen = -1;
1022 : UChar *uchar = NULL;
1023 : Size bsize;
1024 : uint8_t *buf;
1025 :
1026 : ulen = icu_to_uchar(&uchar, keydata, keylen);
1027 :
1028 : bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1029 : uchar, ulen, NULL, 0);
1030 : buf = palloc(bsize);
1031 : ucol_getSortKey(mylocale->info.icu.ucol,
1032 : uchar, ulen, buf, bsize);
1033 :
1034 : result = hash_any(buf, bsize);
1035 :
1036 : pfree(buf);
1037 : }
1038 : else
1039 : #endif
1040 : /* shouldn't happen */
1041 0 : elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1042 : }
1043 :
1044 : /* Avoid leaking memory for toasted inputs */
1045 3112 : PG_FREE_IF_COPY(key, 0);
1046 :
1047 3112 : return result;
1048 : }
1049 :
1050 : Datum
1051 60 : hashbpcharextended(PG_FUNCTION_ARGS)
1052 : {
1053 60 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
1054 60 : Oid collid = PG_GET_COLLATION();
1055 : char *keydata;
1056 : int keylen;
1057 60 : pg_locale_t mylocale = 0;
1058 : Datum result;
1059 :
1060 60 : if (!collid)
1061 0 : ereport(ERROR,
1062 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1063 : errmsg("could not determine which collation to use for string hashing"),
1064 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1065 :
1066 60 : keydata = VARDATA_ANY(key);
1067 60 : keylen = bcTruelen(key);
1068 :
1069 60 : if (!lc_collate_is_c(collid))
1070 60 : mylocale = pg_newlocale_from_collation(collid);
1071 :
1072 60 : if (!mylocale || mylocale->deterministic)
1073 : {
1074 60 : result = hash_any_extended((unsigned char *) keydata, keylen,
1075 60 : PG_GETARG_INT64(1));
1076 : }
1077 : else
1078 : {
1079 : #ifdef USE_ICU
1080 : if (mylocale->provider == COLLPROVIDER_ICU)
1081 : {
1082 : int32_t ulen = -1;
1083 : UChar *uchar = NULL;
1084 : Size bsize;
1085 : uint8_t *buf;
1086 :
1087 : ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
1088 :
1089 : bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1090 : uchar, ulen, NULL, 0);
1091 : buf = palloc(bsize);
1092 : ucol_getSortKey(mylocale->info.icu.ucol,
1093 : uchar, ulen, buf, bsize);
1094 :
1095 : result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
1096 :
1097 : pfree(buf);
1098 : }
1099 : else
1100 : #endif
1101 : /* shouldn't happen */
1102 0 : elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1103 : }
1104 :
1105 60 : PG_FREE_IF_COPY(key, 0);
1106 :
1107 60 : return result;
1108 : }
1109 :
1110 : /*
1111 : * The following operators support character-by-character comparison
1112 : * of bpchar datums, to allow building indexes suitable for LIKE clauses.
1113 : * Note that the regular bpchareq/bpcharne comparison operators, and
1114 : * regular support functions 1 and 2 with "C" collation are assumed to be
1115 : * compatible with these!
1116 : */
1117 :
1118 : static int
1119 78 : internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
1120 : {
1121 : int result;
1122 : int len1,
1123 : len2;
1124 :
1125 78 : len1 = bcTruelen(arg1);
1126 78 : len2 = bcTruelen(arg2);
1127 :
1128 78 : result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1129 78 : if (result != 0)
1130 48 : return result;
1131 30 : else if (len1 < len2)
1132 0 : return -1;
1133 30 : else if (len1 > len2)
1134 0 : return 1;
1135 : else
1136 30 : return 0;
1137 : }
1138 :
1139 :
1140 : Datum
1141 0 : bpchar_pattern_lt(PG_FUNCTION_ARGS)
1142 : {
1143 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1144 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1145 : int result;
1146 :
1147 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1148 :
1149 0 : PG_FREE_IF_COPY(arg1, 0);
1150 0 : PG_FREE_IF_COPY(arg2, 1);
1151 :
1152 0 : PG_RETURN_BOOL(result < 0);
1153 : }
1154 :
1155 :
1156 : Datum
1157 0 : bpchar_pattern_le(PG_FUNCTION_ARGS)
1158 : {
1159 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1160 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1161 : int result;
1162 :
1163 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1164 :
1165 0 : PG_FREE_IF_COPY(arg1, 0);
1166 0 : PG_FREE_IF_COPY(arg2, 1);
1167 :
1168 0 : PG_RETURN_BOOL(result <= 0);
1169 : }
1170 :
1171 :
1172 : Datum
1173 0 : bpchar_pattern_ge(PG_FUNCTION_ARGS)
1174 : {
1175 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1176 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1177 : int result;
1178 :
1179 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1180 :
1181 0 : PG_FREE_IF_COPY(arg1, 0);
1182 0 : PG_FREE_IF_COPY(arg2, 1);
1183 :
1184 0 : PG_RETURN_BOOL(result >= 0);
1185 : }
1186 :
1187 :
1188 : Datum
1189 0 : bpchar_pattern_gt(PG_FUNCTION_ARGS)
1190 : {
1191 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1192 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1193 : int result;
1194 :
1195 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1196 :
1197 0 : PG_FREE_IF_COPY(arg1, 0);
1198 0 : PG_FREE_IF_COPY(arg2, 1);
1199 :
1200 0 : PG_RETURN_BOOL(result > 0);
1201 : }
1202 :
1203 :
1204 : Datum
1205 78 : btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
1206 : {
1207 78 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1208 78 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1209 : int result;
1210 :
1211 78 : result = internal_bpchar_pattern_compare(arg1, arg2);
1212 :
1213 78 : PG_FREE_IF_COPY(arg1, 0);
1214 78 : PG_FREE_IF_COPY(arg2, 1);
1215 :
1216 78 : PG_RETURN_INT32(result);
1217 : }
1218 :
1219 :
1220 : Datum
1221 12 : btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
1222 : {
1223 12 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1224 : MemoryContext oldcontext;
1225 :
1226 12 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1227 :
1228 : /* Use generic string SortSupport, forcing "C" collation */
1229 12 : varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
1230 :
1231 12 : MemoryContextSwitchTo(oldcontext);
1232 :
1233 12 : PG_RETURN_VOID();
1234 : }
|