Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * varlena.c
4 : * Functions for the variable-length built-in types.
5 : *
6 : * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/varlena.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <ctype.h>
18 : #include <limits.h>
19 :
20 : #include "access/detoast.h"
21 : #include "catalog/pg_collation.h"
22 : #include "catalog/pg_type.h"
23 : #include "common/hashfn.h"
24 : #include "common/hex.h"
25 : #include "common/int.h"
26 : #include "common/unicode_norm.h"
27 : #include "lib/hyperloglog.h"
28 : #include "libpq/pqformat.h"
29 : #include "miscadmin.h"
30 : #include "nodes/execnodes.h"
31 : #include "parser/scansup.h"
32 : #include "port/pg_bswap.h"
33 : #include "regex/regex.h"
34 : #include "utils/builtins.h"
35 : #include "utils/bytea.h"
36 : #include "utils/lsyscache.h"
37 : #include "utils/memutils.h"
38 : #include "utils/pg_locale.h"
39 : #include "utils/sortsupport.h"
40 : #include "utils/varlena.h"
41 :
42 :
43 : /* GUC variable: selects the text output format used by byteaout(); hex by default */
44 : int bytea_output = BYTEA_OUTPUT_HEX;
45 :
46 : typedef struct varlena unknown; /* target type of the *UnknownP macros defined below */
47 : typedef struct varlena VarString; /* target type of the DatumGetVarStringP* macros defined below */
48 :
49 : /*
50 : * State carried between calls of the text_position_* functions.
51 : */
52 : typedef struct
53 : {
54 : bool is_multibyte; /* true if the encoding is multibyte */
55 : bool is_multibyte_char_in_char; /* need to check char boundaries? */
56 :
57 : char *str1; /* haystack string */
58 : char *str2; /* needle string */
59 : int len1; /* length of str1, in bytes */
60 : int len2; /* length of str2, in bytes */
61 :
62 : /* Skip table for Boyer-Moore-Horspool search algorithm: */
63 : int skiptablemask; /* mask for ANDing with skiptable subscripts */
64 : int skiptable[256]; /* skip distance for given mismatched char */
65 :
66 : char *last_match; /* pointer to last match in 'str1' */
67 :
68 : /*
69 : * Sometimes the byte position of a match has to be converted to a
70 : * character position. These remember the last position converted, so
71 : * the next call can continue from there instead of counting characters
72 : * again from the very beginning.
73 : */
74 : char *refpoint; /* pointer within original haystack string */
75 : int refpos; /* 0-based character offset of the same point */
76 : } TextPositionState;
77 :
78 : typedef struct /* NOTE(review): looks like working state for the varstr*/bpchar/name fast comparators declared below -- confirm against their definitions */
79 : {
80 : char *buf1; /* 1st string, or abbreviation original string
81 : * buf */
82 : char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
83 : int buflen1; /* presumably the allocated size of buf1 -- TODO confirm */
84 : int buflen2; /* presumably the allocated size of buf2 -- TODO confirm */
85 : int last_len1; /* Length of last buf1 string/strxfrm() input */
86 : int last_len2; /* Length of last buf2 string/strxfrm() blob */
87 : int last_returned; /* Last comparison result (cache) */
88 : bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
89 : bool collate_c; /* NOTE(review): looks like a "collation is C" flag -- confirm */
90 : Oid typid; /* Actual datatype (text/bpchar/bytea/name) */
91 : hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
92 : hyperLogLogState full_card; /* Full key cardinality state */
93 : double prop_card; /* Required cardinality proportion */
94 : pg_locale_t locale; /* NOTE(review): presumably the locale used when comparing -- confirm */
95 : } VarStringSortSupport;
96 :
97 : /*
98 : * Output data for split_text(): results go either to an array or to a table.
99 : * For table output, tupstore and tupdesc must be set up in advance.
100 : */
101 : typedef struct
102 : {
103 : ArrayBuildState *astate; /* build state for the array form of the output */
104 : Tuplestorestate *tupstore; /* table output: must be set up in advance */
105 : TupleDesc tupdesc; /* table output: must be set up in advance */
106 : } SplitTextOutputData;
107 :
108 : /*
109 : * Size for on-stack string buffers: large enough that most strings fit,
110 : * small enough that we are comfortable putting it on the stack.
111 : */
112 : #define TEXTBUFLEN 1024
113 :
114 : #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X)) /* detoast, view as unknown */
115 : #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X)) /* same, via the _COPY detoast variant */
116 : #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n)) /* fetch argument n as unknown */
117 : #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n)) /* same, via the _COPY variant */
118 : #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x) /* return an unknown as a pointer Datum */
119 :
120 : #define DatumGetVarStringP(X) ((VarString *) PG_DETOAST_DATUM(X)) /* detoast, view as VarString */
121 : #define DatumGetVarStringPP(X) ((VarString *) PG_DETOAST_DATUM_PACKED(X)) /* same, via the _PACKED detoast variant */
122 :
123 : static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup); /* forward declarations of file-local (static) helpers start here */
124 : static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
125 : static int namefastcmp_c(Datum x, Datum y, SortSupport ssup);
126 : static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
127 : static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
128 : static int varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
129 : static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
130 : static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
131 : static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
132 : static int32 text_length(Datum str); /* does the real work for textlen() */
133 : static text *text_catenate(text *t1, text *t2); /* guts of textcat() */
134 : static text *text_substring(Datum str,
135 : int32 start,
136 : int32 length,
137 : bool length_not_specified); /* real work for text_substr() and text_substr_no_len() */
138 : static text *text_overlay(text *t1, text *t2, int sp, int sl);
139 : static int text_position(text *t1, text *t2, Oid collid);
140 : static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
141 : static bool text_position_next(TextPositionState *state);
142 : static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
143 : static char *text_position_get_match_ptr(TextPositionState *state);
144 : static int text_position_get_match_pos(TextPositionState *state);
145 : static void text_position_cleanup(TextPositionState *state);
146 : static void check_collation_set(Oid collid);
147 : static int text_cmp(text *arg1, text *arg2, Oid collid);
148 : static bytea *bytea_catenate(bytea *t1, bytea *t2);
149 : static bytea *bytea_substring(Datum str,
150 : int S,
151 : int L,
152 : bool length_not_specified);
153 : static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
154 : static void appendStringInfoText(StringInfo str, const text *t);
155 : static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
156 : static void split_text_accum_result(SplitTextOutputData *tstate,
157 : text *field_value,
158 : text *null_string,
159 : Oid collation);
160 : static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
161 : const char *fldsep, const char *null_string);
162 : static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
163 : static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
164 : int *value);
165 : static const char *text_format_parse_format(const char *start_ptr,
166 : const char *end_ptr,
167 : int *argpos, int *widthpos,
168 : int *flags, int *width);
169 : static void text_format_string_conversion(StringInfo buf, char conversion,
170 : FmgrInfo *typOutputInfo,
171 : Datum value, bool isNull,
172 : int flags, int width);
173 : static void text_format_append_string(StringInfo buf, const char *str,
174 : int flags, int width);
175 :
176 :
177 : /*****************************************************************************
178 : * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
179 : *****************************************************************************/
180 :
181 : /*
182 : * cstring_to_text
183 : *
184 : * Create a text value from a null-terminated C string.
185 : *
186 : * The new text value is freshly palloc'd with a full-size VARHDR.
187 : */
188 : text *
189 7416090 : cstring_to_text(const char *s)
190 : {
191 7416090 : return cstring_to_text_with_len(s, strlen(s));
192 : }
193 :
194 : /*
195 : * cstring_to_text_with_len
196 : *
197 : * Same as cstring_to_text except the caller specifies the string length;
198 : * the string need not be null_terminated.
199 : */
200 : text *
201 11052248 : cstring_to_text_with_len(const char *s, int len)
202 : {
203 11052248 : text *result = (text *) palloc(len + VARHDRSZ);
204 :
205 11052248 : SET_VARSIZE(result, len + VARHDRSZ);
206 11052248 : memcpy(VARDATA(result), s, len);
207 :
208 11052248 : return result;
209 : }
210 :
211 : /*
212 : * text_to_cstring
213 : *
214 : * Create a palloc'd, null-terminated C string from a text value.
215 : *
216 : * We support being passed a compressed or toasted text value.
217 : * This is a bit bogus since such values shouldn't really be referred to as
218 : * "text *", but it seems useful for robustness. If we didn't handle that
219 : * case here, we'd need another routine that did, anyway.
220 : */
221 : char *
222 5830260 : text_to_cstring(const text *t)
223 : {
224 : /* must cast away the const, unfortunately */
225 5830260 : text *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
226 5830260 : int len = VARSIZE_ANY_EXHDR(tunpacked);
227 : char *result;
228 :
229 5830260 : result = (char *) palloc(len + 1);
230 5830260 : memcpy(result, VARDATA_ANY(tunpacked), len);
231 5830260 : result[len] = '\0';
232 :
233 5830260 : if (tunpacked != t)
234 74324 : pfree(tunpacked);
235 :
236 5830260 : return result;
237 : }
238 :
239 : /*
240 : * text_to_cstring_buffer
241 : *
242 : * Copy a text value into a caller-supplied buffer of size dst_len.
243 : *
244 : * The text string is truncated if necessary to fit. The result is
245 : * guaranteed null-terminated (unless dst_len == 0).
246 : *
247 : * We support being passed a compressed or toasted text value.
248 : * This is a bit bogus since such values shouldn't really be referred to as
249 : * "text *", but it seems useful for robustness. If we didn't handle that
250 : * case here, we'd need another routine that did, anyway.
251 : */
252 : void
253 390 : text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
254 : {
255 : /* must cast away the const, unfortunately */
256 390 : text *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
257 390 : size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
258 :
259 390 : if (dst_len > 0)
260 : {
261 390 : dst_len--;
262 390 : if (dst_len >= src_len)
263 390 : dst_len = src_len;
264 : else /* ensure truncation is encoding-safe */
265 0 : dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
266 390 : memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
267 390 : dst[dst_len] = '\0';
268 : }
269 :
270 390 : if (srcunpacked != src)
271 0 : pfree(srcunpacked);
272 390 : }
273 :
274 :
275 : /*****************************************************************************
276 : * USER I/O ROUTINES *
277 : *****************************************************************************/
278 :
279 :
280 : #define VAL(CH) ((CH) - '0') /* digit character -> its numeric value */
281 : #define DIG(VAL) ((VAL) + '0') /* small numeric value -> its digit character */
282 :
283 : /*
284 : * byteain - converts from printable representation of byte array
285 : *
286 : * Non-printable characters must be passed as '\nnn' (octal) and are
287 : * converted to internal form. '\' must be passed as '\\'.
288 : * ereport(ERROR, ...) if bad form.
289 : *
290 : * BUGS:
291 : * The input is scanned twice.
292 : * The error checking of input is minimal.
293 : */
294 : Datum
295 10956 : byteain(PG_FUNCTION_ARGS)
296 : {
297 10956 : char *inputText = PG_GETARG_CSTRING(0);
298 : char *tp;
299 : char *rp;
300 : int bc;
301 : bytea *result;
302 :
303 : /* Recognize hex input */
304 10956 : if (inputText[0] == '\\' && inputText[1] == 'x')
305 : {
306 96 : size_t len = strlen(inputText);
307 96 : uint64 dstlen = pg_hex_dec_len(len - 2);
308 :
309 96 : bc = dstlen + VARHDRSZ; /* maximum possible length */
310 96 : result = palloc(bc);
311 :
312 96 : bc = pg_hex_decode(inputText + 2, len - 2, VARDATA(result), dstlen);
313 88 : SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
314 :
315 88 : PG_RETURN_BYTEA_P(result);
316 : }
317 :
318 : /* Else, it's the traditional escaped style */
319 200316 : for (bc = 0, tp = inputText; *tp != '\0'; bc++)
320 : {
321 189460 : if (tp[0] != '\\')
322 188718 : tp++;
323 742 : else if ((tp[0] == '\\') &&
324 742 : (tp[1] >= '0' && tp[1] <= '3') &&
325 738 : (tp[2] >= '0' && tp[2] <= '7') &&
326 738 : (tp[3] >= '0' && tp[3] <= '7'))
327 738 : tp += 4;
328 4 : else if ((tp[0] == '\\') &&
329 4 : (tp[1] == '\\'))
330 0 : tp += 2;
331 : else
332 : {
333 : /*
334 : * one backslash, not followed by another or ### valid octal
335 : */
336 4 : ereport(ERROR,
337 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
338 : errmsg("invalid input syntax for type %s", "bytea")));
339 : }
340 : }
341 :
342 10856 : bc += VARHDRSZ;
343 :
344 10856 : result = (bytea *) palloc(bc);
345 10856 : SET_VARSIZE(result, bc);
346 :
347 10856 : tp = inputText;
348 10856 : rp = VARDATA(result);
349 200304 : while (*tp != '\0')
350 : {
351 189448 : if (tp[0] != '\\')
352 188710 : *rp++ = *tp++;
353 738 : else if ((tp[0] == '\\') &&
354 738 : (tp[1] >= '0' && tp[1] <= '3') &&
355 738 : (tp[2] >= '0' && tp[2] <= '7') &&
356 738 : (tp[3] >= '0' && tp[3] <= '7'))
357 : {
358 738 : bc = VAL(tp[1]);
359 738 : bc <<= 3;
360 738 : bc += VAL(tp[2]);
361 738 : bc <<= 3;
362 738 : *rp++ = bc + VAL(tp[3]);
363 :
364 738 : tp += 4;
365 : }
366 0 : else if ((tp[0] == '\\') &&
367 0 : (tp[1] == '\\'))
368 : {
369 0 : *rp++ = '\\';
370 0 : tp += 2;
371 : }
372 : else
373 : {
374 : /*
375 : * We should never get here. The first pass should not allow it.
376 : */
377 0 : ereport(ERROR,
378 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
379 : errmsg("invalid input syntax for type %s", "bytea")));
380 : }
381 : }
382 :
383 10856 : PG_RETURN_BYTEA_P(result);
384 : }
385 :
386 : /*
387 : * byteaout - converts to printable representation of byte array
388 : *
389 : * In the traditional escaped format, non-printable characters are
390 : * printed as '\nnn' (octal) and '\' as '\\'.
391 : */
392 : Datum
393 4938 : byteaout(PG_FUNCTION_ARGS)
394 : {
395 4938 : bytea *vlena = PG_GETARG_BYTEA_PP(0);
396 : char *result;
397 : char *rp;
398 :
399 4938 : if (bytea_output == BYTEA_OUTPUT_HEX)
400 : {
401 4708 : uint64 dstlen = pg_hex_enc_len(VARSIZE_ANY_EXHDR(vlena));
402 :
403 : /* Print hex format */
404 4708 : rp = result = palloc(dstlen + 2 + 1);
405 4708 : *rp++ = '\\';
406 4708 : *rp++ = 'x';
407 :
408 4708 : rp += pg_hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp,
409 : dstlen);
410 : }
411 230 : else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
412 : {
413 : /* Print traditional escaped format */
414 : char *vp;
415 : uint64 len;
416 : int i;
417 :
418 230 : len = 1; /* empty string has 1 char */
419 230 : vp = VARDATA_ANY(vlena);
420 2204 : for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
421 : {
422 1974 : if (*vp == '\\')
423 0 : len += 2;
424 1974 : else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
425 336 : len += 4;
426 : else
427 1638 : len++;
428 : }
429 :
430 : /*
431 : * In principle len can't overflow uint32 if the input fit in 1GB, but
432 : * for safety let's check rather than relying on palloc's internal
433 : * check.
434 : */
435 230 : if (len > MaxAllocSize)
436 0 : ereport(ERROR,
437 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
438 : errmsg_internal("result of bytea output conversion is too large")));
439 230 : rp = result = (char *) palloc(len);
440 :
441 230 : vp = VARDATA_ANY(vlena);
442 2204 : for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
443 : {
444 1974 : if (*vp == '\\')
445 : {
446 0 : *rp++ = '\\';
447 0 : *rp++ = '\\';
448 : }
449 1974 : else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
450 336 : {
451 : int val; /* holds unprintable chars */
452 :
453 336 : val = *vp;
454 336 : rp[0] = '\\';
455 336 : rp[3] = DIG(val & 07);
456 336 : val >>= 3;
457 336 : rp[2] = DIG(val & 07);
458 336 : val >>= 3;
459 336 : rp[1] = DIG(val & 03);
460 336 : rp += 4;
461 : }
462 : else
463 1638 : *rp++ = *vp;
464 : }
465 : }
466 : else
467 : {
468 0 : elog(ERROR, "unrecognized bytea_output setting: %d",
469 : bytea_output);
470 : rp = result = NULL; /* keep compiler quiet */
471 : }
472 4938 : *rp = '\0';
473 4938 : PG_RETURN_CSTRING(result);
474 : }
475 :
476 : /*
477 : * bytearecv - converts external binary format to bytea
478 : */
479 : Datum
480 710 : bytearecv(PG_FUNCTION_ARGS)
481 : {
482 710 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
483 : bytea *result;
484 : int nbytes;
485 :
486 710 : nbytes = buf->len - buf->cursor;
487 710 : result = (bytea *) palloc(nbytes + VARHDRSZ);
488 710 : SET_VARSIZE(result, nbytes + VARHDRSZ);
489 710 : pq_copymsgbytes(buf, VARDATA(result), nbytes);
490 710 : PG_RETURN_BYTEA_P(result);
491 : }
492 :
493 : /*
494 : * byteasend - converts bytea to binary format
495 : *
496 : * This is a special case: just copy the input...
497 : */
498 : Datum
499 4818 : byteasend(PG_FUNCTION_ARGS)
500 : {
501 4818 : bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
502 :
503 4818 : PG_RETURN_BYTEA_P(vlena);
504 : }
505 :
506 : Datum
507 32760 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)
508 : {
509 : StringInfo state;
510 :
511 32760 : state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
512 :
513 : /* Append the value unless null. */
514 32760 : if (!PG_ARGISNULL(1))
515 : {
516 32760 : bytea *value = PG_GETARG_BYTEA_PP(1);
517 :
518 : /* On the first time through, we ignore the delimiter. */
519 32760 : if (state == NULL)
520 18 : state = makeStringAggState(fcinfo);
521 32742 : else if (!PG_ARGISNULL(2))
522 : {
523 32738 : bytea *delim = PG_GETARG_BYTEA_PP(2);
524 :
525 32738 : appendBinaryStringInfo(state, VARDATA_ANY(delim), VARSIZE_ANY_EXHDR(delim));
526 : }
527 :
528 32760 : appendBinaryStringInfo(state, VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value));
529 : }
530 :
531 : /*
532 : * The transition type for string_agg() is declared to be "internal",
533 : * which is a pass-by-value type the same size as a pointer.
534 : */
535 32760 : PG_RETURN_POINTER(state);
536 : }
537 :
538 : Datum
539 22 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
540 : {
541 : StringInfo state;
542 :
543 : /* cannot be called directly because of internal-type argument */
544 : Assert(AggCheckCallContext(fcinfo, NULL));
545 :
546 22 : state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
547 :
548 22 : if (state != NULL)
549 : {
550 : bytea *result;
551 :
552 18 : result = (bytea *) palloc(state->len + VARHDRSZ);
553 18 : SET_VARSIZE(result, state->len + VARHDRSZ);
554 18 : memcpy(VARDATA(result), state->data, state->len);
555 18 : PG_RETURN_BYTEA_P(result);
556 : }
557 : else
558 4 : PG_RETURN_NULL();
559 : }
560 :
561 : /*
562 : * textin - converts "..." to internal representation
563 : */
564 : Datum
565 5767454 : textin(PG_FUNCTION_ARGS)
566 : {
567 5767454 : char *inputText = PG_GETARG_CSTRING(0);
568 :
569 5767454 : PG_RETURN_TEXT_P(cstring_to_text(inputText));
570 : }
571 :
572 : /*
573 : * textout - converts internal representation to "..."
574 : */
575 : Datum
576 2367168 : textout(PG_FUNCTION_ARGS)
577 : {
578 2367168 : Datum txt = PG_GETARG_DATUM(0);
579 :
580 2367168 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
581 : }
582 :
583 : /*
584 : * textrecv - converts external binary format to text
585 : */
586 : Datum
587 53366 : textrecv(PG_FUNCTION_ARGS)
588 : {
589 53366 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
590 : text *result;
591 : char *str;
592 : int nbytes;
593 :
594 53366 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
595 :
596 53366 : result = cstring_to_text_with_len(str, nbytes);
597 53366 : pfree(str);
598 53366 : PG_RETURN_TEXT_P(result);
599 : }
600 :
601 : /*
602 : * textsend - converts text to binary format
603 : */
604 : Datum
605 36050 : textsend(PG_FUNCTION_ARGS)
606 : {
607 36050 : text *t = PG_GETARG_TEXT_PP(0);
608 : StringInfoData buf;
609 :
610 36050 : pq_begintypsend(&buf);
611 36050 : pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
612 36050 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
613 : }
614 :
615 :
616 : /*
617 : * unknownin - converts "..." to internal representation
618 : */
619 : Datum
620 0 : unknownin(PG_FUNCTION_ARGS)
621 : {
622 0 : char *str = PG_GETARG_CSTRING(0);
623 :
624 : /* representation is same as cstring */
625 0 : PG_RETURN_CSTRING(pstrdup(str));
626 : }
627 :
628 : /*
629 : * unknownout - converts internal representation to "..."
630 : */
631 : Datum
632 318 : unknownout(PG_FUNCTION_ARGS)
633 : {
634 : /* representation is same as cstring */
635 318 : char *str = PG_GETARG_CSTRING(0);
636 :
637 318 : PG_RETURN_CSTRING(pstrdup(str));
638 : }
639 :
640 : /*
641 : * unknownrecv - converts external binary format to unknown
642 : */
643 : Datum
644 0 : unknownrecv(PG_FUNCTION_ARGS)
645 : {
646 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
647 : char *str;
648 : int nbytes;
649 :
650 0 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
651 : /* representation is same as cstring */
652 0 : PG_RETURN_CSTRING(str);
653 : }
654 :
655 : /*
656 : * unknownsend - converts unknown to binary format
657 : */
658 : Datum
659 0 : unknownsend(PG_FUNCTION_ARGS)
660 : {
661 : /* representation is same as cstring */
662 0 : char *str = PG_GETARG_CSTRING(0);
663 : StringInfoData buf;
664 :
665 0 : pq_begintypsend(&buf);
666 0 : pq_sendtext(&buf, str, strlen(str));
667 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
668 : }
669 :
670 :
671 : /* ========== PUBLIC ROUTINES ========== */
672 :
673 : /*
674 : * textlen -
675 : * returns the logical length of a text*
676 : * (which is less than the VARSIZE of the text*)
677 : */
678 : Datum
679 202646 : textlen(PG_FUNCTION_ARGS)
680 : {
681 202646 : Datum str = PG_GETARG_DATUM(0);
682 :
683 : /* try to avoid decompressing argument */
684 202646 : PG_RETURN_INT32(text_length(str));
685 : }
686 :
687 : /*
688 : * text_length -
689 : * Does the real work for textlen()
690 : *
691 : * This is broken out so it can be called directly by other string processing
692 : * functions. Note that the argument is passed as a Datum, to indicate that
693 : * it may still be in compressed form. We can avoid decompressing it at all
694 : * in some cases.
695 : */
696 : static int32
697 202654 : text_length(Datum str)
698 : {
699 : /* fastpath when max encoding length is one */
700 202654 : if (pg_database_encoding_max_length() == 1)
701 24 : PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
702 : else
703 : {
704 202630 : text *t = DatumGetTextPP(str);
705 :
706 202630 : PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
707 : VARSIZE_ANY_EXHDR(t)));
708 : }
709 : }
710 :
711 : /*
712 : * textoctetlen -
713 : * returns the physical length of a text*
714 : * (which is less than the VARSIZE of the text*)
715 : */
716 : Datum
717 58 : textoctetlen(PG_FUNCTION_ARGS)
718 : {
719 58 : Datum str = PG_GETARG_DATUM(0);
720 :
721 : /* We need not detoast the input at all */
722 58 : PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
723 : }
724 :
725 : /*
726 : * textcat -
727 : * takes two text* and returns a text* that is the concatenation of
728 : * the two.
729 : *
730 : * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
731 : * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
732 : * Allocate space for output in all cases.
733 : * XXX - thomas 1997-07-10
734 : */
735 : Datum
736 1794664 : textcat(PG_FUNCTION_ARGS)
737 : {
738 1794664 : text *t1 = PG_GETARG_TEXT_PP(0);
739 1794664 : text *t2 = PG_GETARG_TEXT_PP(1);
740 :
741 1794664 : PG_RETURN_TEXT_P(text_catenate(t1, t2));
742 : }
743 :
744 : /*
745 : * text_catenate
746 : * Guts of textcat(), broken out so it can be used by other functions
747 : *
748 : * Arguments can be in short-header form, but not compressed or out-of-line
749 : */
750 : static text *
751 1794728 : text_catenate(text *t1, text *t2)
752 : {
753 : text *result;
754 : int len1,
755 : len2,
756 : len;
757 : char *ptr;
758 :
759 1794728 : len1 = VARSIZE_ANY_EXHDR(t1);
760 1794728 : len2 = VARSIZE_ANY_EXHDR(t2);
761 :
762 : /* paranoia ... probably should throw error instead? */
763 1794728 : if (len1 < 0)
764 0 : len1 = 0;
765 1794728 : if (len2 < 0)
766 0 : len2 = 0;
767 :
768 1794728 : len = len1 + len2 + VARHDRSZ;
769 1794728 : result = (text *) palloc(len);
770 :
771 : /* Set size of result string... */
772 1794728 : SET_VARSIZE(result, len);
773 :
774 : /* Fill data field of result string... */
775 1794728 : ptr = VARDATA(result);
776 1794728 : if (len1 > 0)
777 1791850 : memcpy(ptr, VARDATA_ANY(t1), len1);
778 1794728 : if (len2 > 0)
779 1794624 : memcpy(ptr + len1, VARDATA_ANY(t2), len2);
780 :
781 1794728 : return result;
782 : }
783 :
/*
 * charlen_to_bytelen()
 *	Compute the number of bytes occupied by n characters starting at *p
 *
 * The caller must guarantee that n characters really are present; the
 * string need not be null-terminated.
 */
static int
charlen_to_bytelen(const char *p, int n)
{
	const char *cursor = p;

	/* single-byte encodings: one character is one byte */
	if (pg_database_encoding_max_length() == 1)
		return n;

	/* otherwise step over the characters one at a time */
	while (n > 0)
	{
		cursor += pg_mblen(cursor);
		n--;
	}

	return cursor - p;
}
809 :
810 : /*
811 : * text_substr()
812 : * Return a substring starting at the specified position.
813 : * - thomas 1997-12-31
814 : *
815 : * Input:
816 : * - string
817 : * - starting position (is one-based)
818 : * - string length
819 : *
820 : * If the starting position is zero or less, then return from the start of the string
821 : * adjusting the length to be consistent with the "negative start" per SQL.
822 : * If the length is less than zero, return the remaining string.
823 : *
824 : * Added multibyte support.
825 : * - Tatsuo Ishii 1998-4-21
826 : * Changed behavior if starting position is less than one to conform to SQL behavior.
827 : * Formerly returned the entire string; now returns a portion.
828 : * - Thomas Lockhart 1998-12-10
829 : * Now uses faster TOAST-slicing interface
830 : * - John Gray 2002-02-22
831 : * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
832 : * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
833 : * error; if E < 1, return '', not entire string). Fixed MB related bug when
834 : * S > LC and < LC + 4 sometimes garbage characters are returned.
835 : * - Joe Conway 2002-08-10
836 : */
837 : Datum
838 70068 : text_substr(PG_FUNCTION_ARGS)
839 : {
840 70068 : PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
841 : PG_GETARG_INT32(1),
842 : PG_GETARG_INT32(2),
843 : false));
844 : }
845 :
846 : /*
847 : * text_substr_no_len -
848 : * Wrapper to avoid opr_sanity failure due to
849 : * one function accepting a different number of args.
850 : */
851 : Datum
852 26 : text_substr_no_len(PG_FUNCTION_ARGS)
853 : {
854 26 : PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
855 : PG_GETARG_INT32(1),
856 : -1, true));
857 : }
858 :
859 : /*
860 : * text_substring -
861 : * Does the real work for text_substr() and text_substr_no_len()
862 : *
863 : * This is broken out so it can be called directly by other string processing
864 : * functions. Note that the argument is passed as a Datum, to indicate that
865 : * it may still be in compressed/toasted form. We can avoid detoasting all
866 : * of it in some cases.
867 : *
868 : * The result is always a freshly palloc'd datum.
869 : */
870 : static text *
871 96526 : text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
872 : {
873 96526 : int32 eml = pg_database_encoding_max_length();
874 96526 : int32 S = start; /* start position */
875 : int32 S1; /* adjusted start position */
876 : int32 L1; /* adjusted substring length */
877 : int32 E; /* end position */
878 :
879 : /*
880 : * SQL99 says S can be zero or negative, but we still must fetch from the
881 : * start of the string.
882 : */
883 96526 : S1 = Max(S, 1);
884 :
885 : /* life is easy if the encoding max length is 1 */
886 96526 : if (eml == 1)
887 : {
888 12 : if (length_not_specified) /* special case - get length to end of
889 : * string */
890 0 : L1 = -1;
891 12 : else if (length < 0)
892 : {
893 : /* SQL99 says to throw an error for E < S, i.e., negative length */
894 0 : ereport(ERROR,
895 : (errcode(ERRCODE_SUBSTRING_ERROR),
896 : errmsg("negative substring length not allowed")));
897 : L1 = -1; /* silence stupider compilers */
898 : }
899 12 : else if (pg_add_s32_overflow(S, length, &E))
900 : {
901 : /*
902 : * L could be large enough for S + L to overflow, in which case
903 : * the substring must run to end of string.
904 : */
905 0 : L1 = -1;
906 : }
907 : else
908 : {
909 : /*
910 : * A zero or negative value for the end position can happen if the
911 : * start was negative or one. SQL99 says to return a zero-length
912 : * string.
913 : */
914 12 : if (E < 1)
915 0 : return cstring_to_text("");
916 :
917 12 : L1 = E - S1;
918 : }
919 :
920 : /*
921 : * If the start position is past the end of the string, SQL99 says to
922 : * return a zero-length string -- DatumGetTextPSlice() will do that
923 : * for us. We need only convert S1 to zero-based starting position.
924 : */
925 12 : return DatumGetTextPSlice(str, S1 - 1, L1);
926 : }
927 96514 : else if (eml > 1)
928 : {
929 : /*
930 : * When encoding max length is > 1, we can't get LC without
931 : * detoasting, so we'll grab a conservatively large slice now and go
932 : * back later to do the right thing
933 : */
934 : int32 slice_start;
935 : int32 slice_size;
936 : int32 slice_strlen;
937 : text *slice;
938 : int32 E1;
939 : int32 i;
940 : char *p;
941 : char *s;
942 : text *ret;
943 :
944 : /*
945 : * We need to start at position zero because there is no way to know
946 : * in advance which byte offset corresponds to the supplied start
947 : * position.
948 : */
949 96514 : slice_start = 0;
950 :
951 96514 : if (length_not_specified) /* special case - get length to end of
952 : * string */
953 58 : slice_size = L1 = -1;
954 96456 : else if (length < 0)
955 : {
956 : /* SQL99 says to throw an error for E < S, i.e., negative length */
957 8 : ereport(ERROR,
958 : (errcode(ERRCODE_SUBSTRING_ERROR),
959 : errmsg("negative substring length not allowed")));
960 : slice_size = L1 = -1; /* silence stupider compilers */
961 : }
962 96448 : else if (pg_add_s32_overflow(S, length, &E))
963 : {
964 : /*
965 : * L could be large enough for S + L to overflow, in which case
966 : * the substring must run to end of string.
967 : */
968 4 : slice_size = L1 = -1;
969 : }
970 : else
971 : {
972 : /*
973 : * A zero or negative value for the end position can happen if the
974 : * start was negative or one. SQL99 says to return a zero-length
975 : * string.
976 : */
977 96444 : if (E < 1)
978 0 : return cstring_to_text("");
979 :
980 : /*
981 : * if E is past the end of the string, the tuple toaster will
982 : * truncate the length for us
983 : */
984 96444 : L1 = E - S1;
985 :
986 : /*
987 : * Total slice size in bytes can't be any longer than the start
988 : * position plus substring length times the encoding max length.
989 : * If that overflows, we can just use -1.
990 : */
991 96444 : if (pg_mul_s32_overflow(E, eml, &slice_size))
992 4 : slice_size = -1;
993 : }
994 :
995 : /*
996 : * If we're working with an untoasted source, no need to do an extra
997 : * copying step.
998 : */
999 96506 : if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
1000 96482 : VARATT_IS_EXTERNAL(DatumGetPointer(str)))
1001 68 : slice = DatumGetTextPSlice(str, slice_start, slice_size);
1002 : else
1003 96438 : slice = (text *) DatumGetPointer(str);
1004 :
1005 : /* see if we got back an empty string */
1006 96506 : if (VARSIZE_ANY_EXHDR(slice) == 0)
1007 : {
1008 0 : if (slice != (text *) DatumGetPointer(str))
1009 0 : pfree(slice);
1010 0 : return cstring_to_text("");
1011 : }
1012 :
1013 : /* Now we can get the actual length of the slice in MB characters */
1014 96506 : slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
1015 96506 : VARSIZE_ANY_EXHDR(slice));
1016 :
1017 : /*
1018 : * Check that the start position wasn't > slice_strlen. If so, SQL99
1019 : * says to return a zero-length string.
1020 : */
1021 96506 : if (S1 > slice_strlen)
1022 : {
1023 20 : if (slice != (text *) DatumGetPointer(str))
1024 0 : pfree(slice);
1025 20 : return cstring_to_text("");
1026 : }
1027 :
1028 : /*
1029 : * Adjust L1 and E1 now that we know the slice string length. Again
1030 : * remember that S1 is one based, and slice_start is zero based.
1031 : */
1032 96486 : if (L1 > -1)
1033 96444 : E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
1034 : else
1035 42 : E1 = slice_start + 1 + slice_strlen;
1036 :
1037 : /*
1038 : * Find the start position in the slice; remember S1 is not zero based
1039 : */
1040 96486 : p = VARDATA_ANY(slice);
1041 3296500 : for (i = 0; i < S1 - 1; i++)
1042 3200014 : p += pg_mblen(p);
1043 :
1044 : /* hang onto a pointer to our start position */
1045 96486 : s = p;
1046 :
1047 : /*
1048 : * Count the actual bytes used by the substring of the requested
1049 : * length.
1050 : */
1051 1650486 : for (i = S1; i < E1; i++)
1052 1554000 : p += pg_mblen(p);
1053 :
1054 96486 : ret = (text *) palloc(VARHDRSZ + (p - s));
1055 96486 : SET_VARSIZE(ret, VARHDRSZ + (p - s));
1056 96486 : memcpy(VARDATA(ret), s, (p - s));
1057 :
1058 96486 : if (slice != (text *) DatumGetPointer(str))
1059 68 : pfree(slice);
1060 :
1061 96486 : return ret;
1062 : }
1063 : else
1064 0 : elog(ERROR, "invalid backend encoding: encoding max length < 1");
1065 :
1066 : /* not reached: suppress compiler warning */
1067 : return NULL;
1068 : }
1069 :
1070 : /*
1071 : * textoverlay
1072 : * Replace specified substring of first string with second
1073 : *
1074 : * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1075 : * This code is a direct implementation of what the standard says.
1076 : */
1077 : Datum
1078 24 : textoverlay(PG_FUNCTION_ARGS)
1079 : {
1080 24 : text *t1 = PG_GETARG_TEXT_PP(0);
1081 24 : text *t2 = PG_GETARG_TEXT_PP(1);
1082 24 : int sp = PG_GETARG_INT32(2); /* substring start position */
1083 24 : int sl = PG_GETARG_INT32(3); /* substring length */
1084 :
1085 24 : PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1086 : }
1087 :
1088 : Datum
1089 8 : textoverlay_no_len(PG_FUNCTION_ARGS)
1090 : {
1091 8 : text *t1 = PG_GETARG_TEXT_PP(0);
1092 8 : text *t2 = PG_GETARG_TEXT_PP(1);
1093 8 : int sp = PG_GETARG_INT32(2); /* substring start position */
1094 : int sl;
1095 :
1096 8 : sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1097 8 : PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1098 : }
1099 :
1100 : static text *
1101 32 : text_overlay(text *t1, text *t2, int sp, int sl)
1102 : {
1103 : text *result;
1104 : text *s1;
1105 : text *s2;
1106 : int sp_pl_sl;
1107 :
1108 : /*
1109 : * Check for possible integer-overflow cases. For negative sp, throw a
1110 : * "substring length" error because that's what should be expected
1111 : * according to the spec's definition of OVERLAY().
1112 : */
1113 32 : if (sp <= 0)
1114 0 : ereport(ERROR,
1115 : (errcode(ERRCODE_SUBSTRING_ERROR),
1116 : errmsg("negative substring length not allowed")));
1117 32 : if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
1118 0 : ereport(ERROR,
1119 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1120 : errmsg("integer out of range")));
1121 :
1122 32 : s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1123 32 : s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1124 32 : result = text_catenate(s1, t2);
1125 32 : result = text_catenate(result, s2);
1126 :
1127 32 : return result;
1128 : }
1129 :
1130 : /*
1131 : * textpos -
1132 : * Return the position of the specified substring.
1133 : * Implements the SQL POSITION() function.
1134 : * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1135 : * - thomas 1997-07-27
1136 : */
1137 : Datum
1138 92 : textpos(PG_FUNCTION_ARGS)
1139 : {
1140 92 : text *str = PG_GETARG_TEXT_PP(0);
1141 92 : text *search_str = PG_GETARG_TEXT_PP(1);
1142 :
1143 92 : PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
1144 : }
1145 :
1146 : /*
1147 : * text_position -
1148 : * Does the real work for textpos()
1149 : *
1150 : * Inputs:
1151 : * t1 - string to be searched
1152 : * t2 - pattern to match within t1
1153 : * Result:
1154 : * Character index of the first matched char, starting from 1,
1155 : * or 0 if no match.
1156 : *
1157 : * This is broken out so it can be called directly by other string processing
1158 : * functions.
1159 : */
1160 : static int
1161 92 : text_position(text *t1, text *t2, Oid collid)
1162 : {
1163 : TextPositionState state;
1164 : int result;
1165 :
1166 : /* Empty needle always matches at position 1 */
1167 92 : if (VARSIZE_ANY_EXHDR(t2) < 1)
1168 8 : return 1;
1169 :
1170 : /* Otherwise, can't match if haystack is shorter than needle */
1171 84 : if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2))
1172 20 : return 0;
1173 :
1174 64 : text_position_setup(t1, t2, collid, &state);
1175 64 : if (!text_position_next(&state))
1176 22 : result = 0;
1177 : else
1178 42 : result = text_position_get_match_pos(&state);
1179 64 : text_position_cleanup(&state);
1180 64 : return result;
1181 : }
1182 :
1183 :
1184 : /*
1185 : * text_position_setup, text_position_next, text_position_cleanup -
1186 : * Component steps of text_position()
1187 : *
1188 : * These are broken out so that a string can be efficiently searched for
1189 : * multiple occurrences of the same pattern. text_position_next may be
1190 : * called multiple times, and it advances to the next match on each call.
1191 : * text_position_get_match_ptr() and text_position_get_match_pos() return
1192 : * a pointer or 1-based character position of the last match, respectively.
1193 : *
1194 : * The "state" variable is normally just a local variable in the caller.
1195 : *
1196 : * NOTE: text_position_next skips over the matched portion. For example,
1197 : * searching for "xx" in "xxx" returns only one match, not two.
1198 : */
1199 :
1200 : static void
1201 1848 : text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
1202 : {
1203 1848 : int len1 = VARSIZE_ANY_EXHDR(t1);
1204 1848 : int len2 = VARSIZE_ANY_EXHDR(t2);
1205 1848 : pg_locale_t mylocale = 0;
1206 :
1207 1848 : check_collation_set(collid);
1208 :
1209 1848 : if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1210 0 : mylocale = pg_newlocale_from_collation(collid);
1211 :
1212 1848 : if (mylocale && !mylocale->deterministic)
1213 0 : ereport(ERROR,
1214 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1215 : errmsg("nondeterministic collations are not supported for substring searches")));
1216 :
1217 : Assert(len1 > 0);
1218 : Assert(len2 > 0);
1219 :
1220 : /*
1221 : * Even with a multi-byte encoding, we perform the search using the raw
1222 : * byte sequence, ignoring multibyte issues. For UTF-8, that works fine,
1223 : * because in UTF-8 the byte sequence of one character cannot contain
1224 : * another character. For other multi-byte encodings, we do the search
1225 : * initially as a simple byte search, ignoring multibyte issues, but
1226 : * verify afterwards that the match we found is at a character boundary,
1227 : * and continue the search if it was a false match.
1228 : */
1229 1848 : if (pg_database_encoding_max_length() == 1)
1230 : {
1231 36 : state->is_multibyte = false;
1232 36 : state->is_multibyte_char_in_char = false;
1233 : }
1234 1812 : else if (GetDatabaseEncoding() == PG_UTF8)
1235 : {
1236 1812 : state->is_multibyte = true;
1237 1812 : state->is_multibyte_char_in_char = false;
1238 : }
1239 : else
1240 : {
1241 0 : state->is_multibyte = true;
1242 0 : state->is_multibyte_char_in_char = true;
1243 : }
1244 :
1245 1848 : state->str1 = VARDATA_ANY(t1);
1246 1848 : state->str2 = VARDATA_ANY(t2);
1247 1848 : state->len1 = len1;
1248 1848 : state->len2 = len2;
1249 1848 : state->last_match = NULL;
1250 1848 : state->refpoint = state->str1;
1251 1848 : state->refpos = 0;
1252 :
1253 : /*
1254 : * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1255 : * notes we use the terminology that the "haystack" is the string to be
1256 : * searched (t1) and the "needle" is the pattern being sought (t2).
1257 : *
1258 : * If the needle is empty or bigger than the haystack then there is no
1259 : * point in wasting cycles initializing the table. We also choose not to
1260 : * use B-M-H for needles of length 1, since the skip table can't possibly
1261 : * save anything in that case.
1262 : */
1263 1848 : if (len1 >= len2 && len2 > 1)
1264 : {
1265 1702 : int searchlength = len1 - len2;
1266 : int skiptablemask;
1267 : int last;
1268 : int i;
1269 1702 : const char *str2 = state->str2;
1270 :
1271 : /*
1272 : * First we must determine how much of the skip table to use. The
1273 : * declaration of TextPositionState allows up to 256 elements, but for
1274 : * short search problems we don't really want to have to initialize so
1275 : * many elements --- it would take too long in comparison to the
1276 : * actual search time. So we choose a useful skip table size based on
1277 : * the haystack length minus the needle length. The closer the needle
1278 : * length is to the haystack length the less useful skipping becomes.
1279 : *
1280 : * Note: since we use bit-masking to select table elements, the skip
1281 : * table size MUST be a power of 2, and so the mask must be 2^N-1.
1282 : */
1283 1702 : if (searchlength < 16)
1284 40 : skiptablemask = 3;
1285 1662 : else if (searchlength < 64)
1286 12 : skiptablemask = 7;
1287 1650 : else if (searchlength < 128)
1288 2 : skiptablemask = 15;
1289 1648 : else if (searchlength < 512)
1290 126 : skiptablemask = 31;
1291 1522 : else if (searchlength < 2048)
1292 1428 : skiptablemask = 63;
1293 94 : else if (searchlength < 4096)
1294 32 : skiptablemask = 127;
1295 : else
1296 62 : skiptablemask = 255;
1297 1702 : state->skiptablemask = skiptablemask;
1298 :
1299 : /*
1300 : * Initialize the skip table. We set all elements to the needle
1301 : * length, since this is the correct skip distance for any character
1302 : * not found in the needle.
1303 : */
1304 117382 : for (i = 0; i <= skiptablemask; i++)
1305 115680 : state->skiptable[i] = len2;
1306 :
1307 : /*
1308 : * Now examine the needle. For each character except the last one,
1309 : * set the corresponding table element to the appropriate skip
1310 : * distance. Note that when two characters share the same skip table
1311 : * entry, the one later in the needle must determine the skip
1312 : * distance.
1313 : */
1314 1702 : last = len2 - 1;
1315 :
1316 20938 : for (i = 0; i < last; i++)
1317 19236 : state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1318 : }
1319 1848 : }
1320 :
1321 : /*
1322 : * Advance to the next match, starting from the end of the previous match
1323 : * (or the beginning of the string, on first call). Returns true if a match
1324 : * is found.
1325 : *
1326 : * Note that this refuses to match an empty-string needle. Most callers
1327 : * will have handled that case specially and we'll never see it here.
1328 : */
1329 : static bool
1330 6986 : text_position_next(TextPositionState *state)
1331 : {
1332 6986 : int needle_len = state->len2;
1333 : char *start_ptr;
1334 : char *matchptr;
1335 :
1336 6986 : if (needle_len <= 0)
1337 0 : return false; /* result for empty pattern */
1338 :
1339 : /* Start from the point right after the previous match. */
1340 6986 : if (state->last_match)
1341 5130 : start_ptr = state->last_match + needle_len;
1342 : else
1343 1856 : start_ptr = state->str1;
1344 :
1345 6986 : retry:
1346 6986 : matchptr = text_position_next_internal(start_ptr, state);
1347 :
1348 6986 : if (!matchptr)
1349 1798 : return false;
1350 :
1351 : /*
1352 : * Found a match for the byte sequence. If this is a multibyte encoding,
1353 : * where one character's byte sequence can appear inside a longer
1354 : * multi-byte character, we need to verify that the match was at a
1355 : * character boundary, not in the middle of a multi-byte character.
1356 : */
1357 5188 : if (state->is_multibyte_char_in_char)
1358 : {
1359 : /* Walk one character at a time, until we reach the match. */
1360 :
1361 : /* the search should never move backwards. */
1362 : Assert(state->refpoint <= matchptr);
1363 :
1364 0 : while (state->refpoint < matchptr)
1365 : {
1366 : /* step to next character. */
1367 0 : state->refpoint += pg_mblen(state->refpoint);
1368 0 : state->refpos++;
1369 :
1370 : /*
1371 : * If we stepped over the match's start position, then it was a
1372 : * false positive, where the byte sequence appeared in the middle
1373 : * of a multi-byte character. Skip it, and continue the search at
1374 : * the next character boundary.
1375 : */
1376 0 : if (state->refpoint > matchptr)
1377 : {
1378 0 : start_ptr = state->refpoint;
1379 0 : goto retry;
1380 : }
1381 : }
1382 : }
1383 :
1384 5188 : state->last_match = matchptr;
1385 5188 : return true;
1386 : }
1387 :
1388 : /*
1389 : * Subroutine of text_position_next(). This searches for the raw byte
1390 : * sequence, ignoring any multi-byte encoding issues. Returns the first
1391 : * match starting at 'start_ptr', or NULL if no match is found.
1392 : */
1393 : static char *
1394 6986 : text_position_next_internal(char *start_ptr, TextPositionState *state)
1395 : {
1396 6986 : int haystack_len = state->len1;
1397 6986 : int needle_len = state->len2;
1398 6986 : int skiptablemask = state->skiptablemask;
1399 6986 : const char *haystack = state->str1;
1400 6986 : const char *needle = state->str2;
1401 6986 : const char *haystack_end = &haystack[haystack_len];
1402 : const char *hptr;
1403 :
1404 : Assert(start_ptr >= haystack && start_ptr <= haystack_end);
1405 :
1406 6986 : if (needle_len == 1)
1407 : {
1408 : /* No point in using B-M-H for a one-character needle */
1409 502 : char nchar = *needle;
1410 :
1411 502 : hptr = start_ptr;
1412 3770 : while (hptr < haystack_end)
1413 : {
1414 3658 : if (*hptr == nchar)
1415 390 : return (char *) hptr;
1416 3268 : hptr++;
1417 : }
1418 : }
1419 : else
1420 : {
1421 6484 : const char *needle_last = &needle[needle_len - 1];
1422 :
1423 : /* Start at startpos plus the length of the needle */
1424 6484 : hptr = start_ptr + needle_len - 1;
1425 168796 : while (hptr < haystack_end)
1426 : {
1427 : /* Match the needle scanning *backward* */
1428 : const char *nptr;
1429 : const char *p;
1430 :
1431 167110 : nptr = needle_last;
1432 167110 : p = hptr;
1433 236516 : while (*nptr == *p)
1434 : {
1435 : /* Matched it all? If so, return 1-based position */
1436 74204 : if (nptr == needle)
1437 4798 : return (char *) p;
1438 69406 : nptr--, p--;
1439 : }
1440 :
1441 : /*
1442 : * No match, so use the haystack char at hptr to decide how far to
1443 : * advance. If the needle had any occurrence of that character
1444 : * (or more precisely, one sharing the same skiptable entry)
1445 : * before its last character, then we advance far enough to align
1446 : * the last such needle character with that haystack position.
1447 : * Otherwise we can advance by the whole needle length.
1448 : */
1449 162312 : hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1450 : }
1451 : }
1452 :
1453 1798 : return 0; /* not found */
1454 : }
1455 :
1456 : /*
1457 : * Return a pointer to the current match.
1458 : *
1459 : * The returned pointer points into the original haystack string.
1460 : */
1461 : static char *
1462 5126 : text_position_get_match_ptr(TextPositionState *state)
1463 : {
1464 5126 : return state->last_match;
1465 : }
1466 :
1467 : /*
1468 : * Return the offset of the current match.
1469 : *
1470 : * The offset is in characters, 1-based.
1471 : */
1472 : static int
1473 42 : text_position_get_match_pos(TextPositionState *state)
1474 : {
1475 42 : if (!state->is_multibyte)
1476 0 : return state->last_match - state->str1 + 1;
1477 : else
1478 : {
1479 : /* Convert the byte position to char position. */
1480 102 : while (state->refpoint < state->last_match)
1481 : {
1482 60 : state->refpoint += pg_mblen(state->refpoint);
1483 60 : state->refpos++;
1484 : }
1485 : Assert(state->refpoint == state->last_match);
1486 42 : return state->refpos + 1;
1487 : }
1488 : }
1489 :
1490 : /*
1491 : * Reset search state to the initial state installed by text_position_setup.
1492 : *
1493 : * The next call to text_position_next will search from the beginning
1494 : * of the string.
1495 : */
1496 : static void
1497 8 : text_position_reset(TextPositionState *state)
1498 : {
1499 8 : state->last_match = NULL;
1500 8 : state->refpoint = state->str1;
1501 8 : state->refpos = 0;
1502 8 : }
1503 :
1504 : static void
1505 1848 : text_position_cleanup(TextPositionState *state)
1506 : {
1507 : /* no cleanup needed */
1508 1848 : }
1509 :
1510 :
1511 : static void
1512 8614084 : check_collation_set(Oid collid)
1513 : {
1514 8614084 : if (!OidIsValid(collid))
1515 : {
1516 : /*
1517 : * This typically means that the parser could not resolve a conflict
1518 : * of implicit collations, so report it that way.
1519 : */
1520 8 : ereport(ERROR,
1521 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1522 : errmsg("could not determine which collation to use for string comparison"),
1523 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1524 : }
1525 8614076 : }
1526 :
1527 : /* varstr_cmp()
1528 : * Comparison function for text strings with given lengths.
1529 : * Includes locale support, but must copy strings to temporary memory
1530 : * to allow null-termination for inputs to strcoll().
1531 : * Returns an integer less than, equal to, or greater than zero, indicating
1532 : * whether arg1 is less than, equal to, or greater than arg2.
1533 : *
1534 : * Note: many functions that depend on this are marked leakproof; therefore,
1535 : * avoid reporting the actual contents of the input when throwing errors.
1536 : * All errors herein should be things that can't happen except on corrupt
1537 : * data, anyway; otherwise we will have trouble with indexing strings that
1538 : * would cause them.
1539 : */
1540 : int
1541 6089722 : varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
1542 : {
1543 : int result;
1544 :
1545 6089722 : check_collation_set(collid);
1546 :
1547 : /*
1548 : * Unfortunately, there is no strncoll(), so in the non-C locale case we
1549 : * have to do some memory copying. This turns out to be significantly
1550 : * slower, so we optimize the case where LC_COLLATE is C. We also try to
1551 : * optimize relatively-short strings by avoiding palloc/pfree overhead.
1552 : */
1553 6089718 : if (lc_collate_is_c(collid))
1554 : {
1555 2946070 : result = memcmp(arg1, arg2, Min(len1, len2));
1556 2946070 : if ((result == 0) && (len1 != len2))
1557 74340 : result = (len1 < len2) ? -1 : 1;
1558 : }
1559 : else
1560 : {
1561 : char a1buf[TEXTBUFLEN];
1562 : char a2buf[TEXTBUFLEN];
1563 : char *a1p,
1564 : *a2p;
1565 3143648 : pg_locale_t mylocale = 0;
1566 :
1567 3143648 : if (collid != DEFAULT_COLLATION_OID)
1568 0 : mylocale = pg_newlocale_from_collation(collid);
1569 :
1570 : /*
1571 : * memcmp() can't tell us which of two unequal strings sorts first,
1572 : * but it's a cheap way to tell if they're equal. Testing shows that
1573 : * memcmp() followed by strcoll() is only trivially slower than
1574 : * strcoll() by itself, so we don't lose much if this doesn't work out
1575 : * very often, and if it does - for example, because there are many
1576 : * equal strings in the input - then we win big by avoiding expensive
1577 : * collation-aware comparisons.
1578 : */
1579 3143648 : if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
1580 1186932 : return 0;
1581 :
1582 : #ifdef WIN32
1583 : /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1584 : if (GetDatabaseEncoding() == PG_UTF8
1585 : && (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
1586 : {
1587 : int a1len;
1588 : int a2len;
1589 : int r;
1590 :
1591 : if (len1 >= TEXTBUFLEN / 2)
1592 : {
1593 : a1len = len1 * 2 + 2;
1594 : a1p = palloc(a1len);
1595 : }
1596 : else
1597 : {
1598 : a1len = TEXTBUFLEN;
1599 : a1p = a1buf;
1600 : }
1601 : if (len2 >= TEXTBUFLEN / 2)
1602 : {
1603 : a2len = len2 * 2 + 2;
1604 : a2p = palloc(a2len);
1605 : }
1606 : else
1607 : {
1608 : a2len = TEXTBUFLEN;
1609 : a2p = a2buf;
1610 : }
1611 :
1612 : /* stupid Microsloth API does not work for zero-length input */
1613 : if (len1 == 0)
1614 : r = 0;
1615 : else
1616 : {
1617 : r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1618 : (LPWSTR) a1p, a1len / 2);
1619 : if (!r)
1620 : ereport(ERROR,
1621 : (errmsg("could not convert string to UTF-16: error code %lu",
1622 : GetLastError())));
1623 : }
1624 : ((LPWSTR) a1p)[r] = 0;
1625 :
1626 : if (len2 == 0)
1627 : r = 0;
1628 : else
1629 : {
1630 : r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1631 : (LPWSTR) a2p, a2len / 2);
1632 : if (!r)
1633 : ereport(ERROR,
1634 : (errmsg("could not convert string to UTF-16: error code %lu",
1635 : GetLastError())));
1636 : }
1637 : ((LPWSTR) a2p)[r] = 0;
1638 :
1639 : errno = 0;
1640 : #ifdef HAVE_LOCALE_T
1641 : if (mylocale)
1642 : result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
1643 : else
1644 : #endif
1645 : result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1646 : if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1647 : * headers */
1648 : ereport(ERROR,
1649 : (errmsg("could not compare Unicode strings: %m")));
1650 :
1651 : /* Break tie if necessary. */
1652 : if (result == 0 &&
1653 : (!mylocale || mylocale->deterministic))
1654 : {
1655 : result = memcmp(arg1, arg2, Min(len1, len2));
1656 : if ((result == 0) && (len1 != len2))
1657 : result = (len1 < len2) ? -1 : 1;
1658 : }
1659 :
1660 : if (a1p != a1buf)
1661 : pfree(a1p);
1662 : if (a2p != a2buf)
1663 : pfree(a2p);
1664 :
1665 : return result;
1666 : }
1667 : #endif /* WIN32 */
1668 :
1669 1956716 : if (len1 >= TEXTBUFLEN)
1670 220 : a1p = (char *) palloc(len1 + 1);
1671 : else
1672 1956496 : a1p = a1buf;
1673 1956716 : if (len2 >= TEXTBUFLEN)
1674 88 : a2p = (char *) palloc(len2 + 1);
1675 : else
1676 1956628 : a2p = a2buf;
1677 :
1678 1956716 : memcpy(a1p, arg1, len1);
1679 1956716 : a1p[len1] = '\0';
1680 1956716 : memcpy(a2p, arg2, len2);
1681 1956716 : a2p[len2] = '\0';
1682 :
1683 1956716 : if (mylocale)
1684 : {
1685 0 : if (mylocale->provider == COLLPROVIDER_ICU)
1686 : {
1687 : #ifdef USE_ICU
1688 : #ifdef HAVE_UCOL_STRCOLLUTF8
1689 : if (GetDatabaseEncoding() == PG_UTF8)
1690 : {
1691 : UErrorCode status;
1692 :
1693 : status = U_ZERO_ERROR;
1694 : result = ucol_strcollUTF8(mylocale->info.icu.ucol,
1695 : arg1, len1,
1696 : arg2, len2,
1697 : &status);
1698 : if (U_FAILURE(status))
1699 : ereport(ERROR,
1700 : (errmsg("collation failed: %s", u_errorName(status))));
1701 : }
1702 : else
1703 : #endif
1704 : {
1705 : int32_t ulen1,
1706 : ulen2;
1707 : UChar *uchar1,
1708 : *uchar2;
1709 :
1710 : ulen1 = icu_to_uchar(&uchar1, arg1, len1);
1711 : ulen2 = icu_to_uchar(&uchar2, arg2, len2);
1712 :
1713 : result = ucol_strcoll(mylocale->info.icu.ucol,
1714 : uchar1, ulen1,
1715 : uchar2, ulen2);
1716 :
1717 : pfree(uchar1);
1718 : pfree(uchar2);
1719 : }
1720 : #else /* not USE_ICU */
1721 : /* shouldn't happen */
1722 0 : elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1723 : #endif /* not USE_ICU */
1724 : }
1725 : else
1726 : {
1727 : #ifdef HAVE_LOCALE_T
1728 0 : result = strcoll_l(a1p, a2p, mylocale->info.lt);
1729 : #else
1730 : /* shouldn't happen */
1731 : elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1732 : #endif
1733 : }
1734 : }
1735 : else
1736 1956716 : result = strcoll(a1p, a2p);
1737 :
1738 : /* Break tie if necessary. */
1739 1956716 : if (result == 0 &&
1740 0 : (!mylocale || mylocale->deterministic))
1741 0 : result = strcmp(a1p, a2p);
1742 :
1743 1956716 : if (a1p != a1buf)
1744 220 : pfree(a1p);
1745 1956716 : if (a2p != a2buf)
1746 88 : pfree(a2p);
1747 : }
1748 :
1749 4902786 : return result;
1750 : }
1751 :
1752 : /* text_cmp()
1753 : * Internal comparison function for text strings.
1754 : * Returns -1, 0 or 1
1755 : */
1756 : static int
1757 5008758 : text_cmp(text *arg1, text *arg2, Oid collid)
1758 : {
1759 : char *a1p,
1760 : *a2p;
1761 : int len1,
1762 : len2;
1763 :
1764 5008758 : a1p = VARDATA_ANY(arg1);
1765 5008758 : a2p = VARDATA_ANY(arg2);
1766 :
1767 5008758 : len1 = VARSIZE_ANY_EXHDR(arg1);
1768 5008758 : len2 = VARSIZE_ANY_EXHDR(arg2);
1769 :
1770 5008758 : return varstr_cmp(a1p, len1, a2p, len2, collid);
1771 : }
1772 :
1773 : /*
1774 : * Comparison functions for text strings.
1775 : *
1776 : * Note: btree indexes need these routines not to leak memory; therefore,
1777 : * be careful to free working copies of toasted datums. Most places don't
1778 : * need to be so careful.
1779 : */
1780 :
1781 : Datum
1782 2277484 : texteq(PG_FUNCTION_ARGS)
1783 : {
1784 2277484 : Oid collid = PG_GET_COLLATION();
1785 : bool result;
1786 :
1787 2277484 : check_collation_set(collid);
1788 :
1789 2277484 : if (lc_collate_is_c(collid) ||
1790 0 : collid == DEFAULT_COLLATION_OID ||
1791 0 : pg_newlocale_from_collation(collid)->deterministic)
1792 2277484 : {
1793 2277484 : Datum arg1 = PG_GETARG_DATUM(0);
1794 2277484 : Datum arg2 = PG_GETARG_DATUM(1);
1795 : Size len1,
1796 : len2;
1797 :
1798 : /*
1799 : * Since we only care about equality or not-equality, we can avoid all
1800 : * the expense of strcoll() here, and just do bitwise comparison. In
1801 : * fact, we don't even have to do a bitwise comparison if we can show
1802 : * the lengths of the strings are unequal; which might save us from
1803 : * having to detoast one or both values.
1804 : */
1805 2277484 : len1 = toast_raw_datum_size(arg1);
1806 2277484 : len2 = toast_raw_datum_size(arg2);
1807 2277484 : if (len1 != len2)
1808 631326 : result = false;
1809 : else
1810 : {
1811 1646158 : text *targ1 = DatumGetTextPP(arg1);
1812 1646158 : text *targ2 = DatumGetTextPP(arg2);
1813 :
1814 1646158 : result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1815 : len1 - VARHDRSZ) == 0);
1816 :
1817 1646158 : PG_FREE_IF_COPY(targ1, 0);
1818 1646158 : PG_FREE_IF_COPY(targ2, 1);
1819 : }
1820 : }
1821 : else
1822 : {
1823 0 : text *arg1 = PG_GETARG_TEXT_PP(0);
1824 0 : text *arg2 = PG_GETARG_TEXT_PP(1);
1825 :
1826 0 : result = (text_cmp(arg1, arg2, collid) == 0);
1827 :
1828 0 : PG_FREE_IF_COPY(arg1, 0);
1829 0 : PG_FREE_IF_COPY(arg2, 1);
1830 : }
1831 :
1832 2277484 : PG_RETURN_BOOL(result);
1833 : }
1834 :
1835 : Datum
1836 12130 : textne(PG_FUNCTION_ARGS)
1837 : {
1838 12130 : Oid collid = PG_GET_COLLATION();
1839 : bool result;
1840 :
1841 12130 : check_collation_set(collid);
1842 :
1843 12130 : if (lc_collate_is_c(collid) ||
1844 0 : collid == DEFAULT_COLLATION_OID ||
1845 0 : pg_newlocale_from_collation(collid)->deterministic)
1846 12130 : {
1847 12130 : Datum arg1 = PG_GETARG_DATUM(0);
1848 12130 : Datum arg2 = PG_GETARG_DATUM(1);
1849 : Size len1,
1850 : len2;
1851 :
1852 : /* See comment in texteq() */
1853 12130 : len1 = toast_raw_datum_size(arg1);
1854 12130 : len2 = toast_raw_datum_size(arg2);
1855 12130 : if (len1 != len2)
1856 686 : result = true;
1857 : else
1858 : {
1859 11444 : text *targ1 = DatumGetTextPP(arg1);
1860 11444 : text *targ2 = DatumGetTextPP(arg2);
1861 :
1862 11444 : result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1863 : len1 - VARHDRSZ) != 0);
1864 :
1865 11444 : PG_FREE_IF_COPY(targ1, 0);
1866 11444 : PG_FREE_IF_COPY(targ2, 1);
1867 : }
1868 : }
1869 : else
1870 : {
1871 0 : text *arg1 = PG_GETARG_TEXT_PP(0);
1872 0 : text *arg2 = PG_GETARG_TEXT_PP(1);
1873 :
1874 0 : result = (text_cmp(arg1, arg2, collid) != 0);
1875 :
1876 0 : PG_FREE_IF_COPY(arg1, 0);
1877 0 : PG_FREE_IF_COPY(arg2, 1);
1878 : }
1879 :
1880 12130 : PG_RETURN_BOOL(result);
1881 : }
1882 :
1883 : Datum
1884 88896 : text_lt(PG_FUNCTION_ARGS)
1885 : {
1886 88896 : text *arg1 = PG_GETARG_TEXT_PP(0);
1887 88896 : text *arg2 = PG_GETARG_TEXT_PP(1);
1888 : bool result;
1889 :
1890 88896 : result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1891 :
1892 88892 : PG_FREE_IF_COPY(arg1, 0);
1893 88892 : PG_FREE_IF_COPY(arg2, 1);
1894 :
1895 88892 : PG_RETURN_BOOL(result);
1896 : }
1897 :
1898 : Datum
1899 141290 : text_le(PG_FUNCTION_ARGS)
1900 : {
1901 141290 : text *arg1 = PG_GETARG_TEXT_PP(0);
1902 141290 : text *arg2 = PG_GETARG_TEXT_PP(1);
1903 : bool result;
1904 :
1905 141290 : result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1906 :
1907 141290 : PG_FREE_IF_COPY(arg1, 0);
1908 141290 : PG_FREE_IF_COPY(arg2, 1);
1909 :
1910 141290 : PG_RETURN_BOOL(result);
1911 : }
1912 :
1913 : Datum
1914 57926 : text_gt(PG_FUNCTION_ARGS)
1915 : {
1916 57926 : text *arg1 = PG_GETARG_TEXT_PP(0);
1917 57926 : text *arg2 = PG_GETARG_TEXT_PP(1);
1918 : bool result;
1919 :
1920 57926 : result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1921 :
1922 57926 : PG_FREE_IF_COPY(arg1, 0);
1923 57926 : PG_FREE_IF_COPY(arg2, 1);
1924 :
1925 57926 : PG_RETURN_BOOL(result);
1926 : }
1927 :
1928 : Datum
1929 84288 : text_ge(PG_FUNCTION_ARGS)
1930 : {
1931 84288 : text *arg1 = PG_GETARG_TEXT_PP(0);
1932 84288 : text *arg2 = PG_GETARG_TEXT_PP(1);
1933 : bool result;
1934 :
1935 84288 : result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1936 :
1937 84288 : PG_FREE_IF_COPY(arg1, 0);
1938 84288 : PG_FREE_IF_COPY(arg2, 1);
1939 :
1940 84288 : PG_RETURN_BOOL(result);
1941 : }
1942 :
1943 : Datum
1944 25132 : text_starts_with(PG_FUNCTION_ARGS)
1945 : {
1946 25132 : Datum arg1 = PG_GETARG_DATUM(0);
1947 25132 : Datum arg2 = PG_GETARG_DATUM(1);
1948 25132 : Oid collid = PG_GET_COLLATION();
1949 25132 : pg_locale_t mylocale = 0;
1950 : bool result;
1951 : Size len1,
1952 : len2;
1953 :
1954 25132 : check_collation_set(collid);
1955 :
1956 25132 : if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1957 0 : mylocale = pg_newlocale_from_collation(collid);
1958 :
1959 25132 : if (mylocale && !mylocale->deterministic)
1960 0 : ereport(ERROR,
1961 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1962 : errmsg("nondeterministic collations are not supported for substring searches")));
1963 :
1964 25132 : len1 = toast_raw_datum_size(arg1);
1965 25132 : len2 = toast_raw_datum_size(arg2);
1966 25132 : if (len2 > len1)
1967 0 : result = false;
1968 : else
1969 : {
1970 25132 : text *targ1 = text_substring(arg1, 1, len2, false);
1971 25132 : text *targ2 = DatumGetTextPP(arg2);
1972 :
1973 25132 : result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1974 25132 : VARSIZE_ANY_EXHDR(targ2)) == 0);
1975 :
1976 25132 : PG_FREE_IF_COPY(targ1, 0);
1977 25132 : PG_FREE_IF_COPY(targ2, 1);
1978 : }
1979 :
1980 25132 : PG_RETURN_BOOL(result);
1981 : }
1982 :
1983 : Datum
1984 4457892 : bttextcmp(PG_FUNCTION_ARGS)
1985 : {
1986 4457892 : text *arg1 = PG_GETARG_TEXT_PP(0);
1987 4457892 : text *arg2 = PG_GETARG_TEXT_PP(1);
1988 : int32 result;
1989 :
1990 4457892 : result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1991 :
1992 4457892 : PG_FREE_IF_COPY(arg1, 0);
1993 4457892 : PG_FREE_IF_COPY(arg2, 1);
1994 :
1995 4457892 : PG_RETURN_INT32(result);
1996 : }
1997 :
1998 : Datum
1999 48770 : bttextsortsupport(PG_FUNCTION_ARGS)
2000 : {
2001 48770 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
2002 48770 : Oid collid = ssup->ssup_collation;
2003 : MemoryContext oldcontext;
2004 :
2005 48770 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
2006 :
2007 : /* Use generic string SortSupport */
2008 48770 : varstr_sortsupport(ssup, TEXTOID, collid);
2009 :
2010 48766 : MemoryContextSwitchTo(oldcontext);
2011 :
2012 48766 : PG_RETURN_VOID();
2013 : }
2014 :
2015 : /*
2016 : * Generic sortsupport interface for character type's operator classes.
2017 : * Includes locale support, and support for BpChar semantics (i.e. removing
2018 : * trailing spaces before comparison).
2019 : *
2020 : * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
2021 : * same representation. Callers that always use the C collation (e.g.
2022 : * non-collatable type callers like bytea) may have NUL bytes in their strings;
2023 : * this will not work with any other collation, though.
2024 : */
2025 : void
2026 100468 : varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
2027 : {
2028 100468 : bool abbreviate = ssup->abbreviate;
2029 100468 : bool collate_c = false;
2030 : VarStringSortSupport *sss;
2031 100468 : pg_locale_t locale = 0;
2032 :
2033 100468 : check_collation_set(collid);
2034 :
2035 : /*
2036 : * If possible, set ssup->comparator to a function which can be used to
2037 : * directly compare two datums. If we can do this, we'll avoid the
2038 : * overhead of a trip through the fmgr layer for every comparison, which
2039 : * can be substantial.
2040 : *
2041 : * Most typically, we'll set the comparator to varlenafastcmp_locale,
2042 : * which uses strcoll() to perform comparisons. We use that for the
2043 : * BpChar case too, but type NAME uses namefastcmp_locale. However, if
2044 : * LC_COLLATE = C, we can make things quite a bit faster with
2045 : * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
2046 : * memcmp() rather than strcoll().
2047 : */
2048 100464 : if (lc_collate_is_c(collid))
2049 : {
2050 71264 : if (typid == BPCHAROID)
2051 16 : ssup->comparator = bpcharfastcmp_c;
2052 71248 : else if (typid == NAMEOID)
2053 : {
2054 51108 : ssup->comparator = namefastcmp_c;
2055 : /* Not supporting abbreviation with type NAME, for now */
2056 51108 : abbreviate = false;
2057 : }
2058 : else
2059 20140 : ssup->comparator = varstrfastcmp_c;
2060 :
2061 71264 : collate_c = true;
2062 : }
2063 : else
2064 : {
2065 : /*
2066 : * We need a collation-sensitive comparison. To make things faster,
2067 : * we'll figure out the collation based on the locale id and cache the
2068 : * result.
2069 : */
2070 29200 : if (collid != DEFAULT_COLLATION_OID)
2071 0 : locale = pg_newlocale_from_collation(collid);
2072 :
2073 : /*
2074 : * There is a further exception on Windows. When the database
2075 : * encoding is UTF-8 and we are not using the C collation, complex
2076 : * hacks are required. We don't currently have a comparator that
2077 : * handles that case, so we fall back on the slow method of having the
2078 : * sort code invoke bttextcmp() (in the case of text) via the fmgr
2079 : * trampoline. ICU locales work just the same on Windows, however.
2080 : */
2081 : #ifdef WIN32
2082 : if (GetDatabaseEncoding() == PG_UTF8 &&
2083 : !(locale && locale->provider == COLLPROVIDER_ICU))
2084 : return;
2085 : #endif
2086 :
2087 : /*
2088 : * We use varlenafastcmp_locale except for type NAME.
2089 : */
2090 29200 : if (typid == NAMEOID)
2091 : {
2092 0 : ssup->comparator = namefastcmp_locale;
2093 : /* Not supporting abbreviation with type NAME, for now */
2094 0 : abbreviate = false;
2095 : }
2096 : else
2097 29200 : ssup->comparator = varlenafastcmp_locale;
2098 : }
2099 :
2100 : /*
2101 : * Unfortunately, it seems that abbreviation for non-C collations is
2102 : * broken on many common platforms; testing of multiple versions of glibc
2103 : * reveals that, for many locales, strcoll() and strxfrm() do not return
2104 : * consistent results, which is fatal to this optimization. While no
2105 : * other libc other than Cygwin has so far been shown to have a problem,
2106 : * we take the conservative course of action for right now and disable
2107 : * this categorically. (Users who are certain this isn't a problem on
2108 : * their system can define TRUST_STRXFRM.)
2109 : *
2110 : * Even apart from the risk of broken locales, it's possible that there
2111 : * are platforms where the use of abbreviated keys should be disabled at
2112 : * compile time. Having only 4 byte datums could make worst-case
2113 : * performance drastically more likely, for example. Moreover, macOS's
2114 : * strxfrm() implementation is known to not effectively concentrate a
2115 : * significant amount of entropy from the original string in earlier
2116 : * transformed blobs. It's possible that other supported platforms are
2117 : * similarly encumbered. So, if we ever get past disabling this
2118 : * categorically, we may still want or need to disable it for particular
2119 : * platforms.
2120 : */
2121 : #ifndef TRUST_STRXFRM
2122 100464 : if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
2123 29200 : abbreviate = false;
2124 : #endif
2125 :
2126 : /*
2127 : * If we're using abbreviated keys, or if we're using a locale-aware
2128 : * comparison, we need to initialize a VarStringSortSupport object. Both
2129 : * cases will make use of the temporary buffers we initialize here for
2130 : * scratch space (and to detect requirement for BpChar semantics from
2131 : * caller), and the abbreviation case requires additional state.
2132 : */
2133 100464 : if (abbreviate || !collate_c)
2134 : {
2135 30652 : sss = palloc(sizeof(VarStringSortSupport));
2136 30652 : sss->buf1 = palloc(TEXTBUFLEN);
2137 30652 : sss->buflen1 = TEXTBUFLEN;
2138 30652 : sss->buf2 = palloc(TEXTBUFLEN);
2139 30652 : sss->buflen2 = TEXTBUFLEN;
2140 : /* Start with invalid values */
2141 30652 : sss->last_len1 = -1;
2142 30652 : sss->last_len2 = -1;
2143 : /* Initialize */
2144 30652 : sss->last_returned = 0;
2145 30652 : sss->locale = locale;
2146 :
2147 : /*
2148 : * To avoid somehow confusing a strxfrm() blob and an original string,
2149 : * constantly keep track of the variety of data that buf1 and buf2
2150 : * currently contain.
2151 : *
2152 : * Comparisons may be interleaved with conversion calls. Frequently,
2153 : * conversions and comparisons are batched into two distinct phases,
2154 : * but the correctness of caching cannot hinge upon this. For
2155 : * comparison caching, buffer state is only trusted if cache_blob is
2156 : * found set to false, whereas strxfrm() caching only trusts the state
2157 : * when cache_blob is found set to true.
2158 : *
2159 : * Arbitrarily initialize cache_blob to true.
2160 : */
2161 30652 : sss->cache_blob = true;
2162 30652 : sss->collate_c = collate_c;
2163 30652 : sss->typid = typid;
2164 30652 : ssup->ssup_extra = sss;
2165 :
2166 : /*
2167 : * If possible, plan to use the abbreviated keys optimization. The
2168 : * core code may switch back to authoritative comparator should
2169 : * abbreviation be aborted.
2170 : */
2171 30652 : if (abbreviate)
2172 : {
2173 1452 : sss->prop_card = 0.20;
2174 1452 : initHyperLogLog(&sss->abbr_card, 10);
2175 1452 : initHyperLogLog(&sss->full_card, 10);
2176 1452 : ssup->abbrev_full_comparator = ssup->comparator;
2177 1452 : ssup->comparator = varstrcmp_abbrev;
2178 1452 : ssup->abbrev_converter = varstr_abbrev_convert;
2179 1452 : ssup->abbrev_abort = varstr_abbrev_abort;
2180 : }
2181 : }
2182 100464 : }
2183 :
2184 : /*
2185 : * sortsupport comparison func (for C locale case)
2186 : */
2187 : static int
2188 61558648 : varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
2189 : {
2190 61558648 : VarString *arg1 = DatumGetVarStringPP(x);
2191 61558648 : VarString *arg2 = DatumGetVarStringPP(y);
2192 : char *a1p,
2193 : *a2p;
2194 : int len1,
2195 : len2,
2196 : result;
2197 :
2198 61558648 : a1p = VARDATA_ANY(arg1);
2199 61558648 : a2p = VARDATA_ANY(arg2);
2200 :
2201 61558648 : len1 = VARSIZE_ANY_EXHDR(arg1);
2202 61558648 : len2 = VARSIZE_ANY_EXHDR(arg2);
2203 :
2204 61558648 : result = memcmp(a1p, a2p, Min(len1, len2));
2205 61558648 : if ((result == 0) && (len1 != len2))
2206 1224756 : result = (len1 < len2) ? -1 : 1;
2207 :
2208 : /* We can't afford to leak memory here. */
2209 61558648 : if (PointerGetDatum(arg1) != x)
2210 0 : pfree(arg1);
2211 61558648 : if (PointerGetDatum(arg2) != y)
2212 0 : pfree(arg2);
2213 :
2214 61558648 : return result;
2215 : }
2216 :
2217 : /*
2218 : * sortsupport comparison func (for BpChar C locale case)
2219 : *
2220 : * BpChar outsources its sortsupport to this module. Specialization for the
2221 : * varstr_sortsupport BpChar case, modeled on
2222 : * internal_bpchar_pattern_compare().
2223 : */
2224 : static int
2225 16 : bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
2226 : {
2227 16 : BpChar *arg1 = DatumGetBpCharPP(x);
2228 16 : BpChar *arg2 = DatumGetBpCharPP(y);
2229 : char *a1p,
2230 : *a2p;
2231 : int len1,
2232 : len2,
2233 : result;
2234 :
2235 16 : a1p = VARDATA_ANY(arg1);
2236 16 : a2p = VARDATA_ANY(arg2);
2237 :
2238 16 : len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
2239 16 : len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
2240 :
2241 16 : result = memcmp(a1p, a2p, Min(len1, len2));
2242 16 : if ((result == 0) && (len1 != len2))
2243 0 : result = (len1 < len2) ? -1 : 1;
2244 :
2245 : /* We can't afford to leak memory here. */
2246 16 : if (PointerGetDatum(arg1) != x)
2247 0 : pfree(arg1);
2248 16 : if (PointerGetDatum(arg2) != y)
2249 0 : pfree(arg2);
2250 :
2251 16 : return result;
2252 : }
2253 :
2254 : /*
2255 : * sortsupport comparison func (for NAME C locale case)
2256 : */
2257 : static int
2258 75082870 : namefastcmp_c(Datum x, Datum y, SortSupport ssup)
2259 : {
2260 75082870 : Name arg1 = DatumGetName(x);
2261 75082870 : Name arg2 = DatumGetName(y);
2262 :
2263 75082870 : return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
2264 : }
2265 :
2266 : /*
2267 : * sortsupport comparison func (for locale case with all varlena types)
2268 : */
2269 : static int
2270 25153588 : varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
2271 : {
2272 25153588 : VarString *arg1 = DatumGetVarStringPP(x);
2273 25153588 : VarString *arg2 = DatumGetVarStringPP(y);
2274 : char *a1p,
2275 : *a2p;
2276 : int len1,
2277 : len2,
2278 : result;
2279 :
2280 25153588 : a1p = VARDATA_ANY(arg1);
2281 25153588 : a2p = VARDATA_ANY(arg2);
2282 :
2283 25153588 : len1 = VARSIZE_ANY_EXHDR(arg1);
2284 25153588 : len2 = VARSIZE_ANY_EXHDR(arg2);
2285 :
2286 25153588 : result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
2287 :
2288 : /* We can't afford to leak memory here. */
2289 25153588 : if (PointerGetDatum(arg1) != x)
2290 0 : pfree(arg1);
2291 25153588 : if (PointerGetDatum(arg2) != y)
2292 0 : pfree(arg2);
2293 :
2294 25153588 : return result;
2295 : }
2296 :
2297 : /*
2298 : * sortsupport comparison func (for locale case with NAME type)
2299 : */
2300 : static int
2301 0 : namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
2302 : {
2303 0 : Name arg1 = DatumGetName(x);
2304 0 : Name arg2 = DatumGetName(y);
2305 :
2306 0 : return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
2307 0 : NameStr(*arg2), strlen(NameStr(*arg2)),
2308 : ssup);
2309 : }
2310 :
2311 : /*
2312 : * sortsupport comparison func for locale cases
2313 : */
2314 : static int
2315 25153588 : varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
2316 : {
2317 25153588 : VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
2318 : int result;
2319 : bool arg1_match;
2320 :
2321 : /* Fast pre-check for equality, as discussed in varstr_cmp() */
2322 25153588 : if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
2323 : {
2324 : /*
2325 : * No change in buf1 or buf2 contents, so avoid changing last_len1 or
2326 : * last_len2. Existing contents of buffers might still be used by
2327 : * next call.
2328 : *
2329 : * It's fine to allow the comparison of BpChar padding bytes here,
2330 : * even though that implies that the memcmp() will usually be
2331 : * performed for BpChar callers (though multibyte characters could
2332 : * still prevent that from occurring). The memcmp() is still very
2333 : * cheap, and BpChar's funny semantics have us remove trailing spaces
2334 : * (not limited to padding), so we need make no distinction between
2335 : * padding space characters and "real" space characters.
2336 : */
2337 9289352 : return 0;
2338 : }
2339 :
2340 15864236 : if (sss->typid == BPCHAROID)
2341 : {
2342 : /* Get true number of bytes, ignoring trailing spaces */
2343 32972 : len1 = bpchartruelen(a1p, len1);
2344 32972 : len2 = bpchartruelen(a2p, len2);
2345 : }
2346 :
2347 15864236 : if (len1 >= sss->buflen1)
2348 : {
2349 0 : pfree(sss->buf1);
2350 0 : sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2351 0 : sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
2352 : }
2353 15864236 : if (len2 >= sss->buflen2)
2354 : {
2355 0 : pfree(sss->buf2);
2356 0 : sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
2357 0 : sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
2358 : }
2359 :
2360 : /*
2361 : * We're likely to be asked to compare the same strings repeatedly, and
2362 : * memcmp() is so much cheaper than strcoll() that it pays to try to cache
2363 : * comparisons, even though in general there is no reason to think that
2364 : * that will work out (every string datum may be unique). Caching does
2365 : * not slow things down measurably when it doesn't work out, and can speed
2366 : * things up by rather a lot when it does. In part, this is because the
2367 : * memcmp() compares data from cachelines that are needed in L1 cache even
2368 : * when the last comparison's result cannot be reused.
2369 : */
2370 15864236 : arg1_match = true;
2371 15864236 : if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
2372 : {
2373 14048476 : arg1_match = false;
2374 14048476 : memcpy(sss->buf1, a1p, len1);
2375 14048476 : sss->buf1[len1] = '\0';
2376 14048476 : sss->last_len1 = len1;
2377 : }
2378 :
2379 : /*
2380 : * If we're comparing the same two strings as last time, we can return the
2381 : * same answer without calling strcoll() again. This is more likely than
2382 : * it seems (at least with moderate to low cardinality sets), because
2383 : * quicksort compares the same pivot against many values.
2384 : */
2385 15864236 : if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
2386 : {
2387 2596454 : memcpy(sss->buf2, a2p, len2);
2388 2596454 : sss->buf2[len2] = '\0';
2389 2596454 : sss->last_len2 = len2;
2390 : }
2391 13267782 : else if (arg1_match && !sss->cache_blob)
2392 : {
2393 : /* Use result cached following last actual strcoll() call */
2394 1562694 : return sss->last_returned;
2395 : }
2396 :
2397 14301542 : if (sss->locale)
2398 : {
2399 0 : if (sss->locale->provider == COLLPROVIDER_ICU)
2400 : {
2401 : #ifdef USE_ICU
2402 : #ifdef HAVE_UCOL_STRCOLLUTF8
2403 : if (GetDatabaseEncoding() == PG_UTF8)
2404 : {
2405 : UErrorCode status;
2406 :
2407 : status = U_ZERO_ERROR;
2408 : result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
2409 : a1p, len1,
2410 : a2p, len2,
2411 : &status);
2412 : if (U_FAILURE(status))
2413 : ereport(ERROR,
2414 : (errmsg("collation failed: %s", u_errorName(status))));
2415 : }
2416 : else
2417 : #endif
2418 : {
2419 : int32_t ulen1,
2420 : ulen2;
2421 : UChar *uchar1,
2422 : *uchar2;
2423 :
2424 : ulen1 = icu_to_uchar(&uchar1, a1p, len1);
2425 : ulen2 = icu_to_uchar(&uchar2, a2p, len2);
2426 :
2427 : result = ucol_strcoll(sss->locale->info.icu.ucol,
2428 : uchar1, ulen1,
2429 : uchar2, ulen2);
2430 :
2431 : pfree(uchar1);
2432 : pfree(uchar2);
2433 : }
2434 : #else /* not USE_ICU */
2435 : /* shouldn't happen */
2436 0 : elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2437 : #endif /* not USE_ICU */
2438 : }
2439 : else
2440 : {
2441 : #ifdef HAVE_LOCALE_T
2442 0 : result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
2443 : #else
2444 : /* shouldn't happen */
2445 : elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2446 : #endif
2447 : }
2448 : }
2449 : else
2450 14301542 : result = strcoll(sss->buf1, sss->buf2);
2451 :
2452 : /* Break tie if necessary. */
2453 14301542 : if (result == 0 &&
2454 0 : (!sss->locale || sss->locale->deterministic))
2455 0 : result = strcmp(sss->buf1, sss->buf2);
2456 :
2457 : /* Cache result, perhaps saving an expensive strcoll() call next time */
2458 14301542 : sss->cache_blob = false;
2459 14301542 : sss->last_returned = result;
2460 14301542 : return result;
2461 : }
2462 :
2463 : /*
2464 : * Abbreviated key comparison func
2465 : */
2466 : static int
2467 3528180 : varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
2468 : {
2469 : /*
2470 : * When 0 is returned, the core system will call varstrfastcmp_c()
2471 : * (bpcharfastcmp_c() in BpChar case) or varlenafastcmp_locale(). Even a
2472 : * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
2473 : * authoritatively, for the same reason that there is a strcoll()
2474 : * tie-breaker call to strcmp() in varstr_cmp().
2475 : */
2476 3528180 : if (x > y)
2477 1538364 : return 1;
2478 1989816 : else if (x == y)
2479 527810 : return 0;
2480 : else
2481 1462006 : return -1;
2482 : }
2483 :
2484 : /*
2485 : * Conversion routine for sortsupport. Converts original to abbreviated key
2486 : * representation. Our encoding strategy is simple -- pack the first 8 bytes
2487 : * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2488 : * stored in reverse order), and treat it as an unsigned integer. When the "C"
2489 : * locale is used, or in case of bytea, just memcpy() from original instead.
2490 : */
2491 : static Datum
2492 354970 : varstr_abbrev_convert(Datum original, SortSupport ssup)
2493 : {
2494 354970 : VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
2495 354970 : VarString *authoritative = DatumGetVarStringPP(original);
2496 354970 : char *authoritative_data = VARDATA_ANY(authoritative);
2497 :
2498 : /* working state */
2499 : Datum res;
2500 : char *pres;
2501 : int len;
2502 : uint32 hash;
2503 :
2504 354970 : pres = (char *) &res;
2505 : /* memset(), so any non-overwritten bytes are NUL */
2506 354970 : memset(pres, 0, sizeof(Datum));
2507 354970 : len = VARSIZE_ANY_EXHDR(authoritative);
2508 :
2509 : /* Get number of bytes, ignoring trailing spaces */
2510 354970 : if (sss->typid == BPCHAROID)
2511 0 : len = bpchartruelen(authoritative_data, len);
2512 :
2513 : /*
2514 : * If we're using the C collation, use memcpy(), rather than strxfrm(), to
2515 : * abbreviate keys. The full comparator for the C locale is always
2516 : * memcmp(). It would be incorrect to allow bytea callers (callers that
2517 : * always force the C collation -- bytea isn't a collatable type, but this
2518 : * approach is convenient) to use strxfrm(). This is because bytea
2519 : * strings may contain NUL bytes. Besides, this should be faster, too.
2520 : *
2521 : * More generally, it's okay that bytea callers can have NUL bytes in
2522 : * strings because varstrcmp_abbrev() need not make a distinction between
2523 : * terminating NUL bytes, and NUL bytes representing actual NULs in the
2524 : * authoritative representation. Hopefully a comparison at or past one
2525 : * abbreviated key's terminating NUL byte will resolve the comparison
2526 : * without consulting the authoritative representation; specifically, some
2527 : * later non-NUL byte in the longer string can resolve the comparison
2528 : * against a subsequent terminating NUL in the shorter string. There will
2529 : * usually be what is effectively a "length-wise" resolution there and
2530 : * then.
2531 : *
2532 : * If that doesn't work out -- if all bytes in the longer string
2533 : * positioned at or past the offset of the smaller string's (first)
2534 : * terminating NUL are actually representative of NUL bytes in the
2535 : * authoritative binary string (perhaps with some *terminating* NUL bytes
2536 : * towards the end of the longer string iff it happens to still be small)
2537 : * -- then an authoritative tie-breaker will happen, and do the right
2538 : * thing: explicitly consider string length.
2539 : */
2540 354970 : if (sss->collate_c)
2541 354970 : memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
2542 : else
2543 : {
2544 : Size bsize;
2545 : #ifdef USE_ICU
2546 : int32_t ulen = -1;
2547 : UChar *uchar = NULL;
2548 : #endif
2549 :
2550 : /*
2551 : * We're not using the C collation, so fall back on strxfrm or ICU
2552 : * analogs.
2553 : */
2554 :
2555 : /* By convention, we use buffer 1 to store and NUL-terminate */
2556 0 : if (len >= sss->buflen1)
2557 : {
2558 0 : pfree(sss->buf1);
2559 0 : sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2560 0 : sss->buf1 = palloc(sss->buflen1);
2561 : }
2562 :
2563 : /* Might be able to reuse strxfrm() blob from last call */
2564 0 : if (sss->last_len1 == len && sss->cache_blob &&
2565 0 : memcmp(sss->buf1, authoritative_data, len) == 0)
2566 : {
2567 0 : memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
2568 : /* No change affecting cardinality, so no hashing required */
2569 0 : goto done;
2570 : }
2571 :
2572 0 : memcpy(sss->buf1, authoritative_data, len);
2573 :
2574 : /*
2575 : * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
2576 : * necessary for ICU, but doesn't hurt.
2577 : */
2578 0 : sss->buf1[len] = '\0';
2579 0 : sss->last_len1 = len;
2580 :
2581 : #ifdef USE_ICU
2582 : /* When using ICU and not UTF8, convert string to UChar. */
2583 : if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
2584 : GetDatabaseEncoding() != PG_UTF8)
2585 : ulen = icu_to_uchar(&uchar, sss->buf1, len);
2586 : #endif
2587 :
2588 : /*
2589 : * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
2590 : * and try again. Both of these functions have the result buffer
2591 : * content undefined if the result did not fit, so we need to retry
2592 : * until everything fits, even though we only need the first few bytes
2593 : * in the end. When using ucol_nextSortKeyPart(), however, we only
2594 : * ask for as many bytes as we actually need.
2595 : */
2596 : for (;;)
2597 : {
2598 : #ifdef USE_ICU
2599 : if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
2600 : {
2601 : /*
2602 : * When using UTF8, use the iteration interface so we only
2603 : * need to produce as many bytes as we actually need.
2604 : */
2605 : if (GetDatabaseEncoding() == PG_UTF8)
2606 : {
2607 : UCharIterator iter;
2608 : uint32_t state[2];
2609 : UErrorCode status;
2610 :
2611 : uiter_setUTF8(&iter, sss->buf1, len);
2612 : state[0] = state[1] = 0; /* won't need that again */
2613 : status = U_ZERO_ERROR;
2614 : bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
2615 : &iter,
2616 : state,
2617 : (uint8_t *) sss->buf2,
2618 : Min(sizeof(Datum), sss->buflen2),
2619 : &status);
2620 : if (U_FAILURE(status))
2621 : ereport(ERROR,
2622 : (errmsg("sort key generation failed: %s",
2623 : u_errorName(status))));
2624 : }
2625 : else
2626 : bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
2627 : uchar, ulen,
2628 : (uint8_t *) sss->buf2, sss->buflen2);
2629 : }
2630 : else
2631 : #endif
2632 : #ifdef HAVE_LOCALE_T
2633 0 : if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
2634 0 : bsize = strxfrm_l(sss->buf2, sss->buf1,
2635 0 : sss->buflen2, sss->locale->info.lt);
2636 : else
2637 : #endif
2638 0 : bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
2639 :
2640 0 : sss->last_len2 = bsize;
2641 0 : if (bsize < sss->buflen2)
2642 0 : break;
2643 :
2644 : /*
2645 : * Grow buffer and retry.
2646 : */
2647 0 : pfree(sss->buf2);
2648 0 : sss->buflen2 = Max(bsize + 1,
2649 : Min(sss->buflen2 * 2, MaxAllocSize));
2650 0 : sss->buf2 = palloc(sss->buflen2);
2651 : }
2652 :
2653 : /*
2654 : * Every Datum byte is always compared. This is safe because the
2655 : * strxfrm() blob is itself NUL terminated, leaving no danger of
2656 : * misinterpreting any NUL bytes not intended to be interpreted as
2657 : * logically representing termination.
2658 : *
2659 : * (Actually, even if there were NUL bytes in the blob it would be
2660 : * okay. See remarks on bytea case above.)
2661 : */
2662 0 : memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2663 :
2664 : #ifdef USE_ICU
2665 : if (uchar)
2666 : pfree(uchar);
2667 : #endif
2668 : }
2669 :
2670 : /*
2671 : * Maintain approximate cardinality of both abbreviated keys and original,
2672 : * authoritative keys using HyperLogLog. Used as cheap insurance against
2673 : * the worst case, where we do many string transformations for no saving
2674 : * in full strcoll()-based comparisons. These statistics are used by
2675 : * varstr_abbrev_abort().
2676 : *
2677 : * First, Hash key proper, or a significant fraction of it. Mix in length
2678 : * in order to compensate for cases where differences are past
2679 : * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2680 : */
2681 354970 : hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2682 : Min(len, PG_CACHE_LINE_SIZE)));
2683 :
2684 354970 : if (len > PG_CACHE_LINE_SIZE)
2685 8 : hash ^= DatumGetUInt32(hash_uint32((uint32) len));
2686 :
2687 354970 : addHyperLogLog(&sss->full_card, hash);
2688 :
2689 : /* Hash abbreviated key */
2690 : #if SIZEOF_DATUM == 8
2691 : {
2692 : uint32 lohalf,
2693 : hihalf;
2694 :
2695 354970 : lohalf = (uint32) res;
2696 354970 : hihalf = (uint32) (res >> 32);
2697 354970 : hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2698 : }
2699 : #else /* SIZEOF_DATUM != 8 */
2700 : hash = DatumGetUInt32(hash_uint32((uint32) res));
2701 : #endif
2702 :
2703 354970 : addHyperLogLog(&sss->abbr_card, hash);
2704 :
2705 : /* Cache result, perhaps saving an expensive strxfrm() call next time */
2706 354970 : sss->cache_blob = true;
2707 354970 : done:
2708 :
2709 : /*
2710 : * Byteswap on little-endian machines.
2711 : *
2712 : * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
2713 : * comparator) works correctly on all platforms. If we didn't do this,
2714 : * the comparator would have to call memcmp() with a pair of pointers to
2715 : * the first byte of each abbreviated key, which is slower.
2716 : */
2717 354970 : res = DatumBigEndianToNative(res);
2718 :
2719 : /* Don't leak memory here */
2720 354970 : if (PointerGetDatum(authoritative) != original)
2721 0 : pfree(authoritative);
2722 :
2723 354970 : return res;
2724 : }
2725 :
2726 : /*
2727 : * Callback for estimating effectiveness of abbreviated key optimization, using
2728 : * heuristic rules. Returns value indicating if the abbreviation optimization
2729 : * should be aborted, based on its projected effectiveness.
2730 : */
2731 : static bool
2732 964 : varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2733 : {
2734 964 : VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
2735 : double abbrev_distinct,
2736 : key_distinct;
2737 :
2738 : Assert(ssup->abbreviate);
2739 :
2740 : /* Have a little patience */
2741 964 : if (memtupcount < 100)
2742 464 : return false;
2743 :
2744 500 : abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2745 500 : key_distinct = estimateHyperLogLog(&sss->full_card);
2746 :
2747 : /*
2748 : * Clamp cardinality estimates to at least one distinct value. While
2749 : * NULLs are generally disregarded, if only NULL values were seen so far,
2750 : * that might misrepresent costs if we failed to clamp.
2751 : */
2752 500 : if (abbrev_distinct <= 1.0)
2753 0 : abbrev_distinct = 1.0;
2754 :
2755 500 : if (key_distinct <= 1.0)
2756 0 : key_distinct = 1.0;
2757 :
2758 : /*
2759 : * In the worst case all abbreviated keys are identical, while at the same
2760 : * time there are differences within full key strings not captured in
2761 : * abbreviations.
2762 : */
2763 : #ifdef TRACE_SORT
2764 500 : if (trace_sort)
2765 : {
2766 0 : double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2767 :
2768 0 : elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2769 : "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2770 : memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2771 : sss->prop_card);
2772 : }
2773 : #endif
2774 :
2775 : /*
2776 : * If the number of distinct abbreviated keys approximately matches the
2777 : * number of distinct authoritative original keys, that's reason enough to
2778 : * proceed. We can win even with a very low cardinality set if most
2779 : * tie-breakers only memcmp(). This is by far the most important
2780 : * consideration.
2781 : *
2782 : * While comparisons that are resolved at the abbreviated key level are
2783 : * considerably cheaper than tie-breakers resolved with memcmp(), both of
2784 : * those two outcomes are so much cheaper than a full strcoll() once
2785 : * sorting is underway that it doesn't seem worth it to weigh abbreviated
2786 : * cardinality against the overall size of the set in order to more
2787 : * accurately model costs. Assume that an abbreviated comparison, and an
2788 : * abbreviated comparison with a cheap memcmp()-based authoritative
2789 : * resolution are equivalent.
2790 : */
2791 500 : if (abbrev_distinct > key_distinct * sss->prop_card)
2792 : {
2793 : /*
2794 : * When we have exceeded 10,000 tuples, decay required cardinality
2795 : * aggressively for next call.
2796 : *
2797 : * This is useful because the number of comparisons required on
2798 : * average increases at a linearithmic rate, and at roughly 10,000
2799 : * tuples that factor will start to dominate over the linear costs of
2800 : * string transformation (this is a conservative estimate). The decay
2801 : * rate is chosen to be a little less aggressive than halving -- which
2802 : * (since we're called at points at which memtupcount has doubled)
2803 : * would never see the cost model actually abort past the first call
2804 : * following a decay. This decay rate is mostly a precaution against
2805 : * a sudden, violent swing in how well abbreviated cardinality tracks
2806 : * full key cardinality. The decay also serves to prevent a marginal
2807 : * case from being aborted too late, when too much has already been
2808 : * invested in string transformation.
2809 : *
2810 : * It's possible for sets of several million distinct strings with
2811 : * mere tens of thousands of distinct abbreviated keys to still
2812 : * benefit very significantly. This will generally occur provided
2813 : * each abbreviated key is a proxy for a roughly uniform number of the
2814 : * set's full keys. If it isn't so, we hope to catch that early and
2815 : * abort. If it isn't caught early, by the time the problem is
2816 : * apparent it's probably not worth aborting.
2817 : */
2818 500 : if (memtupcount > 10000)
2819 0 : sss->prop_card *= 0.65;
2820 :
2821 500 : return false;
2822 : }
2823 :
2824 : /*
2825 : * Abort abbreviation strategy.
2826 : *
2827 : * The worst case, where all abbreviated keys are identical while all
2828 : * original strings differ will typically only see a regression of about
2829 : * 10% in execution time for small to medium sized lists of strings.
2830 : * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2831 : * often expect very large improvements, particularly with sets of strings
2832 : * of moderately high to high abbreviated cardinality. There is little to
2833 : * lose but much to gain, which our strategy reflects.
2834 : */
2835 : #ifdef TRACE_SORT
2836 0 : if (trace_sort)
2837 0 : elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2838 : "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2839 : memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2840 : #endif
2841 :
2842 0 : return true;
2843 : }
2844 :
2845 : /*
2846 : * Generic equalimage support function for character type's operator classes.
2847 : * Disables the use of deduplication with nondeterministic collations.
2848 : */
2849 : Datum
2850 1906 : btvarstrequalimage(PG_FUNCTION_ARGS)
2851 : {
2852 : /* Oid opcintype = PG_GETARG_OID(0); */
2853 1906 : Oid collid = PG_GET_COLLATION();
2854 :
2855 1906 : check_collation_set(collid);
2856 :
2857 1906 : if (lc_collate_is_c(collid) ||
2858 0 : collid == DEFAULT_COLLATION_OID ||
2859 0 : get_collation_isdeterministic(collid))
2860 1906 : PG_RETURN_BOOL(true);
2861 : else
2862 0 : PG_RETURN_BOOL(false);
2863 : }
2864 :
2865 : Datum
2866 137394 : text_larger(PG_FUNCTION_ARGS)
2867 : {
2868 137394 : text *arg1 = PG_GETARG_TEXT_PP(0);
2869 137394 : text *arg2 = PG_GETARG_TEXT_PP(1);
2870 : text *result;
2871 :
2872 137394 : result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2873 :
2874 137394 : PG_RETURN_TEXT_P(result);
2875 : }
2876 :
2877 : Datum
2878 41072 : text_smaller(PG_FUNCTION_ARGS)
2879 : {
2880 41072 : text *arg1 = PG_GETARG_TEXT_PP(0);
2881 41072 : text *arg2 = PG_GETARG_TEXT_PP(1);
2882 : text *result;
2883 :
2884 41072 : result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2885 :
2886 41072 : PG_RETURN_TEXT_P(result);
2887 : }
2888 :
2889 :
2890 : /*
2891 : * Cross-type comparison functions for types text and name.
2892 : */
2893 :
2894 : Datum
2895 105138 : nameeqtext(PG_FUNCTION_ARGS)
2896 : {
2897 105138 : Name arg1 = PG_GETARG_NAME(0);
2898 105138 : text *arg2 = PG_GETARG_TEXT_PP(1);
2899 105138 : size_t len1 = strlen(NameStr(*arg1));
2900 105138 : size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2901 105138 : Oid collid = PG_GET_COLLATION();
2902 : bool result;
2903 :
2904 105138 : check_collation_set(collid);
2905 :
2906 105138 : if (collid == C_COLLATION_OID)
2907 181814 : result = (len1 == len2 &&
2908 85452 : memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2909 : else
2910 8776 : result = (varstr_cmp(NameStr(*arg1), len1,
2911 8776 : VARDATA_ANY(arg2), len2,
2912 : collid) == 0);
2913 :
2914 105138 : PG_FREE_IF_COPY(arg2, 1);
2915 :
2916 105138 : PG_RETURN_BOOL(result);
2917 : }
2918 :
2919 : Datum
2920 256 : texteqname(PG_FUNCTION_ARGS)
2921 : {
2922 256 : text *arg1 = PG_GETARG_TEXT_PP(0);
2923 256 : Name arg2 = PG_GETARG_NAME(1);
2924 256 : size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2925 256 : size_t len2 = strlen(NameStr(*arg2));
2926 256 : Oid collid = PG_GET_COLLATION();
2927 : bool result;
2928 :
2929 256 : check_collation_set(collid);
2930 :
2931 256 : if (collid == C_COLLATION_OID)
2932 376 : result = (len1 == len2 &&
2933 120 : memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2934 : else
2935 0 : result = (varstr_cmp(VARDATA_ANY(arg1), len1,
2936 0 : NameStr(*arg2), len2,
2937 : collid) == 0);
2938 :
2939 256 : PG_FREE_IF_COPY(arg1, 0);
2940 :
2941 256 : PG_RETURN_BOOL(result);
2942 : }
2943 :
2944 : Datum
2945 0 : namenetext(PG_FUNCTION_ARGS)
2946 : {
2947 0 : Name arg1 = PG_GETARG_NAME(0);
2948 0 : text *arg2 = PG_GETARG_TEXT_PP(1);
2949 0 : size_t len1 = strlen(NameStr(*arg1));
2950 0 : size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2951 0 : Oid collid = PG_GET_COLLATION();
2952 : bool result;
2953 :
2954 0 : check_collation_set(collid);
2955 :
2956 0 : if (collid == C_COLLATION_OID)
2957 0 : result = !(len1 == len2 &&
2958 0 : memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2959 : else
2960 0 : result = !(varstr_cmp(NameStr(*arg1), len1,
2961 0 : VARDATA_ANY(arg2), len2,
2962 : collid) == 0);
2963 :
2964 0 : PG_FREE_IF_COPY(arg2, 1);
2965 :
2966 0 : PG_RETURN_BOOL(result);
2967 : }
2968 :
2969 : Datum
2970 0 : textnename(PG_FUNCTION_ARGS)
2971 : {
2972 0 : text *arg1 = PG_GETARG_TEXT_PP(0);
2973 0 : Name arg2 = PG_GETARG_NAME(1);
2974 0 : size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2975 0 : size_t len2 = strlen(NameStr(*arg2));
2976 0 : Oid collid = PG_GET_COLLATION();
2977 : bool result;
2978 :
2979 0 : check_collation_set(collid);
2980 :
2981 0 : if (collid == C_COLLATION_OID)
2982 0 : result = !(len1 == len2 &&
2983 0 : memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2984 : else
2985 0 : result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
2986 0 : NameStr(*arg2), len2,
2987 : collid) == 0);
2988 :
2989 0 : PG_FREE_IF_COPY(arg1, 0);
2990 :
2991 0 : PG_RETURN_BOOL(result);
2992 : }
2993 :
2994 : Datum
2995 70956 : btnametextcmp(PG_FUNCTION_ARGS)
2996 : {
2997 70956 : Name arg1 = PG_GETARG_NAME(0);
2998 70956 : text *arg2 = PG_GETARG_TEXT_PP(1);
2999 : int32 result;
3000 :
3001 141912 : result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
3002 141912 : VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
3003 : PG_GET_COLLATION());
3004 :
3005 70956 : PG_FREE_IF_COPY(arg2, 1);
3006 :
3007 70956 : PG_RETURN_INT32(result);
3008 : }
3009 :
3010 : Datum
3011 0 : bttextnamecmp(PG_FUNCTION_ARGS)
3012 : {
3013 0 : text *arg1 = PG_GETARG_TEXT_PP(0);
3014 0 : Name arg2 = PG_GETARG_NAME(1);
3015 : int32 result;
3016 :
3017 0 : result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
3018 0 : NameStr(*arg2), strlen(NameStr(*arg2)),
3019 : PG_GET_COLLATION());
3020 :
3021 0 : PG_FREE_IF_COPY(arg1, 0);
3022 :
3023 0 : PG_RETURN_INT32(result);
3024 : }
3025 :
3026 : #define CmpCall(cmpfunc) \
3027 : DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
3028 : PG_GET_COLLATION(), \
3029 : PG_GETARG_DATUM(0), \
3030 : PG_GETARG_DATUM(1)))
3031 :
3032 : Datum
3033 21838 : namelttext(PG_FUNCTION_ARGS)
3034 : {
3035 21838 : PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0);
3036 : }
3037 :
3038 : Datum
3039 0 : nameletext(PG_FUNCTION_ARGS)
3040 : {
3041 0 : PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0);
3042 : }
3043 :
3044 : Datum
3045 0 : namegttext(PG_FUNCTION_ARGS)
3046 : {
3047 0 : PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0);
3048 : }
3049 :
3050 : Datum
3051 20506 : namegetext(PG_FUNCTION_ARGS)
3052 : {
3053 20506 : PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0);
3054 : }
3055 :
3056 : Datum
3057 0 : textltname(PG_FUNCTION_ARGS)
3058 : {
3059 0 : PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0);
3060 : }
3061 :
3062 : Datum
3063 0 : textlename(PG_FUNCTION_ARGS)
3064 : {
3065 0 : PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0);
3066 : }
3067 :
3068 : Datum
3069 0 : textgtname(PG_FUNCTION_ARGS)
3070 : {
3071 0 : PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0);
3072 : }
3073 :
3074 : Datum
3075 0 : textgename(PG_FUNCTION_ARGS)
3076 : {
3077 0 : PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0);
3078 : }
3079 :
3080 : #undef CmpCall
3081 :
3082 :
3083 : /*
3084 : * The following operators support character-by-character comparison
3085 : * of text datums, to allow building indexes suitable for LIKE clauses.
3086 : * Note that the regular texteq/textne comparison operators, and regular
3087 : * support functions 1 and 2 with "C" collation are assumed to be
3088 : * compatible with these!
3089 : */
3090 :
3091 : static int
3092 101392 : internal_text_pattern_compare(text *arg1, text *arg2)
3093 : {
3094 : int result;
3095 : int len1,
3096 : len2;
3097 :
3098 101392 : len1 = VARSIZE_ANY_EXHDR(arg1);
3099 101392 : len2 = VARSIZE_ANY_EXHDR(arg2);
3100 :
3101 101392 : result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3102 101392 : if (result != 0)
3103 101356 : return result;
3104 36 : else if (len1 < len2)
3105 0 : return -1;
3106 36 : else if (len1 > len2)
3107 12 : return 1;
3108 : else
3109 24 : return 0;
3110 : }
3111 :
3112 :
3113 : Datum
3114 26360 : text_pattern_lt(PG_FUNCTION_ARGS)
3115 : {
3116 26360 : text *arg1 = PG_GETARG_TEXT_PP(0);
3117 26360 : text *arg2 = PG_GETARG_TEXT_PP(1);
3118 : int result;
3119 :
3120 26360 : result = internal_text_pattern_compare(arg1, arg2);
3121 :
3122 26360 : PG_FREE_IF_COPY(arg1, 0);
3123 26360 : PG_FREE_IF_COPY(arg2, 1);
3124 :
3125 26360 : PG_RETURN_BOOL(result < 0);
3126 : }
3127 :
3128 :
3129 : Datum
3130 25008 : text_pattern_le(PG_FUNCTION_ARGS)
3131 : {
3132 25008 : text *arg1 = PG_GETARG_TEXT_PP(0);
3133 25008 : text *arg2 = PG_GETARG_TEXT_PP(1);
3134 : int result;
3135 :
3136 25008 : result = internal_text_pattern_compare(arg1, arg2);
3137 :
3138 25008 : PG_FREE_IF_COPY(arg1, 0);
3139 25008 : PG_FREE_IF_COPY(arg2, 1);
3140 :
3141 25008 : PG_RETURN_BOOL(result <= 0);
3142 : }
3143 :
3144 :
3145 : Datum
3146 25008 : text_pattern_ge(PG_FUNCTION_ARGS)
3147 : {
3148 25008 : text *arg1 = PG_GETARG_TEXT_PP(0);
3149 25008 : text *arg2 = PG_GETARG_TEXT_PP(1);
3150 : int result;
3151 :
3152 25008 : result = internal_text_pattern_compare(arg1, arg2);
3153 :
3154 25008 : PG_FREE_IF_COPY(arg1, 0);
3155 25008 : PG_FREE_IF_COPY(arg2, 1);
3156 :
3157 25008 : PG_RETURN_BOOL(result >= 0);
3158 : }
3159 :
3160 :
3161 : Datum
3162 25008 : text_pattern_gt(PG_FUNCTION_ARGS)
3163 : {
3164 25008 : text *arg1 = PG_GETARG_TEXT_PP(0);
3165 25008 : text *arg2 = PG_GETARG_TEXT_PP(1);
3166 : int result;
3167 :
3168 25008 : result = internal_text_pattern_compare(arg1, arg2);
3169 :
3170 25008 : PG_FREE_IF_COPY(arg1, 0);
3171 25008 : PG_FREE_IF_COPY(arg2, 1);
3172 :
3173 25008 : PG_RETURN_BOOL(result > 0);
3174 : }
3175 :
3176 :
3177 : Datum
3178 8 : bttext_pattern_cmp(PG_FUNCTION_ARGS)
3179 : {
3180 8 : text *arg1 = PG_GETARG_TEXT_PP(0);
3181 8 : text *arg2 = PG_GETARG_TEXT_PP(1);
3182 : int result;
3183 :
3184 8 : result = internal_text_pattern_compare(arg1, arg2);
3185 :
3186 8 : PG_FREE_IF_COPY(arg1, 0);
3187 8 : PG_FREE_IF_COPY(arg2, 1);
3188 :
3189 8 : PG_RETURN_INT32(result);
3190 : }
3191 :
3192 :
3193 : Datum
3194 78 : bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
3195 : {
3196 78 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
3197 : MemoryContext oldcontext;
3198 :
3199 78 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3200 :
3201 : /* Use generic string SortSupport, forcing "C" collation */
3202 78 : varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
3203 :
3204 78 : MemoryContextSwitchTo(oldcontext);
3205 :
3206 78 : PG_RETURN_VOID();
3207 : }
3208 :
3209 :
3210 : /*-------------------------------------------------------------
3211 : * byteaoctetlen
3212 : *
3213 : * get the number of bytes contained in an instance of type 'bytea'
3214 : *-------------------------------------------------------------
3215 : */
3216 : Datum
3217 26 : byteaoctetlen(PG_FUNCTION_ARGS)
3218 : {
3219 26 : Datum str = PG_GETARG_DATUM(0);
3220 :
3221 : /* We need not detoast the input at all */
3222 26 : PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
3223 : }
3224 :
3225 : /*
3226 : * byteacat -
3227 : * takes two bytea* and returns a bytea* that is the concatenation of
3228 : * the two.
3229 : *
3230 : * Cloned from textcat and modified as required.
3231 : */
3232 : Datum
3233 0 : byteacat(PG_FUNCTION_ARGS)
3234 : {
3235 0 : bytea *t1 = PG_GETARG_BYTEA_PP(0);
3236 0 : bytea *t2 = PG_GETARG_BYTEA_PP(1);
3237 :
3238 0 : PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
3239 : }
3240 :
3241 : /*
3242 : * bytea_catenate
3243 : * Guts of byteacat(), broken out so it can be used by other functions
3244 : *
3245 : * Arguments can be in short-header form, but not compressed or out-of-line
3246 : */
3247 : static bytea *
3248 24 : bytea_catenate(bytea *t1, bytea *t2)
3249 : {
3250 : bytea *result;
3251 : int len1,
3252 : len2,
3253 : len;
3254 : char *ptr;
3255 :
3256 24 : len1 = VARSIZE_ANY_EXHDR(t1);
3257 24 : len2 = VARSIZE_ANY_EXHDR(t2);
3258 :
3259 : /* paranoia ... probably should throw error instead? */
3260 24 : if (len1 < 0)
3261 0 : len1 = 0;
3262 24 : if (len2 < 0)
3263 0 : len2 = 0;
3264 :
3265 24 : len = len1 + len2 + VARHDRSZ;
3266 24 : result = (bytea *) palloc(len);
3267 :
3268 : /* Set size of result string... */
3269 24 : SET_VARSIZE(result, len);
3270 :
3271 : /* Fill data field of result string... */
3272 24 : ptr = VARDATA(result);
3273 24 : if (len1 > 0)
3274 24 : memcpy(ptr, VARDATA_ANY(t1), len1);
3275 24 : if (len2 > 0)
3276 12 : memcpy(ptr + len1, VARDATA_ANY(t2), len2);
3277 :
3278 24 : return result;
3279 : }
3280 :
/* Turn a C string into a bytea pointer by passing it through byteain(). */
#define PG_STR_GET_BYTEA(str_) \
	DatumGetByteaPP(DirectFunctionCall1(byteain, \
										CStringGetDatum(str_)))
3283 :
3284 : /*
3285 : * bytea_substr()
3286 : * Return a substring starting at the specified position.
3287 : * Cloned from text_substr and modified as required.
3288 : *
3289 : * Input:
3290 : * - string
3291 : * - starting position (is one-based)
3292 : * - string length (optional)
3293 : *
3294 : * If the starting position is zero or less, then return from the start of the string
3295 : * adjusting the length to be consistent with the "negative start" per SQL.
3296 : * If the length is less than zero, an ERROR is thrown. If no third argument
3297 : * (length) is provided, the length to the end of the string is assumed.
3298 : */
3299 : Datum
3300 52 : bytea_substr(PG_FUNCTION_ARGS)
3301 : {
3302 52 : PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
3303 : PG_GETARG_INT32(1),
3304 : PG_GETARG_INT32(2),
3305 : false));
3306 : }
3307 :
3308 : /*
3309 : * bytea_substr_no_len -
3310 : * Wrapper to avoid opr_sanity failure due to
3311 : * one function accepting a different number of args.
3312 : */
3313 : Datum
3314 20 : bytea_substr_no_len(PG_FUNCTION_ARGS)
3315 : {
3316 20 : PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
3317 : PG_GETARG_INT32(1),
3318 : -1,
3319 : true));
3320 : }
3321 :
3322 : static bytea *
3323 96 : bytea_substring(Datum str,
3324 : int S,
3325 : int L,
3326 : bool length_not_specified)
3327 : {
3328 : int32 S1; /* adjusted start position */
3329 : int32 L1; /* adjusted substring length */
3330 : int32 E; /* end position */
3331 :
3332 : /*
3333 : * The logic here should generally match text_substring().
3334 : */
3335 96 : S1 = Max(S, 1);
3336 :
3337 96 : if (length_not_specified)
3338 : {
3339 : /*
3340 : * Not passed a length - DatumGetByteaPSlice() grabs everything to the
3341 : * end of the string if we pass it a negative value for length.
3342 : */
3343 32 : L1 = -1;
3344 : }
3345 64 : else if (L < 0)
3346 : {
3347 : /* SQL99 says to throw an error for E < S, i.e., negative length */
3348 8 : ereport(ERROR,
3349 : (errcode(ERRCODE_SUBSTRING_ERROR),
3350 : errmsg("negative substring length not allowed")));
3351 : L1 = -1; /* silence stupider compilers */
3352 : }
3353 56 : else if (pg_add_s32_overflow(S, L, &E))
3354 : {
3355 : /*
3356 : * L could be large enough for S + L to overflow, in which case the
3357 : * substring must run to end of string.
3358 : */
3359 4 : L1 = -1;
3360 : }
3361 : else
3362 : {
3363 : /*
3364 : * A zero or negative value for the end position can happen if the
3365 : * start was negative or one. SQL99 says to return a zero-length
3366 : * string.
3367 : */
3368 52 : if (E < 1)
3369 0 : return PG_STR_GET_BYTEA("");
3370 :
3371 52 : L1 = E - S1;
3372 : }
3373 :
3374 : /*
3375 : * If the start position is past the end of the string, SQL99 says to
3376 : * return a zero-length string -- DatumGetByteaPSlice() will do that for
3377 : * us. We need only convert S1 to zero-based starting position.
3378 : */
3379 88 : return DatumGetByteaPSlice(str, S1 - 1, L1);
3380 : }
3381 :
3382 : /*
3383 : * byteaoverlay
3384 : * Replace specified substring of first string with second
3385 : *
3386 : * The SQL standard defines OVERLAY() in terms of substring and concatenation.
3387 : * This code is a direct implementation of what the standard says.
3388 : */
3389 : Datum
3390 4 : byteaoverlay(PG_FUNCTION_ARGS)
3391 : {
3392 4 : bytea *t1 = PG_GETARG_BYTEA_PP(0);
3393 4 : bytea *t2 = PG_GETARG_BYTEA_PP(1);
3394 4 : int sp = PG_GETARG_INT32(2); /* substring start position */
3395 4 : int sl = PG_GETARG_INT32(3); /* substring length */
3396 :
3397 4 : PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3398 : }
3399 :
3400 : Datum
3401 8 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
3402 : {
3403 8 : bytea *t1 = PG_GETARG_BYTEA_PP(0);
3404 8 : bytea *t2 = PG_GETARG_BYTEA_PP(1);
3405 8 : int sp = PG_GETARG_INT32(2); /* substring start position */
3406 : int sl;
3407 :
3408 8 : sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
3409 8 : PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3410 : }
3411 :
3412 : static bytea *
3413 12 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
3414 : {
3415 : bytea *result;
3416 : bytea *s1;
3417 : bytea *s2;
3418 : int sp_pl_sl;
3419 :
3420 : /*
3421 : * Check for possible integer-overflow cases. For negative sp, throw a
3422 : * "substring length" error because that's what should be expected
3423 : * according to the spec's definition of OVERLAY().
3424 : */
3425 12 : if (sp <= 0)
3426 0 : ereport(ERROR,
3427 : (errcode(ERRCODE_SUBSTRING_ERROR),
3428 : errmsg("negative substring length not allowed")));
3429 12 : if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
3430 0 : ereport(ERROR,
3431 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
3432 : errmsg("integer out of range")));
3433 :
3434 12 : s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
3435 12 : s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
3436 12 : result = bytea_catenate(s1, t2);
3437 12 : result = bytea_catenate(result, s2);
3438 :
3439 12 : return result;
3440 : }
3441 :
3442 : /*
3443 : * byteapos -
3444 : * Return the position of the specified substring.
3445 : * Implements the SQL POSITION() function.
3446 : * Cloned from textpos and modified as required.
3447 : */
3448 : Datum
3449 0 : byteapos(PG_FUNCTION_ARGS)
3450 : {
3451 0 : bytea *t1 = PG_GETARG_BYTEA_PP(0);
3452 0 : bytea *t2 = PG_GETARG_BYTEA_PP(1);
3453 : int pos;
3454 : int px,
3455 : p;
3456 : int len1,
3457 : len2;
3458 : char *p1,
3459 : *p2;
3460 :
3461 0 : len1 = VARSIZE_ANY_EXHDR(t1);
3462 0 : len2 = VARSIZE_ANY_EXHDR(t2);
3463 :
3464 0 : if (len2 <= 0)
3465 0 : PG_RETURN_INT32(1); /* result for empty pattern */
3466 :
3467 0 : p1 = VARDATA_ANY(t1);
3468 0 : p2 = VARDATA_ANY(t2);
3469 :
3470 0 : pos = 0;
3471 0 : px = (len1 - len2);
3472 0 : for (p = 0; p <= px; p++)
3473 : {
3474 0 : if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
3475 : {
3476 0 : pos = p + 1;
3477 0 : break;
3478 : };
3479 0 : p1++;
3480 : };
3481 :
3482 0 : PG_RETURN_INT32(pos);
3483 : }
3484 :
3485 : /*-------------------------------------------------------------
3486 : * byteaGetByte
3487 : *
3488 : * this routine treats "bytea" as an array of bytes.
3489 : * It returns the Nth byte (a number between 0 and 255).
3490 : *-------------------------------------------------------------
3491 : */
3492 : Datum
3493 8 : byteaGetByte(PG_FUNCTION_ARGS)
3494 : {
3495 8 : bytea *v = PG_GETARG_BYTEA_PP(0);
3496 8 : int32 n = PG_GETARG_INT32(1);
3497 : int len;
3498 : int byte;
3499 :
3500 8 : len = VARSIZE_ANY_EXHDR(v);
3501 :
3502 8 : if (n < 0 || n >= len)
3503 4 : ereport(ERROR,
3504 : (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3505 : errmsg("index %d out of valid range, 0..%d",
3506 : n, len - 1)));
3507 :
3508 4 : byte = ((unsigned char *) VARDATA_ANY(v))[n];
3509 :
3510 4 : PG_RETURN_INT32(byte);
3511 : }
3512 :
3513 : /*-------------------------------------------------------------
3514 : * byteaGetBit
3515 : *
3516 : * This routine treats a "bytea" type like an array of bits.
3517 : * It returns the value of the Nth bit (0 or 1).
3518 : *
3519 : *-------------------------------------------------------------
3520 : */
3521 : Datum
3522 8 : byteaGetBit(PG_FUNCTION_ARGS)
3523 : {
3524 8 : bytea *v = PG_GETARG_BYTEA_PP(0);
3525 8 : int64 n = PG_GETARG_INT64(1);
3526 : int byteNo,
3527 : bitNo;
3528 : int len;
3529 : int byte;
3530 :
3531 8 : len = VARSIZE_ANY_EXHDR(v);
3532 :
3533 8 : if (n < 0 || n >= (int64) len * 8)
3534 4 : ereport(ERROR,
3535 : (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3536 : errmsg("index %lld out of valid range, 0..%lld",
3537 : (long long) n, (long long) len * 8 - 1)));
3538 :
3539 : /* n/8 is now known < len, so safe to cast to int */
3540 4 : byteNo = (int) (n / 8);
3541 4 : bitNo = (int) (n % 8);
3542 :
3543 4 : byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
3544 :
3545 4 : if (byte & (1 << bitNo))
3546 4 : PG_RETURN_INT32(1);
3547 : else
3548 0 : PG_RETURN_INT32(0);
3549 : }
3550 :
3551 : /*-------------------------------------------------------------
3552 : * byteaSetByte
3553 : *
3554 : * Given an instance of type 'bytea' creates a new one with
3555 : * the Nth byte set to the given value.
3556 : *
3557 : *-------------------------------------------------------------
3558 : */
3559 : Datum
3560 8 : byteaSetByte(PG_FUNCTION_ARGS)
3561 : {
3562 8 : bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3563 8 : int32 n = PG_GETARG_INT32(1);
3564 8 : int32 newByte = PG_GETARG_INT32(2);
3565 : int len;
3566 :
3567 8 : len = VARSIZE(res) - VARHDRSZ;
3568 :
3569 8 : if (n < 0 || n >= len)
3570 4 : ereport(ERROR,
3571 : (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3572 : errmsg("index %d out of valid range, 0..%d",
3573 : n, len - 1)));
3574 :
3575 : /*
3576 : * Now set the byte.
3577 : */
3578 4 : ((unsigned char *) VARDATA(res))[n] = newByte;
3579 :
3580 4 : PG_RETURN_BYTEA_P(res);
3581 : }
3582 :
3583 : /*-------------------------------------------------------------
3584 : * byteaSetBit
3585 : *
3586 : * Given an instance of type 'bytea' creates a new one with
3587 : * the Nth bit set to the given value.
3588 : *
3589 : *-------------------------------------------------------------
3590 : */
3591 : Datum
3592 8 : byteaSetBit(PG_FUNCTION_ARGS)
3593 : {
3594 8 : bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3595 8 : int64 n = PG_GETARG_INT64(1);
3596 8 : int32 newBit = PG_GETARG_INT32(2);
3597 : int len;
3598 : int oldByte,
3599 : newByte;
3600 : int byteNo,
3601 : bitNo;
3602 :
3603 8 : len = VARSIZE(res) - VARHDRSZ;
3604 :
3605 8 : if (n < 0 || n >= (int64) len * 8)
3606 4 : ereport(ERROR,
3607 : (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3608 : errmsg("index %lld out of valid range, 0..%lld",
3609 : (long long) n, (long long) len * 8 - 1)));
3610 :
3611 : /* n/8 is now known < len, so safe to cast to int */
3612 4 : byteNo = (int) (n / 8);
3613 4 : bitNo = (int) (n % 8);
3614 :
3615 : /*
3616 : * sanity check!
3617 : */
3618 4 : if (newBit != 0 && newBit != 1)
3619 0 : ereport(ERROR,
3620 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3621 : errmsg("new bit must be 0 or 1")));
3622 :
3623 : /*
3624 : * Update the byte.
3625 : */
3626 4 : oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3627 :
3628 4 : if (newBit == 0)
3629 4 : newByte = oldByte & (~(1 << bitNo));
3630 : else
3631 0 : newByte = oldByte | (1 << bitNo);
3632 :
3633 4 : ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3634 :
3635 4 : PG_RETURN_BYTEA_P(res);
3636 : }
3637 :
3638 :
3639 : /* text_name()
3640 : * Converts a text type to a Name type.
3641 : */
3642 : Datum
3643 2882 : text_name(PG_FUNCTION_ARGS)
3644 : {
3645 2882 : text *s = PG_GETARG_TEXT_PP(0);
3646 : Name result;
3647 : int len;
3648 :
3649 2882 : len = VARSIZE_ANY_EXHDR(s);
3650 :
3651 : /* Truncate oversize input */
3652 2882 : if (len >= NAMEDATALEN)
3653 4 : len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
3654 :
3655 : /* We use palloc0 here to ensure result is zero-padded */
3656 2882 : result = (Name) palloc0(NAMEDATALEN);
3657 2882 : memcpy(NameStr(*result), VARDATA_ANY(s), len);
3658 :
3659 2882 : PG_RETURN_NAME(result);
3660 : }
3661 :
3662 : /* name_text()
3663 : * Converts a Name type to a text type.
3664 : */
3665 : Datum
3666 576696 : name_text(PG_FUNCTION_ARGS)
3667 : {
3668 576696 : Name s = PG_GETARG_NAME(0);
3669 :
3670 576696 : PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
3671 : }
3672 :
3673 :
3674 : /*
3675 : * textToQualifiedNameList - convert a text object to list of names
3676 : *
3677 : * This implements the input parsing needed by nextval() and other
3678 : * functions that take a text parameter representing a qualified name.
3679 : * We split the name at dots, downcase if not double-quoted, and
3680 : * truncate names if they're too long.
3681 : */
3682 : List *
3683 878 : textToQualifiedNameList(text *textval)
3684 : {
3685 : char *rawname;
3686 878 : List *result = NIL;
3687 : List *namelist;
3688 : ListCell *l;
3689 :
3690 : /* Convert to C string (handles possible detoasting). */
3691 : /* Note we rely on being able to modify rawname below. */
3692 878 : rawname = text_to_cstring(textval);
3693 :
3694 878 : if (!SplitIdentifierString(rawname, '.', &namelist))
3695 0 : ereport(ERROR,
3696 : (errcode(ERRCODE_INVALID_NAME),
3697 : errmsg("invalid name syntax")));
3698 :
3699 878 : if (namelist == NIL)
3700 0 : ereport(ERROR,
3701 : (errcode(ERRCODE_INVALID_NAME),
3702 : errmsg("invalid name syntax")));
3703 :
3704 1832 : foreach(l, namelist)
3705 : {
3706 954 : char *curname = (char *) lfirst(l);
3707 :
3708 954 : result = lappend(result, makeString(pstrdup(curname)));
3709 : }
3710 :
3711 878 : pfree(rawname);
3712 878 : list_free(namelist);
3713 :
3714 878 : return result;
3715 : }
3716 :
3717 : /*
3718 : * SplitIdentifierString --- parse a string containing identifiers
3719 : *
3720 : * This is the guts of textToQualifiedNameList, and is exported for use in
3721 : * other situations such as parsing GUC variables. In the GUC case, it's
3722 : * important to avoid memory leaks, so the API is designed to minimize the
3723 : * amount of stuff that needs to be allocated and freed.
3724 : *
3725 : * Inputs:
3726 : * rawstring: the input string; must be overwritable! On return, it's
3727 : * been modified to contain the separated identifiers.
3728 : * separator: the separator punctuation expected between identifiers
3729 : * (typically '.' or ','). Whitespace may also appear around
3730 : * identifiers.
3731 : * Outputs:
3732 : * namelist: filled with a palloc'd list of pointers to identifiers within
3733 : * rawstring. Caller should list_free() this even on error return.
3734 : *
3735 : * Returns true if okay, false if there is a syntax error in the string.
3736 : *
3737 : * Note that an empty string is considered okay here, though not in
3738 : * textToQualifiedNameList.
3739 : */
3740 : bool
3741 89640 : SplitIdentifierString(char *rawstring, char separator,
3742 : List **namelist)
3743 : {
3744 89640 : char *nextp = rawstring;
3745 89640 : bool done = false;
3746 :
3747 89640 : *namelist = NIL;
3748 :
3749 89640 : while (scanner_isspace(*nextp))
3750 0 : nextp++; /* skip leading whitespace */
3751 :
3752 89640 : if (*nextp == '\0')
3753 9672 : return true; /* allow empty string */
3754 :
3755 : /* At the top of the loop, we are at start of a new identifier. */
3756 : do
3757 : {
3758 : char *curname;
3759 : char *endp;
3760 :
3761 130938 : if (*nextp == '"')
3762 : {
3763 : /* Quoted name --- collapse quote-quote pairs, no downcasing */
3764 17178 : curname = nextp + 1;
3765 : for (;;)
3766 : {
3767 17182 : endp = strchr(nextp + 1, '"');
3768 17180 : if (endp == NULL)
3769 0 : return false; /* mismatched quotes */
3770 17180 : if (endp[1] != '"')
3771 17178 : break; /* found end of quoted name */
3772 : /* Collapse adjacent quotes into one quote, and look again */
3773 2 : memmove(endp, endp + 1, strlen(endp));
3774 2 : nextp = endp;
3775 : }
3776 : /* endp now points at the terminating quote */
3777 17178 : nextp = endp + 1;
3778 : }
3779 : else
3780 : {
3781 : /* Unquoted name --- extends to separator or whitespace */
3782 : char *downname;
3783 : int len;
3784 :
3785 113760 : curname = nextp;
3786 1012004 : while (*nextp && *nextp != separator &&
3787 898246 : !scanner_isspace(*nextp))
3788 898244 : nextp++;
3789 113760 : endp = nextp;
3790 113760 : if (curname == nextp)
3791 0 : return false; /* empty unquoted name not allowed */
3792 :
3793 : /*
3794 : * Downcase the identifier, using same code as main lexer does.
3795 : *
3796 : * XXX because we want to overwrite the input in-place, we cannot
3797 : * support a downcasing transformation that increases the string
3798 : * length. This is not a problem given the current implementation
3799 : * of downcase_truncate_identifier, but we'll probably have to do
3800 : * something about this someday.
3801 : */
3802 113760 : len = endp - curname;
3803 113760 : downname = downcase_truncate_identifier(curname, len, false);
3804 : Assert(strlen(downname) <= len);
3805 113760 : strncpy(curname, downname, len); /* strncpy is required here */
3806 113760 : pfree(downname);
3807 : }
3808 :
3809 130940 : while (scanner_isspace(*nextp))
3810 2 : nextp++; /* skip trailing whitespace */
3811 :
3812 130938 : if (*nextp == separator)
3813 : {
3814 50970 : nextp++;
3815 76786 : while (scanner_isspace(*nextp))
3816 25816 : nextp++; /* skip leading whitespace for next */
3817 : /* we expect another name, so done remains false */
3818 : }
3819 79968 : else if (*nextp == '\0')
3820 79966 : done = true;
3821 : else
3822 2 : return false; /* invalid syntax */
3823 :
3824 : /* Now safe to overwrite separator with a null */
3825 130936 : *endp = '\0';
3826 :
3827 : /* Truncate name if it's overlength */
3828 130936 : truncate_identifier(curname, strlen(curname), false);
3829 :
3830 : /*
3831 : * Finished isolating current name --- add it to list
3832 : */
3833 130936 : *namelist = lappend(*namelist, curname);
3834 :
3835 : /* Loop back if we didn't reach end of string */
3836 130936 : } while (!done);
3837 :
3838 79966 : return true;
3839 : }
3840 :
3841 :
3842 : /*
3843 : * SplitDirectoriesString --- parse a string containing file/directory names
3844 : *
3845 : * This works fine on file names too; the function name is historical.
3846 : *
3847 : * This is similar to SplitIdentifierString, except that the parsing
3848 : * rules are meant to handle pathnames instead of identifiers: there is
3849 : * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3850 : * and we apply canonicalize_path() to each extracted string. Because of the
3851 : * last, the returned strings are separately palloc'd rather than being
3852 : * pointers into rawstring --- but we still scribble on rawstring.
3853 : *
3854 : * Inputs:
3855 : * rawstring: the input string; must be modifiable!
3856 : * separator: the separator punctuation expected between directories
3857 : * (typically ',' or ';'). Whitespace may also appear around
3858 : * directories.
3859 : * Outputs:
3860 : * namelist: filled with a palloc'd list of directory names.
3861 : * Caller should list_free_deep() this even on error return.
3862 : *
3863 : * Returns true if okay, false if there is a syntax error in the string.
3864 : *
3865 : * Note that an empty string is considered okay here.
3866 : */
3867 : bool
3868 802 : SplitDirectoriesString(char *rawstring, char separator,
3869 : List **namelist)
3870 : {
3871 802 : char *nextp = rawstring;
3872 802 : bool done = false;
3873 :
3874 802 : *namelist = NIL;
3875 :
3876 802 : while (scanner_isspace(*nextp))
3877 0 : nextp++; /* skip leading whitespace */
3878 :
3879 802 : if (*nextp == '\0')
3880 2 : return true; /* allow empty string */
3881 :
3882 : /* At the top of the loop, we are at start of a new directory. */
3883 : do
3884 : {
3885 : char *curname;
3886 : char *endp;
3887 :
3888 800 : if (*nextp == '"')
3889 : {
3890 : /* Quoted name --- collapse quote-quote pairs */
3891 0 : curname = nextp + 1;
3892 : for (;;)
3893 : {
3894 0 : endp = strchr(nextp + 1, '"');
3895 0 : if (endp == NULL)
3896 0 : return false; /* mismatched quotes */
3897 0 : if (endp[1] != '"')
3898 0 : break; /* found end of quoted name */
3899 : /* Collapse adjacent quotes into one quote, and look again */
3900 0 : memmove(endp, endp + 1, strlen(endp));
3901 0 : nextp = endp;
3902 : }
3903 : /* endp now points at the terminating quote */
3904 0 : nextp = endp + 1;
3905 : }
3906 : else
3907 : {
3908 : /* Unquoted name --- extends to separator or end of string */
3909 800 : curname = endp = nextp;
3910 14070 : while (*nextp && *nextp != separator)
3911 : {
3912 : /* trailing whitespace should not be included in name */
3913 13270 : if (!scanner_isspace(*nextp))
3914 13270 : endp = nextp + 1;
3915 13270 : nextp++;
3916 : }
3917 800 : if (curname == endp)
3918 0 : return false; /* empty unquoted name not allowed */
3919 : }
3920 :
3921 800 : while (scanner_isspace(*nextp))
3922 0 : nextp++; /* skip trailing whitespace */
3923 :
3924 800 : if (*nextp == separator)
3925 : {
3926 0 : nextp++;
3927 0 : while (scanner_isspace(*nextp))
3928 0 : nextp++; /* skip leading whitespace for next */
3929 : /* we expect another name, so done remains false */
3930 : }
3931 800 : else if (*nextp == '\0')
3932 800 : done = true;
3933 : else
3934 0 : return false; /* invalid syntax */
3935 :
3936 : /* Now safe to overwrite separator with a null */
3937 800 : *endp = '\0';
3938 :
3939 : /* Truncate path if it's overlength */
3940 800 : if (strlen(curname) >= MAXPGPATH)
3941 0 : curname[MAXPGPATH - 1] = '\0';
3942 :
3943 : /*
3944 : * Finished isolating current name --- add it to list
3945 : */
3946 800 : curname = pstrdup(curname);
3947 800 : canonicalize_path(curname);
3948 800 : *namelist = lappend(*namelist, curname);
3949 :
3950 : /* Loop back if we didn't reach end of string */
3951 800 : } while (!done);
3952 :
3953 800 : return true;
3954 : }
3955 :
3956 :
3957 : /*
3958 : * SplitGUCList --- parse a string containing identifiers or file names
3959 : *
3960 : * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
3961 : * presuming whether the elements will be taken as identifiers or file names.
3962 : * We assume the input has already been through flatten_set_variable_args(),
3963 : * so that we need never downcase (if appropriate, that was done already).
3964 : * Nor do we ever truncate, since we don't know the correct max length.
3965 : * We disallow embedded whitespace for simplicity (it shouldn't matter,
3966 : * because any embedded whitespace should have led to double-quoting).
3967 : * Otherwise the API is identical to SplitIdentifierString.
3968 : *
3969 : * XXX it's annoying to have so many copies of this string-splitting logic.
3970 : * However, it's not clear that having one function with a bunch of option
3971 : * flags would be much better.
3972 : *
3973 : * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
3974 : * Be sure to update that if you have to change this.
3975 : *
3976 : * Inputs:
3977 : * rawstring: the input string; must be overwritable! On return, it's
3978 : * been modified to contain the separated identifiers.
3979 : * separator: the separator punctuation expected between identifiers
3980 : * (typically '.' or ','). Whitespace may also appear around
3981 : * identifiers.
3982 : * Outputs:
3983 : * namelist: filled with a palloc'd list of pointers to identifiers within
3984 : * rawstring. Caller should list_free() this even on error return.
3985 : *
3986 : * Returns true if okay, false if there is a syntax error in the string.
3987 : */
3988 : bool
3989 796 : SplitGUCList(char *rawstring, char separator,
3990 : List **namelist)
3991 : {
3992 796 : char *nextp = rawstring;
3993 796 : bool done = false;
3994 :
3995 796 : *namelist = NIL;
3996 :
3997 796 : while (scanner_isspace(*nextp))
3998 0 : nextp++; /* skip leading whitespace */
3999 :
4000 796 : if (*nextp == '\0')
4001 762 : return true; /* allow empty string */
4002 :
4003 : /* At the top of the loop, we are at start of a new identifier. */
4004 : do
4005 : {
4006 : char *curname;
4007 : char *endp;
4008 :
4009 46 : if (*nextp == '"')
4010 : {
4011 : /* Quoted name --- collapse quote-quote pairs */
4012 16 : curname = nextp + 1;
4013 : for (;;)
4014 : {
4015 24 : endp = strchr(nextp + 1, '"');
4016 20 : if (endp == NULL)
4017 0 : return false; /* mismatched quotes */
4018 20 : if (endp[1] != '"')
4019 16 : break; /* found end of quoted name */
4020 : /* Collapse adjacent quotes into one quote, and look again */
4021 4 : memmove(endp, endp + 1, strlen(endp));
4022 4 : nextp = endp;
4023 : }
4024 : /* endp now points at the terminating quote */
4025 16 : nextp = endp + 1;
4026 : }
4027 : else
4028 : {
4029 : /* Unquoted name --- extends to separator or whitespace */
4030 30 : curname = nextp;
4031 304 : while (*nextp && *nextp != separator &&
4032 274 : !scanner_isspace(*nextp))
4033 274 : nextp++;
4034 30 : endp = nextp;
4035 30 : if (curname == nextp)
4036 0 : return false; /* empty unquoted name not allowed */
4037 : }
4038 :
4039 46 : while (scanner_isspace(*nextp))
4040 0 : nextp++; /* skip trailing whitespace */
4041 :
4042 46 : if (*nextp == separator)
4043 : {
4044 12 : nextp++;
4045 24 : while (scanner_isspace(*nextp))
4046 12 : nextp++; /* skip leading whitespace for next */
4047 : /* we expect another name, so done remains false */
4048 : }
4049 34 : else if (*nextp == '\0')
4050 34 : done = true;
4051 : else
4052 0 : return false; /* invalid syntax */
4053 :
4054 : /* Now safe to overwrite separator with a null */
4055 46 : *endp = '\0';
4056 :
4057 : /*
4058 : * Finished isolating current name --- add it to list
4059 : */
4060 46 : *namelist = lappend(*namelist, curname);
4061 :
4062 : /* Loop back if we didn't reach end of string */
4063 46 : } while (!done);
4064 :
4065 34 : return true;
4066 : }
4067 :
4068 :
4069 : /*****************************************************************************
4070 : * Comparison Functions used for bytea
4071 : *
4072 : * Note: btree indexes need these routines not to leak memory; therefore,
4073 : * be careful to free working copies of toasted datums. Most places don't
4074 : * need to be so careful.
4075 : *****************************************************************************/
4076 :
4077 : Datum
4078 7960 : byteaeq(PG_FUNCTION_ARGS)
4079 : {
4080 7960 : Datum arg1 = PG_GETARG_DATUM(0);
4081 7960 : Datum arg2 = PG_GETARG_DATUM(1);
4082 : bool result;
4083 : Size len1,
4084 : len2;
4085 :
4086 : /*
4087 : * We can use a fast path for unequal lengths, which might save us from
4088 : * having to detoast one or both values.
4089 : */
4090 7960 : len1 = toast_raw_datum_size(arg1);
4091 7960 : len2 = toast_raw_datum_size(arg2);
4092 7960 : if (len1 != len2)
4093 4304 : result = false;
4094 : else
4095 : {
4096 3656 : bytea *barg1 = DatumGetByteaPP(arg1);
4097 3656 : bytea *barg2 = DatumGetByteaPP(arg2);
4098 :
4099 3656 : result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
4100 : len1 - VARHDRSZ) == 0);
4101 :
4102 3656 : PG_FREE_IF_COPY(barg1, 0);
4103 3656 : PG_FREE_IF_COPY(barg2, 1);
4104 : }
4105 :
4106 7960 : PG_RETURN_BOOL(result);
4107 : }
4108 :
4109 : Datum
4110 512 : byteane(PG_FUNCTION_ARGS)
4111 : {
4112 512 : Datum arg1 = PG_GETARG_DATUM(0);
4113 512 : Datum arg2 = PG_GETARG_DATUM(1);
4114 : bool result;
4115 : Size len1,
4116 : len2;
4117 :
4118 : /*
4119 : * We can use a fast path for unequal lengths, which might save us from
4120 : * having to detoast one or both values.
4121 : */
4122 512 : len1 = toast_raw_datum_size(arg1);
4123 512 : len2 = toast_raw_datum_size(arg2);
4124 512 : if (len1 != len2)
4125 0 : result = true;
4126 : else
4127 : {
4128 512 : bytea *barg1 = DatumGetByteaPP(arg1);
4129 512 : bytea *barg2 = DatumGetByteaPP(arg2);
4130 :
4131 512 : result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
4132 : len1 - VARHDRSZ) != 0);
4133 :
4134 512 : PG_FREE_IF_COPY(barg1, 0);
4135 512 : PG_FREE_IF_COPY(barg2, 1);
4136 : }
4137 :
4138 512 : PG_RETURN_BOOL(result);
4139 : }
4140 :
4141 : Datum
4142 7302 : bytealt(PG_FUNCTION_ARGS)
4143 : {
4144 7302 : bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4145 7302 : bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4146 : int len1,
4147 : len2;
4148 : int cmp;
4149 :
4150 7302 : len1 = VARSIZE_ANY_EXHDR(arg1);
4151 7302 : len2 = VARSIZE_ANY_EXHDR(arg2);
4152 :
4153 7302 : cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4154 :
4155 7302 : PG_FREE_IF_COPY(arg1, 0);
4156 7302 : PG_FREE_IF_COPY(arg2, 1);
4157 :
4158 7302 : PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
4159 : }
4160 :
4161 : Datum
4162 5556 : byteale(PG_FUNCTION_ARGS)
4163 : {
4164 5556 : bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4165 5556 : bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4166 : int len1,
4167 : len2;
4168 : int cmp;
4169 :
4170 5556 : len1 = VARSIZE_ANY_EXHDR(arg1);
4171 5556 : len2 = VARSIZE_ANY_EXHDR(arg2);
4172 :
4173 5556 : cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4174 :
4175 5556 : PG_FREE_IF_COPY(arg1, 0);
4176 5556 : PG_FREE_IF_COPY(arg2, 1);
4177 :
4178 5556 : PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
4179 : }
4180 :
4181 : Datum
4182 5214 : byteagt(PG_FUNCTION_ARGS)
4183 : {
4184 5214 : bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4185 5214 : bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4186 : int len1,
4187 : len2;
4188 : int cmp;
4189 :
4190 5214 : len1 = VARSIZE_ANY_EXHDR(arg1);
4191 5214 : len2 = VARSIZE_ANY_EXHDR(arg2);
4192 :
4193 5214 : cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4194 :
4195 5214 : PG_FREE_IF_COPY(arg1, 0);
4196 5214 : PG_FREE_IF_COPY(arg2, 1);
4197 :
4198 5214 : PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
4199 : }
4200 :
4201 : Datum
4202 4394 : byteage(PG_FUNCTION_ARGS)
4203 : {
4204 4394 : bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4205 4394 : bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4206 : int len1,
4207 : len2;
4208 : int cmp;
4209 :
4210 4394 : len1 = VARSIZE_ANY_EXHDR(arg1);
4211 4394 : len2 = VARSIZE_ANY_EXHDR(arg2);
4212 :
4213 4394 : cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4214 :
4215 4394 : PG_FREE_IF_COPY(arg1, 0);
4216 4394 : PG_FREE_IF_COPY(arg2, 1);
4217 :
4218 4394 : PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
4219 : }
4220 :
4221 : Datum
4222 87300 : byteacmp(PG_FUNCTION_ARGS)
4223 : {
4224 87300 : bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4225 87300 : bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4226 : int len1,
4227 : len2;
4228 : int cmp;
4229 :
4230 87300 : len1 = VARSIZE_ANY_EXHDR(arg1);
4231 87300 : len2 = VARSIZE_ANY_EXHDR(arg2);
4232 :
4233 87300 : cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4234 87300 : if ((cmp == 0) && (len1 != len2))
4235 14572 : cmp = (len1 < len2) ? -1 : 1;
4236 :
4237 87300 : PG_FREE_IF_COPY(arg1, 0);
4238 87300 : PG_FREE_IF_COPY(arg2, 1);
4239 :
4240 87300 : PG_RETURN_INT32(cmp);
4241 : }
4242 :
4243 : Datum
4244 20 : bytea_sortsupport(PG_FUNCTION_ARGS)
4245 : {
4246 20 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
4247 : MemoryContext oldcontext;
4248 :
4249 20 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
4250 :
4251 : /* Use generic string SortSupport, forcing "C" collation */
4252 20 : varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
4253 :
4254 20 : MemoryContextSwitchTo(oldcontext);
4255 :
4256 20 : PG_RETURN_VOID();
4257 : }
4258 :
4259 : /*
4260 : * appendStringInfoText
4261 : *
4262 : * Append a text to str.
4263 : * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
4264 : */
4265 : static void
4266 1236856 : appendStringInfoText(StringInfo str, const text *t)
4267 : {
4268 1236856 : appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
4269 1236856 : }
4270 :
4271 : /*
4272 : * replace_text
4273 : * replace all occurrences of 'old_sub_str' in 'orig_str'
4274 : * with 'new_sub_str' to form 'new_str'
4275 : *
4276 : * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
4277 : * otherwise returns 'new_str'
4278 : */
4279 : Datum
4280 1672 : replace_text(PG_FUNCTION_ARGS)
4281 : {
4282 1672 : text *src_text = PG_GETARG_TEXT_PP(0);
4283 1672 : text *from_sub_text = PG_GETARG_TEXT_PP(1);
4284 1672 : text *to_sub_text = PG_GETARG_TEXT_PP(2);
4285 : int src_text_len;
4286 : int from_sub_text_len;
4287 : TextPositionState state;
4288 : text *ret_text;
4289 : int chunk_len;
4290 : char *curr_ptr;
4291 : char *start_ptr;
4292 : StringInfoData str;
4293 : bool found;
4294 :
4295 1672 : src_text_len = VARSIZE_ANY_EXHDR(src_text);
4296 1672 : from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
4297 :
4298 : /* Return unmodified source string if empty source or pattern */
4299 1672 : if (src_text_len < 1 || from_sub_text_len < 1)
4300 : {
4301 0 : PG_RETURN_TEXT_P(src_text);
4302 : }
4303 :
4304 1672 : text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
4305 :
4306 1672 : found = text_position_next(&state);
4307 :
4308 : /* When the from_sub_text is not found, there is nothing to do. */
4309 1672 : if (!found)
4310 : {
4311 514 : text_position_cleanup(&state);
4312 514 : PG_RETURN_TEXT_P(src_text);
4313 : }
4314 1158 : curr_ptr = text_position_get_match_ptr(&state);
4315 1158 : start_ptr = VARDATA_ANY(src_text);
4316 :
4317 1158 : initStringInfo(&str);
4318 :
4319 : do
4320 : {
4321 4790 : CHECK_FOR_INTERRUPTS();
4322 :
4323 : /* copy the data skipped over by last text_position_next() */
4324 4790 : chunk_len = curr_ptr - start_ptr;
4325 4790 : appendBinaryStringInfo(&str, start_ptr, chunk_len);
4326 :
4327 4790 : appendStringInfoText(&str, to_sub_text);
4328 :
4329 4790 : start_ptr = curr_ptr + from_sub_text_len;
4330 :
4331 4790 : found = text_position_next(&state);
4332 4790 : if (found)
4333 3632 : curr_ptr = text_position_get_match_ptr(&state);
4334 : }
4335 4790 : while (found);
4336 :
4337 : /* copy trailing data */
4338 1158 : chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4339 1158 : appendBinaryStringInfo(&str, start_ptr, chunk_len);
4340 :
4341 1158 : text_position_cleanup(&state);
4342 :
4343 1158 : ret_text = cstring_to_text_with_len(str.data, str.len);
4344 1158 : pfree(str.data);
4345 :
4346 1158 : PG_RETURN_TEXT_P(ret_text);
4347 : }
4348 :
4349 : /*
4350 : * check_replace_text_has_escape_char
4351 : *
4352 : * check whether replace_text contains escape char.
4353 : */
4354 : static bool
4355 4160 : check_replace_text_has_escape_char(const text *replace_text)
4356 : {
4357 4160 : const char *p = VARDATA_ANY(replace_text);
4358 4160 : const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4359 :
4360 4160 : if (pg_database_encoding_max_length() == 1)
4361 : {
4362 12 : for (; p < p_end; p++)
4363 : {
4364 0 : if (*p == '\\')
4365 0 : return true;
4366 : }
4367 : }
4368 : else
4369 : {
4370 67986 : for (; p < p_end; p += pg_mblen(p))
4371 : {
4372 63946 : if (*p == '\\')
4373 108 : return true;
4374 : }
4375 : }
4376 :
4377 4052 : return false;
4378 : }
4379 :
4380 : /*
4381 : * appendStringInfoRegexpSubstr
4382 : *
4383 : * Append replace_text to str, substituting regexp back references for
4384 : * \n escapes. start_ptr is the start of the match in the source string,
4385 : * at logical character position data_pos.
4386 : */
4387 : static void
4388 60 : appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
4389 : regmatch_t *pmatch,
4390 : char *start_ptr, int data_pos)
4391 : {
4392 60 : const char *p = VARDATA_ANY(replace_text);
4393 60 : const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4394 60 : int eml = pg_database_encoding_max_length();
4395 :
4396 : for (;;)
4397 122 : {
4398 182 : const char *chunk_start = p;
4399 : int so;
4400 : int eo;
4401 :
4402 : /* Find next escape char. */
4403 182 : if (eml == 1)
4404 : {
4405 0 : for (; p < p_end && *p != '\\'; p++)
4406 : /* nothing */ ;
4407 : }
4408 : else
4409 : {
4410 990 : for (; p < p_end && *p != '\\'; p += pg_mblen(p))
4411 : /* nothing */ ;
4412 : }
4413 :
4414 : /* Copy the text we just scanned over, if any. */
4415 182 : if (p > chunk_start)
4416 72 : appendBinaryStringInfo(str, chunk_start, p - chunk_start);
4417 :
4418 : /* Done if at end of string, else advance over escape char. */
4419 182 : if (p >= p_end)
4420 60 : break;
4421 122 : p++;
4422 :
4423 122 : if (p >= p_end)
4424 : {
4425 : /* Escape at very end of input. Treat same as unexpected char */
4426 0 : appendStringInfoChar(str, '\\');
4427 0 : break;
4428 : }
4429 :
4430 122 : if (*p >= '1' && *p <= '9')
4431 104 : {
4432 : /* Use the back reference of regexp. */
4433 104 : int idx = *p - '0';
4434 :
4435 104 : so = pmatch[idx].rm_so;
4436 104 : eo = pmatch[idx].rm_eo;
4437 104 : p++;
4438 : }
4439 18 : else if (*p == '&')
4440 : {
4441 : /* Use the entire matched string. */
4442 0 : so = pmatch[0].rm_so;
4443 0 : eo = pmatch[0].rm_eo;
4444 0 : p++;
4445 : }
4446 18 : else if (*p == '\\')
4447 : {
4448 : /* \\ means transfer one \ to output. */
4449 18 : appendStringInfoChar(str, '\\');
4450 18 : p++;
4451 18 : continue;
4452 : }
4453 : else
4454 : {
4455 : /*
4456 : * If escape char is not followed by any expected char, just treat
4457 : * it as ordinary data to copy. (XXX would it be better to throw
4458 : * an error?)
4459 : */
4460 0 : appendStringInfoChar(str, '\\');
4461 0 : continue;
4462 : }
4463 :
4464 104 : if (so != -1 && eo != -1)
4465 : {
4466 : /*
4467 : * Copy the text that is back reference of regexp. Note so and eo
4468 : * are counted in characters not bytes.
4469 : */
4470 : char *chunk_start;
4471 : int chunk_len;
4472 :
4473 : Assert(so >= data_pos);
4474 104 : chunk_start = start_ptr;
4475 104 : chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
4476 104 : chunk_len = charlen_to_bytelen(chunk_start, eo - so);
4477 104 : appendBinaryStringInfo(str, chunk_start, chunk_len);
4478 : }
4479 : }
4480 60 : }
4481 :
4482 : #define REGEXP_REPLACE_BACKREF_CNT 10
4483 :
4484 : /*
4485 : * replace_text_regexp
4486 : *
4487 : * replace text that matches to regexp in src_text to replace_text.
4488 : *
4489 : * Note: to avoid having to include regex.h in builtins.h, we declare
4490 : * the regexp argument as void *, but really it's regex_t *.
4491 : */
4492 : text *
4493 4160 : replace_text_regexp(text *src_text, void *regexp,
4494 : text *replace_text, bool glob)
4495 : {
4496 : text *ret_text;
4497 4160 : regex_t *re = (regex_t *) regexp;
4498 4160 : int src_text_len = VARSIZE_ANY_EXHDR(src_text);
4499 : StringInfoData buf;
4500 : regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
4501 : pg_wchar *data;
4502 : size_t data_len;
4503 : int search_start;
4504 : int data_pos;
4505 : char *start_ptr;
4506 : bool have_escape;
4507 :
4508 4160 : initStringInfo(&buf);
4509 :
4510 : /* Convert data string to wide characters. */
4511 4160 : data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
4512 4160 : data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
4513 :
4514 : /* Check whether replace_text has escape char. */
4515 4160 : have_escape = check_replace_text_has_escape_char(replace_text);
4516 :
4517 : /* start_ptr points to the data_pos'th character of src_text */
4518 4160 : start_ptr = (char *) VARDATA_ANY(src_text);
4519 4160 : data_pos = 0;
4520 :
4521 4160 : search_start = 0;
4522 6358 : while (search_start <= data_len)
4523 : {
4524 : int regexec_result;
4525 :
4526 6354 : CHECK_FOR_INTERRUPTS();
4527 :
4528 6354 : regexec_result = pg_regexec(re,
4529 : data,
4530 : data_len,
4531 : search_start,
4532 : NULL, /* no details */
4533 : REGEXP_REPLACE_BACKREF_CNT,
4534 : pmatch,
4535 : 0);
4536 :
4537 6354 : if (regexec_result == REG_NOMATCH)
4538 3650 : break;
4539 :
4540 2704 : if (regexec_result != REG_OKAY)
4541 : {
4542 : char errMsg[100];
4543 :
4544 0 : CHECK_FOR_INTERRUPTS();
4545 0 : pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
4546 0 : ereport(ERROR,
4547 : (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
4548 : errmsg("regular expression failed: %s", errMsg)));
4549 : }
4550 :
4551 : /*
4552 : * Copy the text to the left of the match position. Note we are given
4553 : * character not byte indexes.
4554 : */
4555 2704 : if (pmatch[0].rm_so - data_pos > 0)
4556 : {
4557 : int chunk_len;
4558 :
4559 2642 : chunk_len = charlen_to_bytelen(start_ptr,
4560 2642 : pmatch[0].rm_so - data_pos);
4561 2642 : appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4562 :
4563 : /*
4564 : * Advance start_ptr over that text, to avoid multiple rescans of
4565 : * it if the replace_text contains multiple back-references.
4566 : */
4567 2642 : start_ptr += chunk_len;
4568 2642 : data_pos = pmatch[0].rm_so;
4569 : }
4570 :
4571 : /*
4572 : * Copy the replace_text. Process back references when the
4573 : * replace_text has escape characters.
4574 : */
4575 2704 : if (have_escape)
4576 60 : appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
4577 : start_ptr, data_pos);
4578 : else
4579 2644 : appendStringInfoText(&buf, replace_text);
4580 :
4581 : /* Advance start_ptr and data_pos over the matched text. */
4582 5408 : start_ptr += charlen_to_bytelen(start_ptr,
4583 2704 : pmatch[0].rm_eo - data_pos);
4584 2704 : data_pos = pmatch[0].rm_eo;
4585 :
4586 : /*
4587 : * When global option is off, replace the first instance only.
4588 : */
4589 2704 : if (!glob)
4590 506 : break;
4591 :
4592 : /*
4593 : * Advance search position. Normally we start the next search at the
4594 : * end of the previous match; but if the match was of zero length, we
4595 : * have to advance by one character, or we'd just find the same match
4596 : * again.
4597 : */
4598 2198 : search_start = data_pos;
4599 2198 : if (pmatch[0].rm_so == pmatch[0].rm_eo)
4600 8 : search_start++;
4601 : }
4602 :
4603 : /*
4604 : * Copy the text to the right of the last match.
4605 : */
4606 4160 : if (data_pos < data_len)
4607 : {
4608 : int chunk_len;
4609 :
4610 3978 : chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4611 3978 : appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4612 : }
4613 :
4614 4160 : ret_text = cstring_to_text_with_len(buf.data, buf.len);
4615 4160 : pfree(buf.data);
4616 4160 : pfree(data);
4617 :
4618 4160 : return ret_text;
4619 : }
4620 :
4621 : /*
4622 : * split_part
4623 : * parse input string based on provided field separator
4624 : * return N'th item (1 based, negative counts from end)
4625 : */
4626 : Datum
4627 68 : split_part(PG_FUNCTION_ARGS)
4628 : {
4629 68 : text *inputstring = PG_GETARG_TEXT_PP(0);
4630 68 : text *fldsep = PG_GETARG_TEXT_PP(1);
4631 68 : int fldnum = PG_GETARG_INT32(2);
4632 : int inputstring_len;
4633 : int fldsep_len;
4634 : TextPositionState state;
4635 : char *start_ptr;
4636 : char *end_ptr;
4637 : text *result_text;
4638 : bool found;
4639 :
4640 : /* field number is 1 based */
4641 68 : if (fldnum == 0)
4642 4 : ereport(ERROR,
4643 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4644 : errmsg("field position must not be zero")));
4645 :
4646 64 : inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4647 64 : fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4648 :
4649 : /* return empty string for empty input string */
4650 64 : if (inputstring_len < 1)
4651 8 : PG_RETURN_TEXT_P(cstring_to_text(""));
4652 :
4653 : /* handle empty field separator */
4654 56 : if (fldsep_len < 1)
4655 : {
4656 : /* if first or last field, return input string, else empty string */
4657 16 : if (fldnum == 1 || fldnum == -1)
4658 8 : PG_RETURN_TEXT_P(inputstring);
4659 : else
4660 8 : PG_RETURN_TEXT_P(cstring_to_text(""));
4661 : }
4662 :
4663 : /* find the first field separator */
4664 40 : text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
4665 :
4666 40 : found = text_position_next(&state);
4667 :
4668 : /* special case if fldsep not found at all */
4669 40 : if (!found)
4670 : {
4671 8 : text_position_cleanup(&state);
4672 : /* if first or last field, return input string, else empty string */
4673 8 : if (fldnum == 1 || fldnum == -1)
4674 4 : PG_RETURN_TEXT_P(inputstring);
4675 : else
4676 4 : PG_RETURN_TEXT_P(cstring_to_text(""));
4677 : }
4678 :
4679 : /*
4680 : * take care of a negative field number (i.e. count from the right) by
4681 : * converting to a positive field number; we need total number of fields
4682 : */
4683 32 : if (fldnum < 0)
4684 : {
4685 : /* we found a fldsep, so there are at least two fields */
4686 16 : int numfields = 2;
4687 :
4688 24 : while (text_position_next(&state))
4689 8 : numfields++;
4690 :
4691 : /* special case of last field does not require an extra pass */
4692 16 : if (fldnum == -1)
4693 : {
4694 4 : start_ptr = text_position_get_match_ptr(&state) + fldsep_len;
4695 4 : end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
4696 4 : text_position_cleanup(&state);
4697 4 : PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr,
4698 : end_ptr - start_ptr));
4699 : }
4700 :
4701 : /* else, convert fldnum to positive notation */
4702 12 : fldnum += numfields + 1;
4703 :
4704 : /* if nonexistent field, return empty string */
4705 12 : if (fldnum <= 0)
4706 : {
4707 4 : text_position_cleanup(&state);
4708 4 : PG_RETURN_TEXT_P(cstring_to_text(""));
4709 : }
4710 :
4711 : /* reset to pointing at first match, but now with positive fldnum */
4712 8 : text_position_reset(&state);
4713 8 : found = text_position_next(&state);
4714 : Assert(found);
4715 : }
4716 :
4717 : /* identify bounds of first field */
4718 24 : start_ptr = VARDATA_ANY(inputstring);
4719 24 : end_ptr = text_position_get_match_ptr(&state);
4720 :
4721 44 : while (found && --fldnum > 0)
4722 : {
4723 : /* identify bounds of next field */
4724 20 : start_ptr = end_ptr + fldsep_len;
4725 20 : found = text_position_next(&state);
4726 20 : if (found)
4727 12 : end_ptr = text_position_get_match_ptr(&state);
4728 : }
4729 :
4730 24 : text_position_cleanup(&state);
4731 :
4732 24 : if (fldnum > 0)
4733 : {
4734 : /* N'th field separator not found */
4735 : /* if last field requested, return it, else empty string */
4736 8 : if (fldnum == 1)
4737 : {
4738 4 : int last_len = start_ptr - VARDATA_ANY(inputstring);
4739 :
4740 4 : result_text = cstring_to_text_with_len(start_ptr,
4741 : inputstring_len - last_len);
4742 : }
4743 : else
4744 4 : result_text = cstring_to_text("");
4745 : }
4746 : else
4747 : {
4748 : /* non-last field requested */
4749 16 : result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
4750 : }
4751 :
4752 24 : PG_RETURN_TEXT_P(result_text);
4753 : }
4754 :
4755 : /*
4756 : * Convenience function to return true when two text params are equal.
4757 : */
4758 : static bool
4759 112 : text_isequal(text *txt1, text *txt2, Oid collid)
4760 : {
4761 112 : return DatumGetBool(DirectFunctionCall2Coll(texteq,
4762 : collid,
4763 : PointerGetDatum(txt1),
4764 : PointerGetDatum(txt2)));
4765 : }
4766 :
4767 : /*
4768 : * text_to_array
4769 : * parse input string and return text array of elements,
4770 : * based on provided field separator
4771 : */
4772 : Datum
4773 72 : text_to_array(PG_FUNCTION_ARGS)
4774 : {
4775 : SplitTextOutputData tstate;
4776 :
4777 : /* For array output, tstate should start as all zeroes */
4778 72 : memset(&tstate, 0, sizeof(tstate));
4779 :
4780 72 : if (!split_text(fcinfo, &tstate))
4781 4 : PG_RETURN_NULL();
4782 :
4783 68 : if (tstate.astate == NULL)
4784 4 : PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
4785 :
4786 64 : PG_RETURN_ARRAYTYPE_P(makeArrayResult(tstate.astate,
4787 : CurrentMemoryContext));
4788 : }
4789 :
4790 : /*
4791 : * text_to_array_null
4792 : * parse input string and return text array of elements,
4793 : * based on provided field separator and null string
4794 : *
4795 : * This is a separate entry point only to prevent the regression tests from
4796 : * complaining about different argument sets for the same internal function.
4797 : */
4798 : Datum
4799 16 : text_to_array_null(PG_FUNCTION_ARGS)
4800 : {
4801 16 : return text_to_array(fcinfo);
4802 : }
4803 :
4804 : /*
4805 : * text_to_table
4806 : * parse input string and return table of elements,
4807 : * based on provided field separator
4808 : */
4809 : Datum
4810 56 : text_to_table(PG_FUNCTION_ARGS)
4811 : {
4812 56 : ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
4813 : SplitTextOutputData tstate;
4814 : MemoryContext old_cxt;
4815 :
4816 : /* check to see if caller supports us returning a tuplestore */
4817 56 : if (rsi == NULL || !IsA(rsi, ReturnSetInfo))
4818 0 : ereport(ERROR,
4819 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4820 : errmsg("set-valued function called in context that cannot accept a set")));
4821 56 : if (!(rsi->allowedModes & SFRM_Materialize))
4822 0 : ereport(ERROR,
4823 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4824 : errmsg("materialize mode required, but it is not allowed in this context")));
4825 :
4826 : /* OK, prepare tuplestore in per-query memory */
4827 56 : old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
4828 :
4829 56 : tstate.astate = NULL;
4830 56 : tstate.tupdesc = CreateTupleDescCopy(rsi->expectedDesc);
4831 56 : tstate.tupstore = tuplestore_begin_heap(true, false, work_mem);
4832 :
4833 56 : MemoryContextSwitchTo(old_cxt);
4834 :
4835 56 : (void) split_text(fcinfo, &tstate);
4836 :
4837 : tuplestore_donestoring(tstate.tupstore);
4838 :
4839 56 : rsi->returnMode = SFRM_Materialize;
4840 56 : rsi->setResult = tstate.tupstore;
4841 56 : rsi->setDesc = tstate.tupdesc;
4842 :
4843 56 : return (Datum) 0;
4844 : }
4845 :
4846 : /*
4847 : * text_to_table_null
4848 : * parse input string and return table of elements,
4849 : * based on provided field separator and null string
4850 : *
4851 : * This is a separate entry point only to prevent the regression tests from
4852 : * complaining about different argument sets for the same internal function.
4853 : */
4854 : Datum
4855 16 : text_to_table_null(PG_FUNCTION_ARGS)
4856 : {
4857 16 : return text_to_table(fcinfo);
4858 : }
4859 :
4860 : /*
4861 : * Common code for text_to_array, text_to_array_null, text_to_table
4862 : * and text_to_table_null functions.
4863 : *
4864 : * These are not strict so we have to test for null inputs explicitly.
4865 : * Returns false if result is to be null, else returns true.
4866 : *
4867 : * Note that if the result is valid but empty (zero elements), we return
4868 : * without changing *tstate --- caller must handle that case, too.
4869 : */
4870 : static bool
4871 128 : split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
4872 : {
4873 : text *inputstring;
4874 : text *fldsep;
4875 : text *null_string;
4876 128 : Oid collation = PG_GET_COLLATION();
4877 : int inputstring_len;
4878 : int fldsep_len;
4879 : char *start_ptr;
4880 : text *result_text;
4881 :
4882 : /* when input string is NULL, then result is NULL too */
4883 128 : if (PG_ARGISNULL(0))
4884 8 : return false;
4885 :
4886 120 : inputstring = PG_GETARG_TEXT_PP(0);
4887 :
4888 : /* fldsep can be NULL */
4889 120 : if (!PG_ARGISNULL(1))
4890 112 : fldsep = PG_GETARG_TEXT_PP(1);
4891 : else
4892 8 : fldsep = NULL;
4893 :
4894 : /* null_string can be NULL or omitted */
4895 120 : if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4896 32 : null_string = PG_GETARG_TEXT_PP(2);
4897 : else
4898 88 : null_string = NULL;
4899 :
4900 120 : if (fldsep != NULL)
4901 : {
4902 : /*
4903 : * Normal case with non-null fldsep. Use the text_position machinery
4904 : * to search for occurrences of fldsep.
4905 : */
4906 : TextPositionState state;
4907 :
4908 112 : inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4909 112 : fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4910 :
4911 : /* return empty set for empty input string */
4912 112 : if (inputstring_len < 1)
4913 40 : return true;
4914 :
4915 : /* empty field separator: return input string as a one-element set */
4916 104 : if (fldsep_len < 1)
4917 : {
4918 32 : split_text_accum_result(tstate, inputstring,
4919 : null_string, collation);
4920 32 : return true;
4921 : }
4922 :
4923 72 : text_position_setup(inputstring, fldsep, collation, &state);
4924 :
4925 72 : start_ptr = VARDATA_ANY(inputstring);
4926 :
4927 : for (;;)
4928 296 : {
4929 : bool found;
4930 : char *end_ptr;
4931 : int chunk_len;
4932 :
4933 368 : CHECK_FOR_INTERRUPTS();
4934 :
4935 368 : found = text_position_next(&state);
4936 368 : if (!found)
4937 : {
4938 : /* fetch last field */
4939 72 : chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4940 72 : end_ptr = NULL; /* not used, but some compilers complain */
4941 : }
4942 : else
4943 : {
4944 : /* fetch non-last field */
4945 296 : end_ptr = text_position_get_match_ptr(&state);
4946 296 : chunk_len = end_ptr - start_ptr;
4947 : }
4948 :
4949 : /* build a temp text datum to pass to split_text_accum_result */
4950 368 : result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4951 :
4952 : /* stash away this field */
4953 368 : split_text_accum_result(tstate, result_text,
4954 : null_string, collation);
4955 :
4956 368 : pfree(result_text);
4957 :
4958 368 : if (!found)
4959 72 : break;
4960 :
4961 296 : start_ptr = end_ptr + fldsep_len;
4962 : }
4963 :
4964 72 : text_position_cleanup(&state);
4965 : }
4966 : else
4967 : {
4968 : /*
4969 : * When fldsep is NULL, each character in the input string becomes a
4970 : * separate element in the result set. The separator is effectively
4971 : * the space between characters.
4972 : */
4973 8 : inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4974 :
4975 8 : start_ptr = VARDATA_ANY(inputstring);
4976 :
4977 48 : while (inputstring_len > 0)
4978 : {
4979 40 : int chunk_len = pg_mblen(start_ptr);
4980 :
4981 40 : CHECK_FOR_INTERRUPTS();
4982 :
4983 : /* build a temp text datum to pass to split_text_accum_result */
4984 40 : result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4985 :
4986 : /* stash away this field */
4987 40 : split_text_accum_result(tstate, result_text,
4988 : null_string, collation);
4989 :
4990 40 : pfree(result_text);
4991 :
4992 40 : start_ptr += chunk_len;
4993 40 : inputstring_len -= chunk_len;
4994 : }
4995 : }
4996 :
4997 80 : return true;
4998 : }
4999 :
5000 : /*
5001 : * Add text item to result set (table or array).
5002 : *
5003 : * This is also responsible for checking to see if the item matches
5004 : * the null_string, in which case we should emit NULL instead.
5005 : */
5006 : static void
5007 440 : split_text_accum_result(SplitTextOutputData *tstate,
5008 : text *field_value,
5009 : text *null_string,
5010 : Oid collation)
5011 : {
5012 440 : bool is_null = false;
5013 :
5014 440 : if (null_string && text_isequal(field_value, null_string, collation))
5015 32 : is_null = true;
5016 :
5017 440 : if (tstate->tupstore)
5018 : {
5019 : Datum values[1];
5020 : bool nulls[1];
5021 :
5022 152 : values[0] = PointerGetDatum(field_value);
5023 152 : nulls[0] = is_null;
5024 :
5025 152 : tuplestore_putvalues(tstate->tupstore,
5026 : tstate->tupdesc,
5027 : values,
5028 : nulls);
5029 : }
5030 : else
5031 : {
5032 288 : tstate->astate = accumArrayResult(tstate->astate,
5033 : PointerGetDatum(field_value),
5034 : is_null,
5035 : TEXTOID,
5036 : CurrentMemoryContext);
5037 : }
5038 440 : }
5039 :
5040 : /*
5041 : * array_to_text
5042 : * concatenate Cstring representation of input array elements
5043 : * using provided field separator
5044 : */
5045 : Datum
5046 36410 : array_to_text(PG_FUNCTION_ARGS)
5047 : {
5048 36410 : ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
5049 36410 : char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
5050 :
5051 36410 : PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
5052 : }
5053 :
5054 : /*
5055 : * array_to_text_null
5056 : * concatenate Cstring representation of input array elements
5057 : * using provided field separator and null string
5058 : *
5059 : * This version is not strict so we have to test for null inputs explicitly.
5060 : */
5061 : Datum
5062 8 : array_to_text_null(PG_FUNCTION_ARGS)
5063 : {
5064 : ArrayType *v;
5065 : char *fldsep;
5066 : char *null_string;
5067 :
5068 : /* returns NULL when first or second parameter is NULL */
5069 8 : if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
5070 0 : PG_RETURN_NULL();
5071 :
5072 8 : v = PG_GETARG_ARRAYTYPE_P(0);
5073 8 : fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
5074 :
5075 : /* NULL null string is passed through as a null pointer */
5076 8 : if (!PG_ARGISNULL(2))
5077 4 : null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
5078 : else
5079 4 : null_string = NULL;
5080 :
5081 8 : PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
5082 : }
5083 :
5084 : /*
5085 : * common code for array_to_text and array_to_text_null functions
5086 : */
5087 : static text *
5088 36430 : array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
5089 : const char *fldsep, const char *null_string)
5090 : {
5091 : text *result;
5092 : int nitems,
5093 : *dims,
5094 : ndims;
5095 : Oid element_type;
5096 : int typlen;
5097 : bool typbyval;
5098 : char typalign;
5099 : StringInfoData buf;
5100 36430 : bool printed = false;
5101 : char *p;
5102 : bits8 *bitmap;
5103 : int bitmask;
5104 : int i;
5105 : ArrayMetaState *my_extra;
5106 :
5107 36430 : ndims = ARR_NDIM(v);
5108 36430 : dims = ARR_DIMS(v);
5109 36430 : nitems = ArrayGetNItems(ndims, dims);
5110 :
5111 : /* if there are no elements, return an empty string */
5112 36430 : if (nitems == 0)
5113 21138 : return cstring_to_text_with_len("", 0);
5114 :
5115 15292 : element_type = ARR_ELEMTYPE(v);
5116 15292 : initStringInfo(&buf);
5117 :
5118 : /*
5119 : * We arrange to look up info about element type, including its output
5120 : * conversion proc, only once per series of calls, assuming the element
5121 : * type doesn't change underneath us.
5122 : */
5123 15292 : my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
5124 15292 : if (my_extra == NULL)
5125 : {
5126 936 : fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5127 : sizeof(ArrayMetaState));
5128 936 : my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
5129 936 : my_extra->element_type = ~element_type;
5130 : }
5131 :
5132 15292 : if (my_extra->element_type != element_type)
5133 : {
5134 : /*
5135 : * Get info about element type, including its output conversion proc
5136 : */
5137 936 : get_type_io_data(element_type, IOFunc_output,
5138 : &my_extra->typlen, &my_extra->typbyval,
5139 : &my_extra->typalign, &my_extra->typdelim,
5140 : &my_extra->typioparam, &my_extra->typiofunc);
5141 936 : fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
5142 936 : fcinfo->flinfo->fn_mcxt);
5143 936 : my_extra->element_type = element_type;
5144 : }
5145 15292 : typlen = my_extra->typlen;
5146 15292 : typbyval = my_extra->typbyval;
5147 15292 : typalign = my_extra->typalign;
5148 :
5149 15292 : p = ARR_DATA_PTR(v);
5150 15292 : bitmap = ARR_NULLBITMAP(v);
5151 15292 : bitmask = 1;
5152 :
5153 51700 : for (i = 0; i < nitems; i++)
5154 : {
5155 : Datum itemvalue;
5156 : char *value;
5157 :
5158 : /* Get source element, checking for NULL */
5159 36408 : if (bitmap && (*bitmap & bitmask) == 0)
5160 : {
5161 : /* if null_string is NULL, we just ignore null elements */
5162 16 : if (null_string != NULL)
5163 : {
5164 4 : if (printed)
5165 4 : appendStringInfo(&buf, "%s%s", fldsep, null_string);
5166 : else
5167 0 : appendStringInfoString(&buf, null_string);
5168 4 : printed = true;
5169 : }
5170 : }
5171 : else
5172 : {
5173 36396 : itemvalue = fetch_att(p, typbyval, typlen);
5174 :
5175 36396 : value = OutputFunctionCall(&my_extra->proc, itemvalue);
5176 :
5177 36396 : if (printed)
5178 21104 : appendStringInfo(&buf, "%s%s", fldsep, value);
5179 : else
5180 15292 : appendStringInfoString(&buf, value);
5181 36396 : printed = true;
5182 :
5183 36396 : p = att_addlength_pointer(p, typlen, p);
5184 36396 : p = (char *) att_align_nominal(p, typalign);
5185 : }
5186 :
5187 : /* advance bitmap pointer if any */
5188 36408 : if (bitmap)
5189 : {
5190 72 : bitmask <<= 1;
5191 72 : if (bitmask == 0x100)
5192 : {
5193 0 : bitmap++;
5194 0 : bitmask = 1;
5195 : }
5196 : }
5197 : }
5198 :
5199 15292 : result = cstring_to_text_with_len(buf.data, buf.len);
5200 15292 : pfree(buf.data);
5201 :
5202 15292 : return result;
5203 : }
5204 :
5205 : #define HEXBASE 16
5206 : /*
5207 : * Convert an int32 to a string containing a base 16 (hex) representation of
5208 : * the number.
5209 : */
5210 : Datum
5211 34056 : to_hex32(PG_FUNCTION_ARGS)
5212 : {
5213 34056 : uint32 value = (uint32) PG_GETARG_INT32(0);
5214 : char *ptr;
5215 34056 : const char *digits = "0123456789abcdef";
5216 : char buf[32]; /* bigger than needed, but reasonable */
5217 :
5218 34056 : ptr = buf + sizeof(buf) - 1;
5219 34056 : *ptr = '\0';
5220 :
5221 : do
5222 : {
5223 65896 : *--ptr = digits[value % HEXBASE];
5224 65896 : value /= HEXBASE;
5225 65896 : } while (ptr > buf && value);
5226 :
5227 34056 : PG_RETURN_TEXT_P(cstring_to_text(ptr));
5228 : }
5229 :
5230 : /*
5231 : * Convert an int64 to a string containing a base 16 (hex) representation of
5232 : * the number.
5233 : */
5234 : Datum
5235 4 : to_hex64(PG_FUNCTION_ARGS)
5236 : {
5237 4 : uint64 value = (uint64) PG_GETARG_INT64(0);
5238 : char *ptr;
5239 4 : const char *digits = "0123456789abcdef";
5240 : char buf[32]; /* bigger than needed, but reasonable */
5241 :
5242 4 : ptr = buf + sizeof(buf) - 1;
5243 4 : *ptr = '\0';
5244 :
5245 : do
5246 : {
5247 32 : *--ptr = digits[value % HEXBASE];
5248 32 : value /= HEXBASE;
5249 32 : } while (ptr > buf && value);
5250 :
5251 4 : PG_RETURN_TEXT_P(cstring_to_text(ptr));
5252 : }
5253 :
5254 : /*
5255 : * Return the size of a datum, possibly compressed
5256 : *
5257 : * Works on any data type
5258 : */
5259 : Datum
5260 102 : pg_column_size(PG_FUNCTION_ARGS)
5261 : {
5262 102 : Datum value = PG_GETARG_DATUM(0);
5263 : int32 result;
5264 : int typlen;
5265 :
5266 : /* On first call, get the input type's typlen, and save at *fn_extra */
5267 102 : if (fcinfo->flinfo->fn_extra == NULL)
5268 : {
5269 : /* Lookup the datatype of the supplied argument */
5270 102 : Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
5271 :
5272 102 : typlen = get_typlen(argtypeid);
5273 102 : if (typlen == 0) /* should not happen */
5274 0 : elog(ERROR, "cache lookup failed for type %u", argtypeid);
5275 :
5276 102 : fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5277 : sizeof(int));
5278 102 : *((int *) fcinfo->flinfo->fn_extra) = typlen;
5279 : }
5280 : else
5281 0 : typlen = *((int *) fcinfo->flinfo->fn_extra);
5282 :
5283 102 : if (typlen == -1)
5284 : {
5285 : /* varlena type, possibly toasted */
5286 102 : result = toast_datum_size(value);
5287 : }
5288 0 : else if (typlen == -2)
5289 : {
5290 : /* cstring */
5291 0 : result = strlen(DatumGetCString(value)) + 1;
5292 : }
5293 : else
5294 : {
5295 : /* ordinary fixed-width type */
5296 0 : result = typlen;
5297 : }
5298 :
5299 102 : PG_RETURN_INT32(result);
5300 : }
5301 :
5302 : /*
5303 : * string_agg - Concatenates values and returns string.
5304 : *
5305 : * Syntax: string_agg(value text, delimiter text) RETURNS text
5306 : *
5307 : * Note: Any NULL values are ignored. The first-call delimiter isn't
5308 : * actually used at all, and on subsequent calls the delimiter precedes
5309 : * the associated value.
5310 : */
5311 :
5312 : /* subroutine to initialize state */
5313 : static StringInfo
5314 904 : makeStringAggState(FunctionCallInfo fcinfo)
5315 : {
5316 : StringInfo state;
5317 : MemoryContext aggcontext;
5318 : MemoryContext oldcontext;
5319 :
5320 904 : if (!AggCheckCallContext(fcinfo, &aggcontext))
5321 : {
5322 : /* cannot be called directly because of internal-type argument */
5323 0 : elog(ERROR, "string_agg_transfn called in non-aggregate context");
5324 : }
5325 :
5326 : /*
5327 : * Create state in aggregate context. It'll stay there across subsequent
5328 : * calls.
5329 : */
5330 904 : oldcontext = MemoryContextSwitchTo(aggcontext);
5331 904 : state = makeStringInfo();
5332 904 : MemoryContextSwitchTo(oldcontext);
5333 :
5334 904 : return state;
5335 : }
5336 :
5337 : Datum
5338 615186 : string_agg_transfn(PG_FUNCTION_ARGS)
5339 : {
5340 : StringInfo state;
5341 :
5342 615186 : state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
5343 :
5344 : /* Append the value unless null. */
5345 615186 : if (!PG_ARGISNULL(1))
5346 : {
5347 : /* On the first time through, we ignore the delimiter. */
5348 615154 : if (state == NULL)
5349 886 : state = makeStringAggState(fcinfo);
5350 614268 : else if (!PG_ARGISNULL(2))
5351 614268 : appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
5352 :
5353 615154 : appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
5354 : }
5355 :
5356 : /*
5357 : * The transition type for string_agg() is declared to be "internal",
5358 : * which is a pass-by-value type the same size as a pointer.
5359 : */
5360 615186 : PG_RETURN_POINTER(state);
5361 : }
5362 :
5363 : Datum
5364 926 : string_agg_finalfn(PG_FUNCTION_ARGS)
5365 : {
5366 : StringInfo state;
5367 :
5368 : /* cannot be called directly because of internal-type argument */
5369 : Assert(AggCheckCallContext(fcinfo, NULL));
5370 :
5371 926 : state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
5372 :
5373 926 : if (state != NULL)
5374 886 : PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));
5375 : else
5376 40 : PG_RETURN_NULL();
5377 : }
5378 :
5379 : /*
5380 : * Prepare cache with fmgr info for the output functions of the datatypes of
5381 : * the arguments of a concat-like function, beginning with argument "argidx".
5382 : * (Arguments before that will have corresponding slots in the resulting
5383 : * FmgrInfo array, but we don't fill those slots.)
5384 : */
5385 : static FmgrInfo *
5386 24 : build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
5387 : {
5388 : FmgrInfo *foutcache;
5389 : int i;
5390 :
5391 : /* We keep the info in fn_mcxt so it survives across calls */
5392 24 : foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5393 24 : PG_NARGS() * sizeof(FmgrInfo));
5394 :
5395 120 : for (i = argidx; i < PG_NARGS(); i++)
5396 : {
5397 : Oid valtype;
5398 : Oid typOutput;
5399 : bool typIsVarlena;
5400 :
5401 96 : valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
5402 96 : if (!OidIsValid(valtype))
5403 0 : elog(ERROR, "could not determine data type of concat() input");
5404 :
5405 96 : getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
5406 96 : fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
5407 : }
5408 :
5409 24 : fcinfo->flinfo->fn_extra = foutcache;
5410 :
5411 24 : return foutcache;
5412 : }
5413 :
5414 : /*
5415 : * Implementation of both concat() and concat_ws().
5416 : *
5417 : * sepstr is the separator string to place between values.
5418 : * argidx identifies the first argument to concatenate (counting from zero);
5419 : * note that this must be constant across any one series of calls.
5420 : *
5421 : * Returns NULL if result should be NULL, else text value.
5422 : */
5423 : static text *
5424 44 : concat_internal(const char *sepstr, int argidx,
5425 : FunctionCallInfo fcinfo)
5426 : {
5427 : text *result;
5428 : StringInfoData str;
5429 : FmgrInfo *foutcache;
5430 44 : bool first_arg = true;
5431 : int i;
5432 :
5433 : /*
5434 : * concat(VARIADIC some-array) is essentially equivalent to
5435 : * array_to_text(), ie concat the array elements with the given separator.
5436 : * So we just pass the case off to that code.
5437 : */
5438 44 : if (get_fn_expr_variadic(fcinfo->flinfo))
5439 : {
5440 : ArrayType *arr;
5441 :
5442 : /* Should have just the one argument */
5443 : Assert(argidx == PG_NARGS() - 1);
5444 :
5445 : /* concat(VARIADIC NULL) is defined as NULL */
5446 20 : if (PG_ARGISNULL(argidx))
5447 8 : return NULL;
5448 :
5449 : /*
5450 : * Non-null argument had better be an array. We assume that any call
5451 : * context that could let get_fn_expr_variadic return true will have
5452 : * checked that a VARIADIC-labeled parameter actually is an array. So
5453 : * it should be okay to just Assert that it's an array rather than
5454 : * doing a full-fledged error check.
5455 : */
5456 : Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
5457 :
5458 : /* OK, safe to fetch the array value */
5459 12 : arr = PG_GETARG_ARRAYTYPE_P(argidx);
5460 :
5461 : /*
5462 : * And serialize the array. We tell array_to_text to ignore null
5463 : * elements, which matches the behavior of the loop below.
5464 : */
5465 12 : return array_to_text_internal(fcinfo, arr, sepstr, NULL);
5466 : }
5467 :
5468 : /* Normal case without explicit VARIADIC marker */
5469 24 : initStringInfo(&str);
5470 :
5471 : /* Get output function info, building it if first time through */
5472 24 : foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
5473 24 : if (foutcache == NULL)
5474 24 : foutcache = build_concat_foutcache(fcinfo, argidx);
5475 :
5476 120 : for (i = argidx; i < PG_NARGS(); i++)
5477 : {
5478 96 : if (!PG_ARGISNULL(i))
5479 : {
5480 88 : Datum value = PG_GETARG_DATUM(i);
5481 :
5482 : /* add separator if appropriate */
5483 88 : if (first_arg)
5484 24 : first_arg = false;
5485 : else
5486 64 : appendStringInfoString(&str, sepstr);
5487 :
5488 : /* call the appropriate type output function, append the result */
5489 88 : appendStringInfoString(&str,
5490 88 : OutputFunctionCall(&foutcache[i], value));
5491 : }
5492 : }
5493 :
5494 24 : result = cstring_to_text_with_len(str.data, str.len);
5495 24 : pfree(str.data);
5496 :
5497 24 : return result;
5498 : }
5499 :
5500 : /*
5501 : * Concatenate all arguments. NULL arguments are ignored.
5502 : */
5503 : Datum
5504 20 : text_concat(PG_FUNCTION_ARGS)
5505 : {
5506 : text *result;
5507 :
5508 20 : result = concat_internal("", 0, fcinfo);
5509 20 : if (result == NULL)
5510 4 : PG_RETURN_NULL();
5511 16 : PG_RETURN_TEXT_P(result);
5512 : }
5513 :
5514 : /*
5515 : * Concatenate all but first argument value with separators. The first
5516 : * parameter is used as the separator. NULL arguments are ignored.
5517 : */
5518 : Datum
5519 28 : text_concat_ws(PG_FUNCTION_ARGS)
5520 : {
5521 : char *sep;
5522 : text *result;
5523 :
5524 : /* return NULL when separator is NULL */
5525 28 : if (PG_ARGISNULL(0))
5526 4 : PG_RETURN_NULL();
5527 24 : sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
5528 :
5529 24 : result = concat_internal(sep, 1, fcinfo);
5530 24 : if (result == NULL)
5531 4 : PG_RETURN_NULL();
5532 20 : PG_RETURN_TEXT_P(result);
5533 : }
5534 :
5535 : /*
5536 : * Return first n characters in the string. When n is negative,
5537 : * return all but last |n| characters.
5538 : */
5539 : Datum
5540 1256 : text_left(PG_FUNCTION_ARGS)
5541 : {
5542 1256 : int n = PG_GETARG_INT32(1);
5543 :
5544 1256 : if (n < 0)
5545 : {
5546 20 : text *str = PG_GETARG_TEXT_PP(0);
5547 20 : const char *p = VARDATA_ANY(str);
5548 20 : int len = VARSIZE_ANY_EXHDR(str);
5549 : int rlen;
5550 :
5551 20 : n = pg_mbstrlen_with_len(p, len) + n;
5552 20 : rlen = pg_mbcharcliplen(p, len, n);
5553 20 : PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
5554 : }
5555 : else
5556 1236 : PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false));
5557 : }
5558 :
5559 : /*
5560 : * Return last n characters in the string. When n is negative,
5561 : * return all but first |n| characters.
5562 : */
5563 : Datum
5564 44 : text_right(PG_FUNCTION_ARGS)
5565 : {
5566 44 : text *str = PG_GETARG_TEXT_PP(0);
5567 44 : const char *p = VARDATA_ANY(str);
5568 44 : int len = VARSIZE_ANY_EXHDR(str);
5569 44 : int n = PG_GETARG_INT32(1);
5570 : int off;
5571 :
5572 44 : if (n < 0)
5573 20 : n = -n;
5574 : else
5575 24 : n = pg_mbstrlen_with_len(p, len) - n;
5576 44 : off = pg_mbcharcliplen(p, len, n);
5577 :
5578 44 : PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
5579 : }
5580 :
5581 : /*
5582 : * Return reversed string
5583 : */
5584 : Datum
5585 4 : text_reverse(PG_FUNCTION_ARGS)
5586 : {
5587 4 : text *str = PG_GETARG_TEXT_PP(0);
5588 4 : const char *p = VARDATA_ANY(str);
5589 4 : int len = VARSIZE_ANY_EXHDR(str);
5590 4 : const char *endp = p + len;
5591 : text *result;
5592 : char *dst;
5593 :
5594 4 : result = palloc(len + VARHDRSZ);
5595 4 : dst = (char *) VARDATA(result) + len;
5596 4 : SET_VARSIZE(result, len + VARHDRSZ);
5597 :
5598 4 : if (pg_database_encoding_max_length() > 1)
5599 : {
5600 : /* multibyte version */
5601 24 : while (p < endp)
5602 : {
5603 : int sz;
5604 :
5605 20 : sz = pg_mblen(p);
5606 20 : dst -= sz;
5607 20 : memcpy(dst, p, sz);
5608 20 : p += sz;
5609 : }
5610 : }
5611 : else
5612 : {
5613 : /* single byte version */
5614 0 : while (p < endp)
5615 0 : *(--dst) = *p++;
5616 : }
5617 :
5618 4 : PG_RETURN_TEXT_P(result);
5619 : }
5620 :
5621 :
/*
 * Helper definitions for text_format() and its parsing subroutines.
 */

/* Flag bit recorded when a '-' flag appears in a format specifier */
#define TEXT_FORMAT_FLAG_MINUS	0x0001

/*
 * Step "ptr" forward by one character, raising an error if that reaches or
 * passes "end_ptr", i.e. if the format specifier is cut off by the end of
 * the format string.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
	do { \
		if (++(ptr) >= (end_ptr)) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("unterminated format() type specifier"), \
					 errhint("For a single \"%%\" use \"%%%%\"."))); \
	} while (0)
5635 :
5636 : /*
5637 : * Returns a formatted string
5638 : */
5639 : Datum
5640 9030 : text_format(PG_FUNCTION_ARGS)
5641 : {
5642 : text *fmt;
5643 : StringInfoData str;
5644 : const char *cp;
5645 : const char *start_ptr;
5646 : const char *end_ptr;
5647 : text *result;
5648 : int arg;
5649 : bool funcvariadic;
5650 : int nargs;
5651 9030 : Datum *elements = NULL;
5652 9030 : bool *nulls = NULL;
5653 9030 : Oid element_type = InvalidOid;
5654 9030 : Oid prev_type = InvalidOid;
5655 9030 : Oid prev_width_type = InvalidOid;
5656 : FmgrInfo typoutputfinfo;
5657 : FmgrInfo typoutputinfo_width;
5658 :
5659 : /* When format string is null, immediately return null */
5660 9030 : if (PG_ARGISNULL(0))
5661 4 : PG_RETURN_NULL();
5662 :
5663 : /* If argument is marked VARIADIC, expand array into elements */
5664 9026 : if (get_fn_expr_variadic(fcinfo->flinfo))
5665 : {
5666 : ArrayType *arr;
5667 : int16 elmlen;
5668 : bool elmbyval;
5669 : char elmalign;
5670 : int nitems;
5671 :
5672 : /* Should have just the one argument */
5673 : Assert(PG_NARGS() == 2);
5674 :
5675 : /* If argument is NULL, we treat it as zero-length array */
5676 32 : if (PG_ARGISNULL(1))
5677 4 : nitems = 0;
5678 : else
5679 : {
5680 : /*
5681 : * Non-null argument had better be an array. We assume that any
5682 : * call context that could let get_fn_expr_variadic return true
5683 : * will have checked that a VARIADIC-labeled parameter actually is
5684 : * an array. So it should be okay to just Assert that it's an
5685 : * array rather than doing a full-fledged error check.
5686 : */
5687 : Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
5688 :
5689 : /* OK, safe to fetch the array value */
5690 28 : arr = PG_GETARG_ARRAYTYPE_P(1);
5691 :
5692 : /* Get info about array element type */
5693 28 : element_type = ARR_ELEMTYPE(arr);
5694 28 : get_typlenbyvalalign(element_type,
5695 : &elmlen, &elmbyval, &elmalign);
5696 :
5697 : /* Extract all array elements */
5698 28 : deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
5699 : &elements, &nulls, &nitems);
5700 : }
5701 :
5702 32 : nargs = nitems + 1;
5703 32 : funcvariadic = true;
5704 : }
5705 : else
5706 : {
5707 : /* Non-variadic case, we'll process the arguments individually */
5708 8994 : nargs = PG_NARGS();
5709 8994 : funcvariadic = false;
5710 : }
5711 :
5712 : /* Setup for main loop. */
5713 9026 : fmt = PG_GETARG_TEXT_PP(0);
5714 9026 : start_ptr = VARDATA_ANY(fmt);
5715 9026 : end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
5716 9026 : initStringInfo(&str);
5717 9026 : arg = 1; /* next argument position to print */
5718 :
5719 : /* Scan format string, looking for conversion specifiers. */
5720 268146 : for (cp = start_ptr; cp < end_ptr; cp++)
5721 : {
5722 : int argpos;
5723 : int widthpos;
5724 : int flags;
5725 : int width;
5726 : Datum value;
5727 : bool isNull;
5728 : Oid typid;
5729 :
5730 : /*
5731 : * If it's not the start of a conversion specifier, just copy it to
5732 : * the output buffer.
5733 : */
5734 259160 : if (*cp != '%')
5735 : {
5736 240682 : appendStringInfoCharMacro(&str, *cp);
5737 240694 : continue;
5738 : }
5739 :
5740 18478 : ADVANCE_PARSE_POINTER(cp, end_ptr);
5741 :
5742 : /* Easy case: %% outputs a single % */
5743 18478 : if (*cp == '%')
5744 : {
5745 12 : appendStringInfoCharMacro(&str, *cp);
5746 12 : continue;
5747 : }
5748 :
5749 : /* Parse the optional portions of the format specifier */
5750 18466 : cp = text_format_parse_format(cp, end_ptr,
5751 : &argpos, &widthpos,
5752 : &flags, &width);
5753 :
5754 : /*
5755 : * Next we should see the main conversion specifier. Whether or not
5756 : * an argument position was present, it's known that at least one
5757 : * character remains in the string at this point. Experience suggests
5758 : * that it's worth checking that that character is one of the expected
5759 : * ones before we try to fetch arguments, so as to produce the least
5760 : * confusing response to a mis-formatted specifier.
5761 : */
5762 18450 : if (strchr("sIL", *cp) == NULL)
5763 4 : ereport(ERROR,
5764 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5765 : errmsg("unrecognized format() type specifier \"%.*s\"",
5766 : pg_mblen(cp), cp),
5767 : errhint("For a single \"%%\" use \"%%%%\".")));
5768 :
5769 : /* If indirect width was specified, get its value */
5770 18446 : if (widthpos >= 0)
5771 : {
5772 : /* Collect the specified or next argument position */
5773 28 : if (widthpos > 0)
5774 24 : arg = widthpos;
5775 28 : if (arg >= nargs)
5776 0 : ereport(ERROR,
5777 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5778 : errmsg("too few arguments for format()")));
5779 :
5780 : /* Get the value and type of the selected argument */
5781 28 : if (!funcvariadic)
5782 : {
5783 28 : value = PG_GETARG_DATUM(arg);
5784 28 : isNull = PG_ARGISNULL(arg);
5785 28 : typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5786 : }
5787 : else
5788 : {
5789 0 : value = elements[arg - 1];
5790 0 : isNull = nulls[arg - 1];
5791 0 : typid = element_type;
5792 : }
5793 28 : if (!OidIsValid(typid))
5794 0 : elog(ERROR, "could not determine data type of format() input");
5795 :
5796 28 : arg++;
5797 :
5798 : /* We can treat NULL width the same as zero */
5799 28 : if (isNull)
5800 4 : width = 0;
5801 24 : else if (typid == INT4OID)
5802 24 : width = DatumGetInt32(value);
5803 0 : else if (typid == INT2OID)
5804 0 : width = DatumGetInt16(value);
5805 : else
5806 : {
5807 : /* For less-usual datatypes, convert to text then to int */
5808 : char *str;
5809 :
5810 0 : if (typid != prev_width_type)
5811 : {
5812 : Oid typoutputfunc;
5813 : bool typIsVarlena;
5814 :
5815 0 : getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5816 0 : fmgr_info(typoutputfunc, &typoutputinfo_width);
5817 0 : prev_width_type = typid;
5818 : }
5819 :
5820 0 : str = OutputFunctionCall(&typoutputinfo_width, value);
5821 :
5822 : /* pg_strtoint32 will complain about bad data or overflow */
5823 0 : width = pg_strtoint32(str);
5824 :
5825 0 : pfree(str);
5826 : }
5827 : }
5828 :
5829 : /* Collect the specified or next argument position */
5830 18446 : if (argpos > 0)
5831 88 : arg = argpos;
5832 18446 : if (arg >= nargs)
5833 16 : ereport(ERROR,
5834 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5835 : errmsg("too few arguments for format()")));
5836 :
5837 : /* Get the value and type of the selected argument */
5838 18430 : if (!funcvariadic)
5839 : {
5840 17582 : value = PG_GETARG_DATUM(arg);
5841 17582 : isNull = PG_ARGISNULL(arg);
5842 17582 : typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5843 : }
5844 : else
5845 : {
5846 848 : value = elements[arg - 1];
5847 848 : isNull = nulls[arg - 1];
5848 848 : typid = element_type;
5849 : }
5850 18430 : if (!OidIsValid(typid))
5851 0 : elog(ERROR, "could not determine data type of format() input");
5852 :
5853 18430 : arg++;
5854 :
5855 : /*
5856 : * Get the appropriate typOutput function, reusing previous one if
5857 : * same type as previous argument. That's particularly useful in the
5858 : * variadic-array case, but often saves work even for ordinary calls.
5859 : */
5860 18430 : if (typid != prev_type)
5861 : {
5862 : Oid typoutputfunc;
5863 : bool typIsVarlena;
5864 :
5865 10202 : getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5866 10202 : fmgr_info(typoutputfunc, &typoutputfinfo);
5867 10202 : prev_type = typid;
5868 : }
5869 :
5870 : /*
5871 : * And now we can format the value.
5872 : */
5873 18430 : switch (*cp)
5874 : {
5875 18430 : case 's':
5876 : case 'I':
5877 : case 'L':
5878 18430 : text_format_string_conversion(&str, *cp, &typoutputfinfo,
5879 : value, isNull,
5880 : flags, width);
5881 18426 : break;
5882 0 : default:
5883 : /* should not get here, because of previous check */
5884 0 : ereport(ERROR,
5885 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5886 : errmsg("unrecognized format() type specifier \"%.*s\"",
5887 : pg_mblen(cp), cp),
5888 : errhint("For a single \"%%\" use \"%%%%\".")));
5889 : break;
5890 : }
5891 : }
5892 :
5893 : /* Don't need deconstruct_array results anymore. */
5894 8986 : if (elements != NULL)
5895 28 : pfree(elements);
5896 8986 : if (nulls != NULL)
5897 28 : pfree(nulls);
5898 :
5899 : /* Generate results. */
5900 8986 : result = cstring_to_text_with_len(str.data, str.len);
5901 8986 : pfree(str.data);
5902 :
5903 8986 : PG_RETURN_TEXT_P(result);
5904 : }
5905 :
5906 : /*
5907 : * Parse contiguous digits as a decimal number.
5908 : *
5909 : * Returns true if some digits could be parsed.
5910 : * The value is returned into *value, and *ptr is advanced to the next
5911 : * character to be parsed.
5912 : *
5913 : * Note parsing invariant: at least one character is known available before
5914 : * string end (end_ptr) at entry, and this is still true at exit.
5915 : */
5916 : static bool
5917 36908 : text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
5918 : {
5919 36908 : bool found = false;
5920 36908 : const char *cp = *ptr;
5921 36908 : int val = 0;
5922 :
5923 37116 : while (*cp >= '0' && *cp <= '9')
5924 : {
5925 212 : int8 digit = (*cp - '0');
5926 :
5927 212 : if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
5928 212 : unlikely(pg_add_s32_overflow(val, digit, &val)))
5929 0 : ereport(ERROR,
5930 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5931 : errmsg("number is out of range")));
5932 212 : ADVANCE_PARSE_POINTER(cp, end_ptr);
5933 208 : found = true;
5934 : }
5935 :
5936 36904 : *ptr = cp;
5937 36904 : *value = val;
5938 :
5939 36904 : return found;
5940 : }
5941 :
5942 : /*
5943 : * Parse a format specifier (generally following the SUS printf spec).
5944 : *
5945 : * We have already advanced over the initial '%', and we are looking for
5946 : * [argpos][flags][width]type (but the type character is not consumed here).
5947 : *
5948 : * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5949 : * Output parameters:
5950 : * argpos: argument position for value to be printed. -1 means unspecified.
5951 : * widthpos: argument position for width. Zero means the argument position
5952 : * was unspecified (ie, take the next arg) and -1 means no width
5953 : * argument (width was omitted or specified as a constant).
5954 : * flags: bitmask of flags.
5955 : * width: directly-specified width value. Zero means the width was omitted
5956 : * (note it's not necessary to distinguish this case from an explicit
5957 : * zero width value).
5958 : *
5959 : * The function result is the next character position to be parsed, ie, the
5960 : * location where the type character is/should be.
5961 : *
5962 : * Note parsing invariant: at least one character is known available before
5963 : * string end (end_ptr) at entry, and this is still true at exit.
5964 : */
5965 : static const char *
5966 18466 : text_format_parse_format(const char *start_ptr, const char *end_ptr,
5967 : int *argpos, int *widthpos,
5968 : int *flags, int *width)
5969 : {
5970 18466 : const char *cp = start_ptr;
5971 : int n;
5972 :
5973 : /* set defaults for output parameters */
5974 18466 : *argpos = -1;
5975 18466 : *widthpos = -1;
5976 18466 : *flags = 0;
5977 18466 : *width = 0;
5978 :
5979 : /* try to identify first number */
5980 18466 : if (text_format_parse_digits(&cp, end_ptr, &n))
5981 : {
5982 116 : if (*cp != '$')
5983 : {
5984 : /* Must be just a width and a type, so we're done */
5985 16 : *width = n;
5986 16 : return cp;
5987 : }
5988 : /* The number was argument position */
5989 100 : *argpos = n;
5990 : /* Explicit 0 for argument index is immediately refused */
5991 100 : if (n == 0)
5992 4 : ereport(ERROR,
5993 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5994 : errmsg("format specifies argument 0, but arguments are numbered from 1")));
5995 96 : ADVANCE_PARSE_POINTER(cp, end_ptr);
5996 : }
5997 :
5998 : /* Handle flags (only minus is supported now) */
5999 18462 : while (*cp == '-')
6000 : {
6001 20 : *flags |= TEXT_FORMAT_FLAG_MINUS;
6002 20 : ADVANCE_PARSE_POINTER(cp, end_ptr);
6003 : }
6004 :
6005 18442 : if (*cp == '*')
6006 : {
6007 : /* Handle indirect width */
6008 32 : ADVANCE_PARSE_POINTER(cp, end_ptr);
6009 32 : if (text_format_parse_digits(&cp, end_ptr, &n))
6010 : {
6011 : /* number in this position must be closed by $ */
6012 28 : if (*cp != '$')
6013 0 : ereport(ERROR,
6014 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6015 : errmsg("width argument position must be ended by \"$\"")));
6016 : /* The number was width argument position */
6017 28 : *widthpos = n;
6018 : /* Explicit 0 for argument index is immediately refused */
6019 28 : if (n == 0)
6020 4 : ereport(ERROR,
6021 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6022 : errmsg("format specifies argument 0, but arguments are numbered from 1")));
6023 24 : ADVANCE_PARSE_POINTER(cp, end_ptr);
6024 : }
6025 : else
6026 4 : *widthpos = 0; /* width's argument position is unspecified */
6027 : }
6028 : else
6029 : {
6030 : /* Check for direct width specification */
6031 18410 : if (text_format_parse_digits(&cp, end_ptr, &n))
6032 20 : *width = n;
6033 : }
6034 :
6035 : /* cp should now be pointing at type character */
6036 18434 : return cp;
6037 : }
6038 :
6039 : /*
6040 : * Format a %s, %I, or %L conversion
6041 : */
6042 : static void
6043 18430 : text_format_string_conversion(StringInfo buf, char conversion,
6044 : FmgrInfo *typOutputInfo,
6045 : Datum value, bool isNull,
6046 : int flags, int width)
6047 : {
6048 : char *str;
6049 :
6050 : /* Handle NULL arguments before trying to stringify the value. */
6051 18430 : if (isNull)
6052 : {
6053 44 : if (conversion == 's')
6054 12 : text_format_append_string(buf, "", flags, width);
6055 32 : else if (conversion == 'L')
6056 28 : text_format_append_string(buf, "NULL", flags, width);
6057 4 : else if (conversion == 'I')
6058 4 : ereport(ERROR,
6059 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
6060 : errmsg("null values cannot be formatted as an SQL identifier")));
6061 40 : return;
6062 : }
6063 :
6064 : /* Stringify. */
6065 18386 : str = OutputFunctionCall(typOutputInfo, value);
6066 :
6067 : /* Escape. */
6068 18386 : if (conversion == 'I')
6069 : {
6070 : /* quote_identifier may or may not allocate a new string. */
6071 1336 : text_format_append_string(buf, quote_identifier(str), flags, width);
6072 : }
6073 17050 : else if (conversion == 'L')
6074 : {
6075 1018 : char *qstr = quote_literal_cstr(str);
6076 :
6077 1018 : text_format_append_string(buf, qstr, flags, width);
6078 : /* quote_literal_cstr() always allocates a new string */
6079 1018 : pfree(qstr);
6080 : }
6081 : else
6082 16032 : text_format_append_string(buf, str, flags, width);
6083 :
6084 : /* Cleanup. */
6085 18386 : pfree(str);
6086 : }
6087 :
6088 : /*
6089 : * Append str to buf, padding as directed by flags/width
6090 : */
6091 : static void
6092 18426 : text_format_append_string(StringInfo buf, const char *str,
6093 : int flags, int width)
6094 : {
6095 18426 : bool align_to_left = false;
6096 : int len;
6097 :
6098 : /* fast path for typical easy case */
6099 18426 : if (width == 0)
6100 : {
6101 18370 : appendStringInfoString(buf, str);
6102 18370 : return;
6103 : }
6104 :
6105 56 : if (width < 0)
6106 : {
6107 : /* Negative width: implicit '-' flag, then take absolute value */
6108 4 : align_to_left = true;
6109 : /* -INT_MIN is undefined */
6110 4 : if (width <= INT_MIN)
6111 0 : ereport(ERROR,
6112 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
6113 : errmsg("number is out of range")));
6114 4 : width = -width;
6115 : }
6116 52 : else if (flags & TEXT_FORMAT_FLAG_MINUS)
6117 16 : align_to_left = true;
6118 :
6119 56 : len = pg_mbstrlen(str);
6120 56 : if (align_to_left)
6121 : {
6122 : /* left justify */
6123 20 : appendStringInfoString(buf, str);
6124 20 : if (len < width)
6125 20 : appendStringInfoSpaces(buf, width - len);
6126 : }
6127 : else
6128 : {
6129 : /* right justify */
6130 36 : if (len < width)
6131 36 : appendStringInfoSpaces(buf, width - len);
6132 36 : appendStringInfoString(buf, str);
6133 : }
6134 : }
6135 :
6136 : /*
6137 : * text_format_nv - nonvariadic wrapper for text_format function.
6138 : *
6139 : * note: this wrapper is necessary to pass the sanity check in opr_sanity,
6140 : * which checks that all built-in functions that share the implementing C
6141 : * function take the same number of arguments.
6142 : */
6143 : Datum
6144 20 : text_format_nv(PG_FUNCTION_ARGS)
6145 : {
6146 20 : return text_format(fcinfo);
6147 : }
6148 :
/*
 * Helper for the Levenshtein distance functions: report whether the first
 * "len" bytes of s1 and s2 are identical.  Bytes are compared from the
 * last one backwards.  This is faster than memcmp() for this use case.
 * A len of zero or less compares nothing and yields true.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	for (int remaining = len; remaining > 0; remaining--)
	{
		if (s1[remaining - 1] != s2[remaining - 1])
			return false;
	}

	return true;
}
6164 :
6165 : /* Expand each Levenshtein distance variant */
6166 : #include "levenshtein.c"
6167 : #define LEVENSHTEIN_LESS_EQUAL
6168 : #include "levenshtein.c"
6169 :
6170 :
6171 : /*
6172 : * Unicode support
6173 : */
6174 :
6175 : static UnicodeNormalizationForm
6176 104 : unicode_norm_form_from_string(const char *formstr)
6177 : {
6178 104 : UnicodeNormalizationForm form = -1;
6179 :
6180 : /*
6181 : * Might as well check this while we're here.
6182 : */
6183 104 : if (GetDatabaseEncoding() != PG_UTF8)
6184 0 : ereport(ERROR,
6185 : (errcode(ERRCODE_SYNTAX_ERROR),
6186 : errmsg("Unicode normalization can only be performed if server encoding is UTF8")));
6187 :
6188 104 : if (pg_strcasecmp(formstr, "NFC") == 0)
6189 36 : form = UNICODE_NFC;
6190 68 : else if (pg_strcasecmp(formstr, "NFD") == 0)
6191 20 : form = UNICODE_NFD;
6192 48 : else if (pg_strcasecmp(formstr, "NFKC") == 0)
6193 20 : form = UNICODE_NFKC;
6194 28 : else if (pg_strcasecmp(formstr, "NFKD") == 0)
6195 20 : form = UNICODE_NFKD;
6196 : else
6197 8 : ereport(ERROR,
6198 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
6199 : errmsg("invalid normalization form: %s", formstr)));
6200 :
6201 96 : return form;
6202 : }
6203 :
6204 : Datum
6205 28 : unicode_normalize_func(PG_FUNCTION_ARGS)
6206 : {
6207 28 : text *input = PG_GETARG_TEXT_PP(0);
6208 28 : char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
6209 : UnicodeNormalizationForm form;
6210 : int size;
6211 : pg_wchar *input_chars;
6212 : pg_wchar *output_chars;
6213 : unsigned char *p;
6214 : text *result;
6215 : int i;
6216 :
6217 28 : form = unicode_norm_form_from_string(formstr);
6218 :
6219 : /* convert to pg_wchar */
6220 24 : size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
6221 24 : input_chars = palloc((size + 1) * sizeof(pg_wchar));
6222 24 : p = (unsigned char *) VARDATA_ANY(input);
6223 108 : for (i = 0; i < size; i++)
6224 : {
6225 84 : input_chars[i] = utf8_to_unicode(p);
6226 84 : p += pg_utf_mblen(p);
6227 : }
6228 24 : input_chars[i] = (pg_wchar) '\0';
6229 : Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
6230 :
6231 : /* action */
6232 24 : output_chars = unicode_normalize(form, input_chars);
6233 :
6234 : /* convert back to UTF-8 string */
6235 24 : size = 0;
6236 104 : for (pg_wchar *wp = output_chars; *wp; wp++)
6237 : {
6238 : unsigned char buf[4];
6239 :
6240 80 : unicode_to_utf8(*wp, buf);
6241 80 : size += pg_utf_mblen(buf);
6242 : }
6243 :
6244 24 : result = palloc(size + VARHDRSZ);
6245 24 : SET_VARSIZE(result, size + VARHDRSZ);
6246 :
6247 24 : p = (unsigned char *) VARDATA_ANY(result);
6248 104 : for (pg_wchar *wp = output_chars; *wp; wp++)
6249 : {
6250 80 : unicode_to_utf8(*wp, p);
6251 80 : p += pg_utf_mblen(p);
6252 : }
6253 : Assert((char *) p == (char *) result + size + VARHDRSZ);
6254 :
6255 24 : PG_RETURN_TEXT_P(result);
6256 : }
6257 :
6258 : /*
6259 : * Check whether the string is in the specified Unicode normalization form.
6260 : *
6261 : * This is done by converting the string to the specified normal form and then
6262 : * comparing that to the original string. To speed that up, we also apply the
6263 : * "quick check" algorithm specified in UAX #15, which can give a yes or no
6264 : * answer for many strings by just scanning the string once.
6265 : *
6266 : * This function should generally be optimized for the case where the string
6267 : * is in fact normalized. In that case, we'll end up looking at the entire
6268 : * string, so it's probably not worth doing any incremental conversion etc.
6269 : */
6270 : Datum
6271 76 : unicode_is_normalized(PG_FUNCTION_ARGS)
6272 : {
6273 76 : text *input = PG_GETARG_TEXT_PP(0);
6274 76 : char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
6275 : UnicodeNormalizationForm form;
6276 : int size;
6277 : pg_wchar *input_chars;
6278 : pg_wchar *output_chars;
6279 : unsigned char *p;
6280 : int i;
6281 : UnicodeNormalizationQC quickcheck;
6282 : int output_size;
6283 : bool result;
6284 :
6285 76 : form = unicode_norm_form_from_string(formstr);
6286 :
6287 : /* convert to pg_wchar */
6288 72 : size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
6289 72 : input_chars = palloc((size + 1) * sizeof(pg_wchar));
6290 72 : p = (unsigned char *) VARDATA_ANY(input);
6291 320 : for (i = 0; i < size; i++)
6292 : {
6293 248 : input_chars[i] = utf8_to_unicode(p);
6294 248 : p += pg_utf_mblen(p);
6295 : }
6296 72 : input_chars[i] = (pg_wchar) '\0';
6297 : Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
6298 :
6299 : /* quick check (see UAX #15) */
6300 72 : quickcheck = unicode_is_normalized_quickcheck(form, input_chars);
6301 72 : if (quickcheck == UNICODE_NORM_QC_YES)
6302 20 : PG_RETURN_BOOL(true);
6303 52 : else if (quickcheck == UNICODE_NORM_QC_NO)
6304 8 : PG_RETURN_BOOL(false);
6305 :
6306 : /* normalize and compare with original */
6307 44 : output_chars = unicode_normalize(form, input_chars);
6308 :
6309 44 : output_size = 0;
6310 208 : for (pg_wchar *wp = output_chars; *wp; wp++)
6311 164 : output_size++;
6312 :
6313 60 : result = (size == output_size) &&
6314 16 : (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
6315 :
6316 44 : PG_RETURN_BOOL(result);
6317 : }
|