Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsvector_op.c
4 : * operations over tsvector
5 : *
6 : * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsvector_op.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include <limits.h>
17 :
18 : #include "access/htup_details.h"
19 : #include "catalog/namespace.h"
20 : #include "catalog/pg_type.h"
21 : #include "commands/trigger.h"
22 : #include "executor/spi.h"
23 : #include "funcapi.h"
24 : #include "lib/qunique.h"
25 : #include "mb/pg_wchar.h"
26 : #include "miscadmin.h"
27 : #include "parser/parse_coerce.h"
28 : #include "tsearch/ts_utils.h"
29 : #include "utils/array.h"
30 : #include "utils/builtins.h"
31 : #include "utils/lsyscache.h"
32 : #include "utils/regproc.h"
33 : #include "utils/rel.h"
34 :
35 :
/*
 * Per-tsvector lookup state; checkclass_str() in this file receives it as
 * a CHKVAL pointer.
 */
typedef struct
{
	WordEntry  *arrb;			/* NOTE(review): presumably the first
								 * WordEntry of the tsvector -- confirm */
	WordEntry  *arre;			/* NOTE(review): presumably the end of the
								 * WordEntry array -- confirm */
	char	   *values;			/* base of the tsvector's lexeme storage;
								 * WordEntry pos offsets are added to it */
	char	   *operand;		/* NOTE(review): presumably the tsquery's
								 * operand string area -- confirm */
} CHKVAL;
43 :
44 :
/*
 * Node of a binary tree (see the left/right links) that carries counters
 * for one lexeme.  The lexeme bytes are stored inline in the trailing
 * flexible array, so a node occupies STATENTRYHDRSZ + lenlexeme bytes.
 */
typedef struct StatEntry
{
	uint32		ndoc;			/* zero indicates that we were already here
								 * while walking through the tree */
	uint32		nentry;			/* NOTE(review): presumably the total number
								 * of occurrences -- confirm at use site */
	struct StatEntry *left;		/* left child */
	struct StatEntry *right;	/* right child */
	uint32		lenlexeme;		/* length in bytes of lexeme[] */
	char		lexeme[FLEXIBLE_ARRAY_MEMBER];	/* lexeme bytes */
} StatEntry;

/* Size of the fixed part of a StatEntry, i.e. everything before lexeme[] */
#define STATENTRYHDRSZ	(offsetof(StatEntry, lexeme))
57 :
/*
 * Working state that owns a StatEntry tree (via root) together with the
 * bookkeeping needed to walk it.
 *
 * NOTE(review): the users of this struct are outside the visible part of
 * the file; the field notes below are assumptions to be confirmed.
 */
typedef struct
{
	int32		weight;			/* presumably a weight filter -- confirm */

	uint32		maxdepth;		/* presumably the deepest tree level seen */

	StatEntry **stack;			/* presumably an explicit traversal stack */
	uint32		stackpos;		/* presumably the current depth in stack[] */

	StatEntry  *root;			/* root node of the StatEntry tree */
} TSVectorStat;
69 :
70 :
/*
 * Forward declarations of file-local functions that are referenced before
 * their definitions.
 */
static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
										 uint32 flags,
										 TSExecuteCallback chkcond);
static int	tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
76 :
77 :
78 : /*
79 : * Order: haspos, len, word, for all positions (pos, weight)
80 : */
81 : static int
82 2 : silly_cmp_tsvector(const TSVector a, const TSVector b)
83 : {
84 2 : if (VARSIZE(a) < VARSIZE(b))
85 0 : return -1;
86 2 : else if (VARSIZE(a) > VARSIZE(b))
87 0 : return 1;
88 2 : else if (a->size < b->size)
89 0 : return -1;
90 2 : else if (a->size > b->size)
91 0 : return 1;
92 : else
93 : {
94 2 : WordEntry *aptr = ARRPTR(a);
95 2 : WordEntry *bptr = ARRPTR(b);
96 2 : int i = 0;
97 : int res;
98 :
99 :
100 8 : for (i = 0; i < a->size; i++)
101 : {
102 6 : if (aptr->haspos != bptr->haspos)
103 : {
104 0 : return (aptr->haspos > bptr->haspos) ? -1 : 1;
105 : }
106 6 : else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
107 : {
108 0 : return res;
109 : }
110 6 : else if (aptr->haspos)
111 : {
112 0 : WordEntryPos *ap = POSDATAPTR(a, aptr);
113 0 : WordEntryPos *bp = POSDATAPTR(b, bptr);
114 : int j;
115 :
116 0 : if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
117 0 : return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
118 :
119 0 : for (j = 0; j < POSDATALEN(a, aptr); j++)
120 : {
121 0 : if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
122 : {
123 0 : return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
124 : }
125 0 : else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
126 : {
127 0 : return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
128 : }
129 0 : ap++, bp++;
130 : }
131 : }
132 :
133 6 : aptr++;
134 6 : bptr++;
135 : }
136 : }
137 :
138 2 : return 0;
139 : }
140 :
/*
 * TSVECTORCMPFUNC(type, action, ret) expands to the definition of a
 * function named tsvector_<type>.  The generated function fetches its two
 * tsvector arguments, compares them with silly_cmp_tsvector(), releases any
 * argument copies, and returns "res action 0" through PG_RETURN_<ret>.
 * With action "+" (the cmp variant) that expression is just the raw
 * comparison result.
 *
 * The trailing extern declaration exists only to consume the semicolon
 * written after each macro invocation.
 */
#define TSVECTORCMPFUNC( type, action, ret )			\
Datum													\
tsvector_##type(PG_FUNCTION_ARGS)						\
{														\
	TSVector	a = PG_GETARG_TSVECTOR(0);				\
	TSVector	b = PG_GETARG_TSVECTOR(1);				\
	int			res = silly_cmp_tsvector(a, b);			\
	PG_FREE_IF_COPY(a,0);								\
	PG_FREE_IF_COPY(b,1);								\
	PG_RETURN_##ret( res action 0 );					\
}	\
/* keep compiler quiet - no extra ; */					\
extern int no_such_variable

/* Boolean comparison functions, plus the three-way tsvector_cmp */
TSVECTORCMPFUNC(lt, <, BOOL);
TSVECTORCMPFUNC(le, <=, BOOL);
TSVECTORCMPFUNC(eq, ==, BOOL);
TSVECTORCMPFUNC(ge, >=, BOOL);
TSVECTORCMPFUNC(gt, >, BOOL);
TSVECTORCMPFUNC(ne, !=, BOOL);
TSVECTORCMPFUNC(cmp, +, INT32);
162 :
163 : Datum
164 90 : tsvector_strip(PG_FUNCTION_ARGS)
165 : {
166 90 : TSVector in = PG_GETARG_TSVECTOR(0);
167 : TSVector out;
168 : int i,
169 90 : len = 0;
170 90 : WordEntry *arrin = ARRPTR(in),
171 : *arrout;
172 : char *cur;
173 :
174 318 : for (i = 0; i < in->size; i++)
175 228 : len += arrin[i].len;
176 :
177 90 : len = CALCDATASIZE(in->size, len);
178 90 : out = (TSVector) palloc0(len);
179 90 : SET_VARSIZE(out, len);
180 90 : out->size = in->size;
181 90 : arrout = ARRPTR(out);
182 90 : cur = STRPTR(out);
183 318 : for (i = 0; i < in->size; i++)
184 : {
185 228 : memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
186 228 : arrout[i].haspos = 0;
187 228 : arrout[i].len = arrin[i].len;
188 228 : arrout[i].pos = cur - STRPTR(out);
189 228 : cur += arrout[i].len;
190 : }
191 :
192 90 : PG_FREE_IF_COPY(in, 0);
193 90 : PG_RETURN_POINTER(out);
194 : }
195 :
196 : Datum
197 6 : tsvector_length(PG_FUNCTION_ARGS)
198 : {
199 6 : TSVector in = PG_GETARG_TSVECTOR(0);
200 6 : int32 ret = in->size;
201 :
202 6 : PG_FREE_IF_COPY(in, 0);
203 6 : PG_RETURN_INT32(ret);
204 : }
205 :
206 : Datum
207 12 : tsvector_setweight(PG_FUNCTION_ARGS)
208 : {
209 12 : TSVector in = PG_GETARG_TSVECTOR(0);
210 12 : char cw = PG_GETARG_CHAR(1);
211 : TSVector out;
212 : int i,
213 : j;
214 : WordEntry *entry;
215 : WordEntryPos *p;
216 12 : int w = 0;
217 :
218 12 : switch (cw)
219 : {
220 0 : case 'A':
221 : case 'a':
222 0 : w = 3;
223 0 : break;
224 0 : case 'B':
225 : case 'b':
226 0 : w = 2;
227 0 : break;
228 12 : case 'C':
229 : case 'c':
230 12 : w = 1;
231 12 : break;
232 0 : case 'D':
233 : case 'd':
234 0 : w = 0;
235 0 : break;
236 0 : default:
237 : /* internal error */
238 0 : elog(ERROR, "unrecognized weight: %d", cw);
239 : }
240 :
241 12 : out = (TSVector) palloc(VARSIZE(in));
242 12 : memcpy(out, in, VARSIZE(in));
243 12 : entry = ARRPTR(out);
244 12 : i = out->size;
245 60 : while (i--)
246 : {
247 48 : if ((j = POSDATALEN(out, entry)) != 0)
248 : {
249 48 : p = POSDATAPTR(out, entry);
250 168 : while (j--)
251 : {
252 120 : WEP_SETWEIGHT(*p, w);
253 120 : p++;
254 : }
255 : }
256 48 : entry++;
257 : }
258 :
259 12 : PG_FREE_IF_COPY(in, 0);
260 12 : PG_RETURN_POINTER(out);
261 : }
262 :
263 : /*
264 : * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
265 : *
266 : * Assign weight w to elements of tsin that are listed in lexemes.
267 : */
268 : Datum
269 24 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
270 : {
271 24 : TSVector tsin = PG_GETARG_TSVECTOR(0);
272 24 : char char_weight = PG_GETARG_CHAR(1);
273 24 : ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2);
274 :
275 : TSVector tsout;
276 : int i,
277 : j,
278 : nlexemes,
279 : weight;
280 : WordEntry *entry;
281 : Datum *dlexemes;
282 : bool *nulls;
283 :
284 24 : switch (char_weight)
285 : {
286 0 : case 'A':
287 : case 'a':
288 0 : weight = 3;
289 0 : break;
290 0 : case 'B':
291 : case 'b':
292 0 : weight = 2;
293 0 : break;
294 24 : case 'C':
295 : case 'c':
296 24 : weight = 1;
297 24 : break;
298 0 : case 'D':
299 : case 'd':
300 0 : weight = 0;
301 0 : break;
302 0 : default:
303 : /* internal error */
304 0 : elog(ERROR, "unrecognized weight: %c", char_weight);
305 : }
306 :
307 24 : tsout = (TSVector) palloc(VARSIZE(tsin));
308 24 : memcpy(tsout, tsin, VARSIZE(tsin));
309 24 : entry = ARRPTR(tsout);
310 :
311 24 : deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
312 : &dlexemes, &nulls, &nlexemes);
313 :
314 : /*
315 : * Assuming that lexemes array is significantly shorter than tsvector we
316 : * can iterate through lexemes performing binary search of each lexeme
317 : * from lexemes in tsvector.
318 : */
319 72 : for (i = 0; i < nlexemes; i++)
320 : {
321 : char *lex;
322 : int lex_len,
323 : lex_pos;
324 :
325 : /* Ignore null array elements, they surely don't match */
326 48 : if (nulls[i])
327 6 : continue;
328 :
329 42 : lex = VARDATA(dlexemes[i]);
330 42 : lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
331 42 : lex_pos = tsvector_bsearch(tsout, lex, lex_len);
332 :
333 42 : if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
334 : {
335 24 : WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
336 :
337 78 : while (j--)
338 : {
339 54 : WEP_SETWEIGHT(*p, weight);
340 54 : p++;
341 : }
342 : }
343 : }
344 :
345 24 : PG_FREE_IF_COPY(tsin, 0);
346 24 : PG_FREE_IF_COPY(lexemes, 2);
347 :
348 24 : PG_RETURN_POINTER(tsout);
349 : }
350 :
/*
 * Compare the lexemes of two WordEntrys.  pa and pb are the base addresses
 * that a->pos and b->pos are offsets into.  This is a full (non-prefix)
 * comparison performed by tsCompareString().
 */
#define compareEntry(pa, a, pb, b) \
	tsCompareString((pa) + (a)->pos, (a)->len,	\
					(pb) + (b)->pos, (b)->len,	\
					false)
355 :
/*
 * Add positions from src to dest after offsetting them by maxpos.
 * Return the number added (might be less than expected due to overflow)
 *
 * srcptr and destptr are the WordEntrys (in src and dest respectively)
 * whose position lists are read and extended.  If destptr has no positions
 * yet, its position count is reset to zero first.  destptr->haspos is set
 * when at least one position was appended.
 *
 * NOTE(review): the destination position vector is located via
 * _POSVECPTR(dest, destptr), so destptr's pos and len presumably must
 * already be filled in by the caller -- confirm against the macro.
 */
static int32
add_pos(TSVector src, WordEntry *srcptr,
		TSVector dest, WordEntry *destptr,
		int32 maxpos)
{
	/* clen aliases the npos counter stored inside dest's position vector */
	uint16	   *clen = &_POSVECPTR(dest, destptr)->npos;
	int			i;
	uint16		slen = POSDATALEN(src, srcptr),
				startlen;
	WordEntryPos *spos = POSDATAPTR(src, srcptr),
			   *dpos = POSDATAPTR(dest, destptr);

	if (!destptr->haspos)
		*clen = 0;

	startlen = *clen;

	/*
	 * Append until the source is exhausted, the destination holds
	 * MAXNUMPOS entries, or the most recently stored position is already
	 * MAXENTRYPOS - 1.
	 */
	for (i = 0;
		 i < slen && *clen < MAXNUMPOS &&
		 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
		 i++)
	{
		WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
		WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
		(*clen)++;
	}

	if (*clen != startlen)
		destptr->haspos = 1;
	return *clen - startlen;
}
390 :
391 : /*
392 : * Perform binary search of given lexeme in TSVector.
393 : * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
394 : * found.
395 : */
396 : static int
397 198 : tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
398 : {
399 198 : WordEntry *arrin = ARRPTR(tsv);
400 198 : int StopLow = 0,
401 198 : StopHigh = tsv->size,
402 : StopMiddle,
403 : cmp;
404 :
405 522 : while (StopLow < StopHigh)
406 : {
407 462 : StopMiddle = (StopLow + StopHigh) / 2;
408 :
409 462 : cmp = tsCompareString(lexeme, lexeme_len,
410 462 : STRPTR(tsv) + arrin[StopMiddle].pos,
411 462 : arrin[StopMiddle].len,
412 : false);
413 :
414 462 : if (cmp < 0)
415 216 : StopHigh = StopMiddle;
416 246 : else if (cmp > 0)
417 108 : StopLow = StopMiddle + 1;
418 : else /* found it */
419 138 : return StopMiddle;
420 : }
421 :
422 60 : return -1;
423 : }
424 :
425 : /*
426 : * qsort comparator functions
427 : */
428 :
/*
 * qsort/qunique comparator for plain ints: returns -1, 0 or 1 according to
 * whether *va is less than, equal to, or greater than *vb.
 */
static int
compare_int(const void *va, const void *vb)
{
	const int  *lhs = (const int *) va;
	const int  *rhs = (const int *) vb;

	if (*lhs < *rhs)
		return -1;
	if (*lhs > *rhs)
		return 1;
	return 0;
}
439 :
440 : static int
441 102 : compare_text_lexemes(const void *va, const void *vb)
442 : {
443 102 : Datum a = *((const Datum *) va);
444 102 : Datum b = *((const Datum *) vb);
445 102 : char *alex = VARDATA_ANY(a);
446 102 : int alex_len = VARSIZE_ANY_EXHDR(a);
447 102 : char *blex = VARDATA_ANY(b);
448 102 : int blex_len = VARSIZE_ANY_EXHDR(b);
449 :
450 102 : return tsCompareString(alex, alex_len, blex, blex_len, false);
451 : }
452 :
453 : /*
454 : * Internal routine to delete lexemes from TSVector by array of offsets.
455 : *
456 : * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
457 : * int indices_count -- size of that array
458 : *
459 : * Returns new TSVector without given lexemes along with their positions
460 : * and weights.
461 : */
462 : static TSVector
463 66 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
464 : int indices_count)
465 : {
466 : TSVector tsout;
467 66 : WordEntry *arrin = ARRPTR(tsv),
468 : *arrout;
469 66 : char *data = STRPTR(tsv),
470 : *dataout;
471 : int i, /* index in arrin */
472 : j, /* index in arrout */
473 : k, /* index in indices_to_delete */
474 : curoff; /* index in dataout area */
475 :
476 : /*
477 : * Sort the filter array to simplify membership checks below. Also, get
478 : * rid of any duplicate entries, so that we can assume that indices_count
479 : * is exactly equal to the number of lexemes that will be removed.
480 : */
481 66 : if (indices_count > 1)
482 : {
483 30 : qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
484 30 : indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
485 : compare_int);
486 : }
487 :
488 : /*
489 : * Here we overestimate tsout size, since we don't know how much space is
490 : * used by the deleted lexeme(s). We will set exact size below.
491 : */
492 66 : tsout = (TSVector) palloc0(VARSIZE(tsv));
493 :
494 : /* This count must be correct because STRPTR(tsout) relies on it. */
495 66 : tsout->size = tsv->size - indices_count;
496 :
497 : /*
498 : * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
499 : */
500 66 : arrout = ARRPTR(tsout);
501 66 : dataout = STRPTR(tsout);
502 66 : curoff = 0;
503 396 : for (i = j = k = 0; i < tsv->size; i++)
504 : {
505 : /*
506 : * If current i is present in indices_to_delete, skip this lexeme.
507 : * Since indices_to_delete is already sorted, we only need to check
508 : * the current (k'th) entry.
509 : */
510 330 : if (k < indices_count && i == indices_to_delete[k])
511 : {
512 96 : k++;
513 96 : continue;
514 : }
515 :
516 : /* Copy lexeme and its positions and weights */
517 234 : memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
518 234 : arrout[j].haspos = arrin[i].haspos;
519 234 : arrout[j].len = arrin[i].len;
520 234 : arrout[j].pos = curoff;
521 234 : curoff += arrin[i].len;
522 234 : if (arrin[i].haspos)
523 : {
524 156 : int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
525 156 : + sizeof(uint16);
526 :
527 156 : curoff = SHORTALIGN(curoff);
528 156 : memcpy(dataout + curoff,
529 156 : STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
530 : len);
531 156 : curoff += len;
532 : }
533 :
534 234 : j++;
535 : }
536 :
537 : /*
538 : * k should now be exactly equal to indices_count. If it isn't then the
539 : * caller provided us with indices outside of [0, tsv->size) range and
540 : * estimation of tsout's size is wrong.
541 : */
542 : Assert(k == indices_count);
543 :
544 66 : SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
545 66 : return tsout;
546 : }
547 :
548 : /*
549 : * Delete given lexeme from tsvector.
550 : * Implementation of user-level ts_delete(tsvector, text).
551 : */
552 : Datum
553 36 : tsvector_delete_str(PG_FUNCTION_ARGS)
554 : {
555 36 : TSVector tsin = PG_GETARG_TSVECTOR(0),
556 : tsout;
557 36 : text *tlexeme = PG_GETARG_TEXT_PP(1);
558 36 : char *lexeme = VARDATA_ANY(tlexeme);
559 36 : int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
560 : skip_index;
561 :
562 36 : if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
563 12 : PG_RETURN_POINTER(tsin);
564 :
565 24 : tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
566 :
567 24 : PG_FREE_IF_COPY(tsin, 0);
568 24 : PG_FREE_IF_COPY(tlexeme, 1);
569 24 : PG_RETURN_POINTER(tsout);
570 : }
571 :
572 : /*
573 : * Delete given array of lexemes from tsvector.
574 : * Implementation of user-level ts_delete(tsvector, text[]).
575 : */
576 : Datum
577 42 : tsvector_delete_arr(PG_FUNCTION_ARGS)
578 : {
579 42 : TSVector tsin = PG_GETARG_TSVECTOR(0),
580 : tsout;
581 42 : ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1);
582 : int i,
583 : nlex,
584 : skip_count,
585 : *skip_indices;
586 : Datum *dlexemes;
587 : bool *nulls;
588 :
589 42 : deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
590 : &dlexemes, &nulls, &nlex);
591 :
592 : /*
593 : * In typical use case array of lexemes to delete is relatively small. So
594 : * here we optimize things for that scenario: iterate through lexarr
595 : * performing binary search of each lexeme from lexarr in tsvector.
596 : */
597 42 : skip_indices = palloc0(nlex * sizeof(int));
598 168 : for (i = skip_count = 0; i < nlex; i++)
599 : {
600 : char *lex;
601 : int lex_len,
602 : lex_pos;
603 :
604 : /* Ignore null array elements, they surely don't match */
605 126 : if (nulls[i])
606 6 : continue;
607 :
608 120 : lex = VARDATA(dlexemes[i]);
609 120 : lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
610 120 : lex_pos = tsvector_bsearch(tsin, lex, lex_len);
611 :
612 120 : if (lex_pos >= 0)
613 78 : skip_indices[skip_count++] = lex_pos;
614 : }
615 :
616 42 : tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
617 :
618 42 : pfree(skip_indices);
619 42 : PG_FREE_IF_COPY(tsin, 0);
620 42 : PG_FREE_IF_COPY(lexemes, 1);
621 :
622 42 : PG_RETURN_POINTER(tsout);
623 : }
624 :
625 : /*
626 : * Expand tsvector as table with following columns:
627 : * lexeme: lexeme text
628 : * positions: integer array of lexeme positions
629 : * weights: char array of weights corresponding to positions
630 : */
631 : Datum
632 180 : tsvector_unnest(PG_FUNCTION_ARGS)
633 : {
634 : FuncCallContext *funcctx;
635 : TSVector tsin;
636 :
637 180 : if (SRF_IS_FIRSTCALL())
638 : {
639 : MemoryContext oldcontext;
640 : TupleDesc tupdesc;
641 :
642 30 : funcctx = SRF_FIRSTCALL_INIT();
643 30 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
644 :
645 30 : tupdesc = CreateTemplateTupleDesc(3);
646 30 : TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
647 : TEXTOID, -1, 0);
648 30 : TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
649 : INT2ARRAYOID, -1, 0);
650 30 : TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
651 : TEXTARRAYOID, -1, 0);
652 30 : funcctx->tuple_desc = BlessTupleDesc(tupdesc);
653 :
654 30 : funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
655 :
656 30 : MemoryContextSwitchTo(oldcontext);
657 : }
658 :
659 180 : funcctx = SRF_PERCALL_SETUP();
660 180 : tsin = (TSVector) funcctx->user_fctx;
661 :
662 180 : if (funcctx->call_cntr < tsin->size)
663 : {
664 150 : WordEntry *arrin = ARRPTR(tsin);
665 150 : char *data = STRPTR(tsin);
666 : HeapTuple tuple;
667 : int j,
668 150 : i = funcctx->call_cntr;
669 150 : bool nulls[] = {false, false, false};
670 : Datum values[3];
671 :
672 150 : values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
673 :
674 150 : if (arrin[i].haspos)
675 : {
676 : WordEntryPosVector *posv;
677 : Datum *positions;
678 : Datum *weights;
679 : char weight;
680 :
681 : /*
682 : * Internally tsvector stores position and weight in the same
683 : * uint16 (2 bits for weight, 14 for position). Here we extract
684 : * that in two separate arrays.
685 : */
686 90 : posv = _POSVECPTR(tsin, arrin + i);
687 90 : positions = palloc(posv->npos * sizeof(Datum));
688 90 : weights = palloc(posv->npos * sizeof(Datum));
689 252 : for (j = 0; j < posv->npos; j++)
690 : {
691 162 : positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
692 162 : weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
693 162 : weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
694 : 1));
695 : }
696 :
697 90 : values[1] = PointerGetDatum(construct_array(positions, posv->npos,
698 : INT2OID, 2, true, TYPALIGN_SHORT));
699 90 : values[2] = PointerGetDatum(construct_array(weights, posv->npos,
700 : TEXTOID, -1, false, TYPALIGN_INT));
701 : }
702 : else
703 : {
704 60 : nulls[1] = nulls[2] = true;
705 : }
706 :
707 150 : tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
708 150 : SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
709 : }
710 : else
711 : {
712 30 : SRF_RETURN_DONE(funcctx);
713 : }
714 : }
715 :
716 : /*
717 : * Convert tsvector to array of lexemes.
718 : */
719 : Datum
720 12 : tsvector_to_array(PG_FUNCTION_ARGS)
721 : {
722 12 : TSVector tsin = PG_GETARG_TSVECTOR(0);
723 12 : WordEntry *arrin = ARRPTR(tsin);
724 : Datum *elements;
725 : int i;
726 : ArrayType *array;
727 :
728 12 : elements = palloc(tsin->size * sizeof(Datum));
729 :
730 72 : for (i = 0; i < tsin->size; i++)
731 : {
732 60 : elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
733 : arrin[i].len));
734 : }
735 :
736 12 : array = construct_array(elements, tsin->size, TEXTOID, -1, false, TYPALIGN_INT);
737 :
738 12 : pfree(elements);
739 12 : PG_FREE_IF_COPY(tsin, 0);
740 12 : PG_RETURN_POINTER(array);
741 : }
742 :
743 : /*
744 : * Build tsvector from array of lexemes.
745 : */
746 : Datum
747 24 : array_to_tsvector(PG_FUNCTION_ARGS)
748 : {
749 24 : ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
750 : TSVector tsout;
751 : Datum *dlexemes;
752 : WordEntry *arrout;
753 : bool *nulls;
754 : int nitems,
755 : i,
756 : tslen,
757 24 : datalen = 0;
758 : char *cur;
759 :
760 24 : deconstruct_array(v, TEXTOID, -1, false, TYPALIGN_INT, &dlexemes, &nulls, &nitems);
761 :
762 : /*
763 : * Reject nulls and zero length strings (maybe we should just ignore them,
764 : * instead?)
765 : */
766 126 : for (i = 0; i < nitems; i++)
767 : {
768 114 : if (nulls[i])
769 6 : ereport(ERROR,
770 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
771 : errmsg("lexeme array may not contain nulls")));
772 :
773 108 : if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0)
774 6 : ereport(ERROR,
775 : (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
776 : errmsg("lexeme array may not contain empty strings")));
777 : }
778 :
779 : /* Sort and de-dup, because this is required for a valid tsvector. */
780 12 : if (nitems > 1)
781 : {
782 12 : qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
783 12 : nitems = qunique(dlexemes, nitems, sizeof(Datum),
784 : compare_text_lexemes);
785 : }
786 :
787 : /* Calculate space needed for surviving lexemes. */
788 60 : for (i = 0; i < nitems; i++)
789 48 : datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
790 12 : tslen = CALCDATASIZE(nitems, datalen);
791 :
792 : /* Allocate and fill tsvector. */
793 12 : tsout = (TSVector) palloc0(tslen);
794 12 : SET_VARSIZE(tsout, tslen);
795 12 : tsout->size = nitems;
796 :
797 12 : arrout = ARRPTR(tsout);
798 12 : cur = STRPTR(tsout);
799 60 : for (i = 0; i < nitems; i++)
800 : {
801 48 : char *lex = VARDATA(dlexemes[i]);
802 48 : int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
803 :
804 48 : memcpy(cur, lex, lex_len);
805 48 : arrout[i].haspos = 0;
806 48 : arrout[i].len = lex_len;
807 48 : arrout[i].pos = cur - STRPTR(tsout);
808 48 : cur += lex_len;
809 : }
810 :
811 12 : PG_FREE_IF_COPY(v, 0);
812 12 : PG_RETURN_POINTER(tsout);
813 : }
814 :
/*
 * ts_filter(): keep only lexemes with given weights in tsvector.
 *
 * The second argument is a "char" array of weight letters (A-D, in either
 * case).  A NULL element or an unrecognized letter raises an error.  For
 * each input lexeme only the positions whose weight is listed are kept.
 * Lexemes without position data, and lexemes left with no positions, are
 * dropped from the result.
 */
Datum
tsvector_filter(PG_FUNCTION_ARGS)
{
	TSVector	tsin = PG_GETARG_TSVECTOR(0),
				tsout;
	ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
	WordEntry  *arrin = ARRPTR(tsin),
			   *arrout;
	char	   *datain = STRPTR(tsin),
			   *dataout;
	Datum	   *dweights;
	bool	   *nulls;
	int			nweights;
	int			i,
				j;
	int			cur_pos = 0;
	char		mask = 0;

	deconstruct_array(weights, CHAROID, 1, true, TYPALIGN_CHAR,
					  &dweights, &nulls, &nweights);

	/*
	 * Build a bitmask of accepted weights.  Bit n stands for the weight
	 * whose stored value is n (D = 0 ... A = 3), matching the
	 * "1 << WEP_GETWEIGHT()" test used below.
	 */
	for (i = 0; i < nweights; i++)
	{
		char		char_weight;

		if (nulls[i])
			ereport(ERROR,
					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
					 errmsg("weight array may not contain nulls")));

		char_weight = DatumGetChar(dweights[i]);
		switch (char_weight)
		{
			case 'A':
			case 'a':
				mask = mask | 8;
				break;
			case 'B':
			case 'b':
				mask = mask | 4;
				break;
			case 'C':
			case 'c':
				mask = mask | 2;
				break;
			case 'D':
			case 'd':
				mask = mask | 1;
				break;
			default:
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("unrecognized weight: \"%c\"", char_weight)));
		}
	}

	/*
	 * The output can't be larger than the input.  Lay it out as if every
	 * entry survived (size = tsin->size); the data area is moved into its
	 * final place once the real entry count is known.
	 */
	tsout = (TSVector) palloc0(VARSIZE(tsin));
	tsout->size = tsin->size;
	arrout = ARRPTR(tsout);
	dataout = STRPTR(tsout);

	for (i = j = 0; i < tsin->size; i++)
	{
		WordEntryPosVector *posvin,
				   *posvout;
		int			npos = 0;
		int			k;

		/* a lexeme without positions has no weights to match */
		if (!arrin[i].haspos)
			continue;

		/*
		 * Write the matching positions speculatively to where this
		 * lexeme's position vector would go.  If none match, cur_pos is
		 * not advanced and the space is reused by the next candidate.
		 */
		posvin = _POSVECPTR(tsin, arrin + i);
		posvout = (WordEntryPosVector *)
			(dataout + SHORTALIGN(cur_pos + arrin[i].len));

		for (k = 0; k < posvin->npos; k++)
		{
			if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
				posvout->pos[npos++] = posvin->pos[k];
		}

		/* if no satisfactory positions found, skip lexeme */
		if (!npos)
			continue;

		arrout[j].haspos = true;
		arrout[j].len = arrin[i].len;
		arrout[j].pos = cur_pos;

		memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
		posvout->npos = npos;
		cur_pos += SHORTALIGN(arrin[i].len);
		cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
			sizeof(uint16);
		j++;
	}

	/*
	 * Changing size changes what STRPTR(tsout) returns, so the data area
	 * has to be moved to the new location.
	 */
	tsout->size = j;
	if (dataout != STRPTR(tsout))
		memmove(STRPTR(tsout), dataout, cur_pos);

	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));

	PG_FREE_IF_COPY(tsin, 0);
	PG_RETURN_POINTER(tsout);
}
924 :
/*
 * Concatenate two tsvectors.
 *
 * The result holds the merged lexemes of both inputs, with a lexeme that
 * occurs in both appearing once.  Positions taken from the first input are
 * copied unchanged; positions taken from the second input are offset by the
 * largest position found in the first (see add_pos()).
 *
 * An error is raised if the resulting string area exceeds MAXSTRPOS bytes.
 */
Datum
tsvector_concat(PG_FUNCTION_ARGS)
{
	TSVector	in1 = PG_GETARG_TSVECTOR(0);
	TSVector	in2 = PG_GETARG_TSVECTOR(1);
	TSVector	out;
	WordEntry  *ptr;
	WordEntry  *ptr1,
			   *ptr2;
	WordEntryPos *p;
	int			maxpos = 0,
				i,
				j,
				i1,
				i2,
				dataoff,
				output_bytes,
				output_size;
	char	   *data,
			   *data1,
			   *data2;

	/* Get max position in in1; we'll need this to offset in2's positions */
	ptr = ARRPTR(in1);
	i = in1->size;
	while (i--)
	{
		if ((j = POSDATALEN(in1, ptr)) != 0)
		{
			p = POSDATAPTR(in1, ptr);
			while (j--)
			{
				if (WEP_GETPOS(*p) > maxpos)
					maxpos = WEP_GETPOS(*p);
				p++;
			}
		}
		ptr++;
	}

	/* i1 and i2 count the entries not yet consumed from each input */
	ptr1 = ARRPTR(in1);
	ptr2 = ARRPTR(in2);
	data1 = STRPTR(in1);
	data2 = STRPTR(in2);
	i1 = in1->size;
	i2 = in2->size;

	/*
	 * Conservative estimate of space needed.  We might need all the data in
	 * both inputs, and conceivably add a pad byte before position data for
	 * each item where there was none before.
	 */
	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;

	out = (TSVector) palloc0(output_bytes);
	SET_VARSIZE(out, output_bytes);

	/*
	 * We must make out->size valid so that STRPTR(out) is sensible.  We'll
	 * collapse out any unused space at the end.
	 */
	out->size = in1->size + in2->size;

	ptr = ARRPTR(out);
	data = STRPTR(out);
	dataoff = 0;

	/* Merge the two entry lists while both still have entries left */
	while (i1 && i2)
	{
		int			cmp = compareEntry(data1, ptr1, data2, ptr2);

		if (cmp < 0)
		{						/* in1 first */
			ptr->haspos = ptr1->haspos;
			ptr->len = ptr1->len;
			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
			ptr->pos = dataoff;
			dataoff += ptr1->len;
			if (ptr->haspos)
			{
				/* in1's position vector is copied verbatim */
				dataoff = SHORTALIGN(dataoff);
				memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
				dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
			}

			ptr++;
			ptr1++;
			i1--;
		}
		else if (cmp > 0)
		{						/* in2 first */
			ptr->haspos = ptr2->haspos;
			ptr->len = ptr2->len;
			memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
			ptr->pos = dataoff;
			dataoff += ptr2->len;
			if (ptr->haspos)
			{
				/* in2's positions are shifted by maxpos as they are added */
				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

				if (addlen == 0)
					ptr->haspos = 0;
				else
				{
					dataoff = SHORTALIGN(dataoff);
					dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
				}
			}

			ptr++;
			ptr2++;
			i2--;
		}
		else
		{
			/* same lexeme in both inputs: emit it once with merged positions */
			ptr->haspos = ptr1->haspos | ptr2->haspos;
			ptr->len = ptr1->len;
			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
			ptr->pos = dataoff;
			dataoff += ptr1->len;
			if (ptr->haspos)
			{
				if (ptr1->haspos)
				{
					dataoff = SHORTALIGN(dataoff);
					memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
					dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);

					/*
					 * The count field was already accounted for above, so
					 * only the appended positions add to dataoff here.
					 */
					if (ptr2->haspos)
						dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
				}
				else			/* must have ptr2->haspos */
				{
					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

					if (addlen == 0)
						ptr->haspos = 0;
					else
					{
						dataoff = SHORTALIGN(dataoff);
						dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
					}
				}
			}

			ptr++;
			ptr1++;
			ptr2++;
			i1--;
			i2--;
		}
	}

	/* Copy whatever is left of in1 */
	while (i1)
	{
		ptr->haspos = ptr1->haspos;
		ptr->len = ptr1->len;
		memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
		ptr->pos = dataoff;
		dataoff += ptr1->len;
		if (ptr->haspos)
		{
			dataoff = SHORTALIGN(dataoff);
			memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
			dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
		}

		ptr++;
		ptr1++;
		i1--;
	}

	/* Copy whatever is left of in2, offsetting its positions */
	while (i2)
	{
		ptr->haspos = ptr2->haspos;
		ptr->len = ptr2->len;
		memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
		ptr->pos = dataoff;
		dataoff += ptr2->len;
		if (ptr->haspos)
		{
			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

			if (addlen == 0)
				ptr->haspos = 0;
			else
			{
				dataoff = SHORTALIGN(dataoff);
				dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
			}
		}

		ptr++;
		ptr2++;
		i2--;
	}

	/*
	 * Instead of checking each offset individually, we check for overflow of
	 * pos fields once at the end.
	 */
	if (dataoff > MAXSTRPOS)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));

	/*
	 * Adjust sizes (asserting that we didn't overrun the original estimates)
	 * and collapse out any unused array entries.
	 */
	output_size = ptr - ARRPTR(out);
	Assert(output_size <= out->size);
	out->size = output_size;

	/* a smaller entry array moves STRPTR(out); relocate the data to match */
	if (data != STRPTR(out))
		memmove(STRPTR(out), data, dataoff);
	output_bytes = CALCDATASIZE(out->size, dataoff);
	Assert(output_bytes <= VARSIZE(out));
	SET_VARSIZE(out, output_bytes);

	PG_FREE_IF_COPY(in1, 0);
	PG_FREE_IF_COPY(in2, 1);
	PG_RETURN_POINTER(out);
}
1146 :
1147 : /*
1148 : * Compare two strings by tsvector rules.
1149 : *
1150 : * if prefix = true then it returns zero value iff b has prefix a
1151 : */
1152 : int32
1153 6212958 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1154 : {
1155 : int cmp;
1156 :
1157 6212958 : if (lena == 0)
1158 : {
1159 36 : if (prefix)
1160 0 : cmp = 0; /* empty string is prefix of anything */
1161 : else
1162 36 : cmp = (lenb > 0) ? -1 : 0;
1163 : }
1164 6212922 : else if (lenb == 0)
1165 : {
1166 0 : cmp = (lena > 0) ? 1 : 0;
1167 : }
1168 : else
1169 : {
1170 6212922 : cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
1171 :
1172 6212922 : if (prefix)
1173 : {
1174 16458 : if (cmp == 0 && lena > lenb)
1175 0 : cmp = 1; /* a is longer, so not a prefix of b */
1176 : }
1177 6196464 : else if (cmp == 0 && lena != lenb)
1178 : {
1179 32270 : cmp = (lena < lenb) ? -1 : 1;
1180 : }
1181 : }
1182 :
1183 6212958 : return cmp;
1184 : }
1185 :
1186 : /*
1187 : * Check weight info or/and fill 'data' with the required positions
1188 : */
1189 : static TSTernaryValue
1190 68082 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
1191 : ExecPhraseData *data)
1192 : {
1193 68082 : TSTernaryValue result = TS_NO;
1194 :
1195 : Assert(data == NULL || data->npos == 0);
1196 :
1197 68082 : if (entry->haspos)
1198 : {
1199 : WordEntryPosVector *posvec;
1200 :
1201 : /*
1202 : * We can't use the _POSVECPTR macro here because the pointer to the
1203 : * tsvector's lexeme storage is already contained in chkval->values.
1204 : */
1205 4488 : posvec = (WordEntryPosVector *)
1206 4488 : (chkval->values + SHORTALIGN(entry->pos + entry->len));
1207 :
1208 4488 : if (val->weight && data)
1209 48 : {
1210 48 : WordEntryPos *posvec_iter = posvec->pos;
1211 : WordEntryPos *dptr;
1212 :
1213 : /*
1214 : * Filter position information by weights
1215 : */
1216 48 : dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
1217 48 : data->allocated = true;
1218 :
1219 : /* Is there a position with a matching weight? */
1220 96 : while (posvec_iter < posvec->pos + posvec->npos)
1221 : {
1222 : /* If true, append this position to the data->pos */
1223 48 : if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1224 : {
1225 24 : *dptr = WEP_GETPOS(*posvec_iter);
1226 24 : dptr++;
1227 : }
1228 :
1229 48 : posvec_iter++;
1230 : }
1231 :
1232 48 : data->npos = dptr - data->pos;
1233 :
1234 48 : if (data->npos > 0)
1235 24 : result = TS_YES;
1236 : else
1237 : {
1238 24 : pfree(data->pos);
1239 24 : data->pos = NULL;
1240 24 : data->allocated = false;
1241 : }
1242 : }
1243 4440 : else if (val->weight)
1244 : {
1245 456 : WordEntryPos *posvec_iter = posvec->pos;
1246 :
1247 : /* Is there a position with a matching weight? */
1248 690 : while (posvec_iter < posvec->pos + posvec->npos)
1249 : {
1250 504 : if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1251 : {
1252 270 : result = TS_YES;
1253 270 : break; /* no need to go further */
1254 : }
1255 :
1256 234 : posvec_iter++;
1257 : }
1258 : }
1259 3984 : else if (data)
1260 : {
1261 2274 : data->npos = posvec->npos;
1262 2274 : data->pos = posvec->pos;
1263 2274 : data->allocated = false;
1264 2274 : result = TS_YES;
1265 : }
1266 : else
1267 : {
1268 : /* simplest case: no weight check, positions not needed */
1269 1710 : result = TS_YES;
1270 : }
1271 : }
1272 : else
1273 : {
1274 : /*
1275 : * Position info is lacking, so if the caller requires it, we can only
1276 : * say that maybe there is a match.
1277 : *
1278 : * Notice, however, that we *don't* check val->weight here.
1279 : * Historically, stripped tsvectors are considered to match queries
1280 : * whether or not the query has a weight restriction; that's a little
1281 : * dubious but we'll preserve the behavior.
1282 : */
1283 63594 : if (data)
1284 23058 : result = TS_MAYBE;
1285 : else
1286 40536 : result = TS_YES;
1287 : }
1288 :
1289 68082 : return result;
1290 : }
1291 :
1292 : /*
1293 : * TS_execute callback for matching a tsquery operand to plain tsvector data
1294 : */
1295 : static TSTernaryValue
1296 284022 : checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
1297 : {
1298 284022 : CHKVAL *chkval = (CHKVAL *) checkval;
1299 284022 : WordEntry *StopLow = chkval->arrb;
1300 284022 : WordEntry *StopHigh = chkval->arre;
1301 284022 : WordEntry *StopMiddle = StopHigh;
1302 284022 : TSTernaryValue res = TS_NO;
1303 :
1304 : /* Loop invariant: StopLow <= val < StopHigh */
1305 1786806 : while (StopLow < StopHigh)
1306 : {
1307 : int difference;
1308 :
1309 1555830 : StopMiddle = StopLow + (StopHigh - StopLow) / 2;
1310 1555830 : difference = tsCompareString(chkval->operand + val->distance,
1311 1555830 : val->length,
1312 1555830 : chkval->values + StopMiddle->pos,
1313 1555830 : StopMiddle->len,
1314 : false);
1315 :
1316 1555830 : if (difference == 0)
1317 : {
1318 : /* Check weight info & fill 'data' with positions */
1319 53046 : res = checkclass_str(chkval, StopMiddle, val, data);
1320 53046 : break;
1321 : }
1322 1502784 : else if (difference > 0)
1323 847512 : StopLow = StopMiddle + 1;
1324 : else
1325 655272 : StopHigh = StopMiddle;
1326 : }
1327 :
1328 : /*
1329 : * If it's a prefix search, we should also consider lexemes that the
1330 : * search term is a prefix of (which will necessarily immediately follow
1331 : * the place we found in the above loop). But we can skip them if there
1332 : * was a definite match on the exact term AND the caller doesn't need
1333 : * position info.
1334 : */
1335 284022 : if (val->prefix && (res != TS_YES || data))
1336 : {
1337 16524 : WordEntryPos *allpos = NULL;
1338 16524 : int npos = 0,
1339 16524 : totalpos = 0;
1340 :
1341 : /* adjust start position for corner case */
1342 16524 : if (StopLow >= StopHigh)
1343 16512 : StopMiddle = StopHigh;
1344 :
1345 : /* we don't try to re-use any data from the initial match */
1346 16524 : if (data)
1347 : {
1348 36 : if (data->allocated)
1349 0 : pfree(data->pos);
1350 36 : data->pos = NULL;
1351 36 : data->allocated = false;
1352 36 : data->npos = 0;
1353 : }
1354 16524 : res = TS_NO;
1355 :
1356 31560 : while ((res != TS_YES || data) &&
1357 32556 : StopMiddle < chkval->arre &&
1358 15930 : tsCompareString(chkval->operand + val->distance,
1359 15930 : val->length,
1360 15930 : chkval->values + StopMiddle->pos,
1361 15930 : StopMiddle->len,
1362 : true) == 0)
1363 : {
1364 : TSTernaryValue subres;
1365 :
1366 15036 : subres = checkclass_str(chkval, StopMiddle, val, data);
1367 :
1368 15036 : if (subres != TS_NO)
1369 : {
1370 14976 : if (data)
1371 : {
1372 : /*
1373 : * We need to join position information
1374 : */
1375 42 : if (subres == TS_MAYBE)
1376 : {
1377 : /*
1378 : * No position info for this match, so we must report
1379 : * MAYBE overall.
1380 : */
1381 0 : res = TS_MAYBE;
1382 : /* forget any previous positions */
1383 0 : npos = 0;
1384 : /* don't leak storage */
1385 0 : if (allpos)
1386 0 : pfree(allpos);
1387 0 : break;
1388 : }
1389 :
1390 78 : while (npos + data->npos > totalpos)
1391 : {
1392 36 : if (totalpos == 0)
1393 : {
1394 36 : totalpos = 256;
1395 36 : allpos = palloc(sizeof(WordEntryPos) * totalpos);
1396 : }
1397 : else
1398 : {
1399 0 : totalpos *= 2;
1400 0 : allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
1401 : }
1402 : }
1403 :
1404 42 : memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1405 42 : npos += data->npos;
1406 :
1407 : /* don't leak storage from individual matches */
1408 42 : if (data->allocated)
1409 24 : pfree(data->pos);
1410 42 : data->pos = NULL;
1411 42 : data->allocated = false;
1412 : /* it's important to reset data->npos before next loop */
1413 42 : data->npos = 0;
1414 : }
1415 : else
1416 : {
1417 : /* Don't need positions, just handle YES/MAYBE */
1418 14934 : if (subres == TS_YES || res == TS_NO)
1419 14934 : res = subres;
1420 : }
1421 : }
1422 :
1423 15036 : StopMiddle++;
1424 : }
1425 :
1426 16524 : if (data && npos > 0)
1427 : {
1428 : /* Sort and make unique array of found positions */
1429 36 : data->pos = allpos;
1430 36 : qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1431 36 : data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
1432 : compareWordEntryPos);
1433 36 : data->allocated = true;
1434 36 : res = TS_YES;
1435 : }
1436 : }
1437 :
1438 284022 : return res;
1439 : }
1440 :
1441 : /*
1442 : * Compute output position list for a tsquery operator in phrase mode.
1443 : *
1444 : * Merge the position lists in Ldata and Rdata as specified by "emit",
1445 : * returning the result list into *data. The input position lists must be
1446 : * sorted and unique, and the output will be as well.
1447 : *
1448 : * data: pointer to initially-all-zeroes output struct, or NULL
1449 : * Ldata, Rdata: input position lists
1450 : * emit: bitmask of TSPO_XXX flags
1451 : * Loffset: offset to be added to Ldata positions before comparing/outputting
1452 : * Roffset: offset to be added to Rdata positions before comparing/outputting
1453 : * max_npos: maximum possible required size of output position array
1454 : *
1455 : * Loffset and Roffset should not be negative, else we risk trying to output
1456 : * negative positions, which won't fit into WordEntryPos.
1457 : *
1458 : * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
1459 : * we return it as TSTernaryValue.
1460 : *
1461 : * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
1462 : * returns TS_YES if any positions would have been emitted.
1463 : */
1464 : #define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
1465 : #define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
1466 : #define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
1467 :
1468 : static TSTernaryValue
1469 29942 : TS_phrase_output(ExecPhraseData *data,
1470 : ExecPhraseData *Ldata,
1471 : ExecPhraseData *Rdata,
1472 : int emit,
1473 : int Loffset,
1474 : int Roffset,
1475 : int max_npos)
1476 : {
1477 : int Lindex,
1478 : Rindex;
1479 :
1480 : /* Loop until both inputs are exhausted */
1481 29942 : Lindex = Rindex = 0;
1482 30920 : while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1483 : {
1484 : int Lpos,
1485 : Rpos;
1486 2304 : int output_pos = 0;
1487 :
1488 : /*
1489 : * Fetch current values to compare. WEP_GETPOS() is needed because
1490 : * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1491 : */
1492 2304 : if (Lindex < Ldata->npos)
1493 1662 : Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1494 : else
1495 : {
1496 : /* L array exhausted, so we're done if R_ONLY isn't set */
1497 642 : if (!(emit & TSPO_R_ONLY))
1498 156 : break;
1499 486 : Lpos = INT_MAX;
1500 : }
1501 2148 : if (Rindex < Rdata->npos)
1502 1890 : Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1503 : else
1504 : {
1505 : /* R array exhausted, so we're done if L_ONLY isn't set */
1506 258 : if (!(emit & TSPO_L_ONLY))
1507 174 : break;
1508 84 : Rpos = INT_MAX;
1509 : }
1510 :
1511 : /* Merge-join the two input lists */
1512 1974 : if (Lpos < Rpos)
1513 : {
1514 : /* Lpos is not matched in Rdata, should we output it? */
1515 480 : if (emit & TSPO_L_ONLY)
1516 114 : output_pos = Lpos;
1517 480 : Lindex++;
1518 : }
1519 1494 : else if (Lpos == Rpos)
1520 : {
1521 : /* Lpos and Rpos match ... should we output it? */
1522 744 : if (emit & TSPO_BOTH)
1523 648 : output_pos = Rpos;
1524 744 : Lindex++;
1525 744 : Rindex++;
1526 : }
1527 : else /* Lpos > Rpos */
1528 : {
1529 : /* Rpos is not matched in Ldata, should we output it? */
1530 750 : if (emit & TSPO_R_ONLY)
1531 528 : output_pos = Rpos;
1532 750 : Rindex++;
1533 : }
1534 :
1535 1974 : if (output_pos > 0)
1536 : {
1537 1290 : if (data)
1538 : {
1539 : /* Store position, first allocating output array if needed */
1540 294 : if (data->pos == NULL)
1541 : {
1542 246 : data->pos = (WordEntryPos *)
1543 246 : palloc(max_npos * sizeof(WordEntryPos));
1544 246 : data->allocated = true;
1545 : }
1546 294 : data->pos[data->npos++] = output_pos;
1547 : }
1548 : else
1549 : {
1550 : /*
1551 : * Exact positions not needed, so return TS_YES as soon as we
1552 : * know there is at least one.
1553 : */
1554 996 : return TS_YES;
1555 : }
1556 : }
1557 : }
1558 :
1559 28946 : if (data && data->npos > 0)
1560 : {
1561 : /* Let's assert we didn't overrun the array */
1562 : Assert(data->npos <= max_npos);
1563 246 : return TS_YES;
1564 : }
1565 28700 : return TS_NO;
1566 : }
1567 :
1568 : /*
1569 : * Execute tsquery at or below an OP_PHRASE operator.
1570 : *
1571 : * This handles tsquery execution at recursion levels where we need to care
1572 : * about match locations.
1573 : *
1574 : * In addition to the same arguments used for TS_execute, the caller may pass
1575 : * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1576 : * match position info on success. data == NULL if no position data need be
1577 : * returned. (In practice, outside callers pass NULL, and only the internal
1578 : * recursion cases pass a data pointer.)
1579 : * Note: the function assumes data != NULL for operators other than OP_PHRASE.
1580 : * This is OK because an outside call always starts from an OP_PHRASE node.
1581 : *
1582 : * The detailed semantics of the match data, given that the function returned
1583 : * TS_YES (successful match), are:
1584 : *
1585 : * npos > 0, negate = false:
1586 : * query is matched at specified position(s) (and only those positions)
1587 : * npos > 0, negate = true:
1588 : * query is matched at all positions *except* specified position(s)
1589 : * npos = 0, negate = true:
1590 : * query is matched at all positions
1591 : * npos = 0, negate = false:
1592 : * disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
1593 : *
1594 : * Successful matches also return a "width" value which is the match width in
1595 : * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
1596 : * and is the sum of the phrase operator distances for phrase matches. Note
1597 : * that when width > 0, the listed positions represent the ends of matches not
1598 : * the starts. (This unintuitive rule is needed to avoid possibly generating
1599 : * negative positions, which wouldn't fit into the WordEntryPos arrays.)
1600 : *
1601 : * If the TSExecuteCallback function reports that an operand is present
1602 : * but fails to provide position(s) for it, we will return TS_MAYBE when
1603 : * it is possible but not certain that the query is matched.
1604 : *
1605 : * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
1606 : * negate = false (which is the state initialized by the caller); but the
1607 : * "width" output in such cases is undefined.
1608 : */
1609 : static TSTernaryValue
1610 698952 : TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1611 : TSExecuteCallback chkcond,
1612 : ExecPhraseData *data)
1613 : {
1614 : ExecPhraseData Ldata,
1615 : Rdata;
1616 : TSTernaryValue lmatch,
1617 : rmatch;
1618 : int Loffset,
1619 : Roffset,
1620 : maxwidth;
1621 :
1622 : /* since this function recurses, it could be driven to stack overflow */
1623 698952 : check_stack_depth();
1624 :
1625 698952 : if (curitem->type == QI_VAL)
1626 343770 : return chkcond(arg, (QueryOperand *) curitem, data);
1627 :
1628 355182 : switch (curitem->qoperator.oper)
1629 : {
1630 120474 : case OP_NOT:
1631 :
1632 : /*
1633 : * We need not touch data->width, since a NOT operation does not
1634 : * change the match width.
1635 : */
1636 120474 : if (flags & TS_EXEC_SKIP_NOT)
1637 : {
1638 : /* with SKIP_NOT, report NOT as "match everywhere" */
1639 : Assert(data->npos == 0 && !data->negate);
1640 0 : data->negate = true;
1641 0 : return TS_YES;
1642 : }
1643 120474 : switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1644 : {
1645 105334 : case TS_NO:
1646 : /* change "match nowhere" to "match everywhere" */
1647 : Assert(data->npos == 0 && !data->negate);
1648 105334 : data->negate = true;
1649 105334 : return TS_YES;
1650 390 : case TS_YES:
1651 390 : if (data->npos > 0)
1652 : {
1653 : /* we have some positions, invert negate flag */
1654 384 : data->negate = !data->negate;
1655 384 : return TS_YES;
1656 : }
1657 6 : else if (data->negate)
1658 : {
1659 : /* change "match everywhere" to "match nowhere" */
1660 6 : data->negate = false;
1661 6 : return TS_NO;
1662 : }
1663 : /* Should not get here if result was TS_YES */
1664 : Assert(false);
1665 0 : break;
1666 14750 : case TS_MAYBE:
1667 : /* match positions are, and remain, uncertain */
1668 14750 : return TS_MAYBE;
1669 : }
1670 0 : break;
1671 :
1672 234552 : case OP_PHRASE:
1673 : case OP_AND:
1674 234552 : memset(&Ldata, 0, sizeof(Ldata));
1675 234552 : memset(&Rdata, 0, sizeof(Rdata));
1676 :
1677 234552 : lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1678 : arg, flags, chkcond, &Ldata);
1679 234552 : if (lmatch == TS_NO)
1680 125220 : return TS_NO;
1681 :
1682 109332 : rmatch = TS_phrase_execute(curitem + 1,
1683 : arg, flags, chkcond, &Rdata);
1684 109332 : if (rmatch == TS_NO)
1685 53844 : return TS_NO;
1686 :
1687 : /*
1688 : * If either operand has no position information, then we can't
1689 : * return reliable position data, only a MAYBE result.
1690 : */
1691 55488 : if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1692 25690 : return TS_MAYBE;
1693 :
1694 29798 : if (curitem->qoperator.oper == OP_PHRASE)
1695 : {
1696 : /*
1697 : * Compute Loffset and Roffset suitable for phrase match, and
1698 : * compute overall width of whole phrase match.
1699 : */
1700 29792 : Loffset = curitem->qoperator.distance + Rdata.width;
1701 29792 : Roffset = 0;
1702 29792 : if (data)
1703 126 : data->width = curitem->qoperator.distance +
1704 126 : Ldata.width + Rdata.width;
1705 : }
1706 : else
1707 : {
1708 : /*
1709 : * For OP_AND, set output width and alignment like OP_OR (see
1710 : * comment below)
1711 : */
1712 6 : maxwidth = Max(Ldata.width, Rdata.width);
1713 6 : Loffset = maxwidth - Ldata.width;
1714 6 : Roffset = maxwidth - Rdata.width;
1715 6 : if (data)
1716 6 : data->width = maxwidth;
1717 : }
1718 :
1719 29798 : if (Ldata.negate && Rdata.negate)
1720 : {
1721 : /* !L & !R: treat as !(L | R) */
1722 28436 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1723 : TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1724 : Loffset, Roffset,
1725 28436 : Ldata.npos + Rdata.npos);
1726 28436 : if (data)
1727 0 : data->negate = true;
1728 28436 : return TS_YES;
1729 : }
1730 1362 : else if (Ldata.negate)
1731 : {
1732 : /* !L & R */
1733 450 : return TS_phrase_output(data, &Ldata, &Rdata,
1734 : TSPO_R_ONLY,
1735 : Loffset, Roffset,
1736 : Rdata.npos);
1737 : }
1738 912 : else if (Rdata.negate)
1739 : {
1740 : /* L & !R */
1741 6 : return TS_phrase_output(data, &Ldata, &Rdata,
1742 : TSPO_L_ONLY,
1743 : Loffset, Roffset,
1744 : Ldata.npos);
1745 : }
1746 : else
1747 : {
1748 : /* straight AND */
1749 906 : return TS_phrase_output(data, &Ldata, &Rdata,
1750 : TSPO_BOTH,
1751 : Loffset, Roffset,
1752 906 : Min(Ldata.npos, Rdata.npos));
1753 : }
1754 :
1755 156 : case OP_OR:
1756 156 : memset(&Ldata, 0, sizeof(Ldata));
1757 156 : memset(&Rdata, 0, sizeof(Rdata));
1758 :
1759 156 : lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1760 : arg, flags, chkcond, &Ldata);
1761 156 : rmatch = TS_phrase_execute(curitem + 1,
1762 : arg, flags, chkcond, &Rdata);
1763 :
1764 156 : if (lmatch == TS_NO && rmatch == TS_NO)
1765 12 : return TS_NO;
1766 :
1767 : /*
1768 : * If either operand has no position information, then we can't
1769 : * return reliable position data, only a MAYBE result.
1770 : */
1771 144 : if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1772 0 : return TS_MAYBE;
1773 :
1774 : /*
1775 : * Cope with undefined output width from failed submatch. (This
1776 : * takes less code than trying to ensure that all failure returns
1777 : * set data->width to zero.)
1778 : */
1779 144 : if (lmatch == TS_NO)
1780 18 : Ldata.width = 0;
1781 144 : if (rmatch == TS_NO)
1782 84 : Rdata.width = 0;
1783 :
1784 : /*
1785 : * For OP_AND and OP_OR, report the width of the wider of the two
1786 : * inputs, and align the narrower input's positions to the right
1787 : * end of that width. This rule deals at least somewhat
1788 : * reasonably with cases like "x <-> (y | z <-> q)".
1789 : */
1790 144 : maxwidth = Max(Ldata.width, Rdata.width);
1791 144 : Loffset = maxwidth - Ldata.width;
1792 144 : Roffset = maxwidth - Rdata.width;
1793 144 : data->width = maxwidth;
1794 :
1795 144 : if (Ldata.negate && Rdata.negate)
1796 : {
1797 : /* !L | !R: treat as !(L & R) */
1798 6 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1799 : TSPO_BOTH,
1800 : Loffset, Roffset,
1801 6 : Min(Ldata.npos, Rdata.npos));
1802 6 : data->negate = true;
1803 6 : return TS_YES;
1804 : }
1805 138 : else if (Ldata.negate)
1806 : {
1807 : /* !L | R: treat as !(L & !R) */
1808 30 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1809 : TSPO_L_ONLY,
1810 : Loffset, Roffset,
1811 : Ldata.npos);
1812 30 : data->negate = true;
1813 30 : return TS_YES;
1814 : }
1815 108 : else if (Rdata.negate)
1816 : {
1817 : /* L | !R: treat as !(!L & R) */
1818 6 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1819 : TSPO_R_ONLY,
1820 : Loffset, Roffset,
1821 : Rdata.npos);
1822 6 : data->negate = true;
1823 6 : return TS_YES;
1824 : }
1825 : else
1826 : {
1827 : /* straight OR */
1828 102 : return TS_phrase_output(data, &Ldata, &Rdata,
1829 : TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1830 : Loffset, Roffset,
1831 102 : Ldata.npos + Rdata.npos);
1832 : }
1833 :
1834 0 : default:
1835 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1836 : }
1837 :
1838 : /* not reachable, but keep compiler quiet */
1839 0 : return TS_NO;
1840 : }
1841 :
1842 :
1843 : /*
1844 : * Evaluate tsquery boolean expression.
1845 : *
1846 : * curitem: current tsquery item (initially, the first one)
1847 : * arg: opaque value to pass through to callback function
1848 : * flags: bitmask of flag bits shown in ts_utils.h
1849 : * chkcond: callback function to check whether a primitive value is present
1850 : */
1851 : bool
1852 519392 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1853 : TSExecuteCallback chkcond)
1854 : {
1855 : /*
1856 : * If we get TS_MAYBE from the recursion, return true. We could only see
1857 : * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
1858 : * need to check again.
1859 : */
1860 519392 : return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
1861 : }
1862 :
1863 : /*
1864 : * Evaluate tsquery boolean expression.
1865 : *
1866 : * This is the same as TS_execute except that TS_MAYBE is returned as-is.
1867 : */
1868 : TSTernaryValue
1869 36942 : TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
1870 : TSExecuteCallback chkcond)
1871 : {
1872 36942 : return TS_execute_recurse(curitem, arg, flags, chkcond);
1873 : }
1874 :
1875 : /*
1876 : * TS_execute recursion for operators above any phrase operator. Here we do
1877 : * not need to worry about lexeme positions. As soon as we hit an OP_PHRASE
1878 : * operator, we pass it off to TS_phrase_execute which does worry.
1879 : */
1880 : static TSTernaryValue
1881 1054460 : TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
1882 : TSExecuteCallback chkcond)
1883 : {
1884 : TSTernaryValue lmatch;
1885 :
1886 : /* since this function recurses, it could be driven to stack overflow */
1887 1054460 : check_stack_depth();
1888 :
1889 : /* ... and let's check for query cancel while we're at it */
1890 1054460 : CHECK_FOR_INTERRUPTS();
1891 :
1892 1054460 : if (curitem->type == QI_VAL)
1893 424068 : return chkcond(arg, (QueryOperand *) curitem,
1894 : NULL /* don't need position info */ );
1895 :
1896 630392 : switch (curitem->qoperator.oper)
1897 : {
1898 203226 : case OP_NOT:
1899 203226 : if (flags & TS_EXEC_SKIP_NOT)
1900 0 : return TS_YES;
1901 203226 : switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1902 : {
1903 191760 : case TS_NO:
1904 191760 : return TS_YES;
1905 4890 : case TS_YES:
1906 4890 : return TS_NO;
1907 6576 : case TS_MAYBE:
1908 6576 : return TS_MAYBE;
1909 : }
1910 0 : break;
1911 :
1912 84032 : case OP_AND:
1913 84032 : lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1914 : flags, chkcond);
1915 84032 : if (lmatch == TS_NO)
1916 66748 : return TS_NO;
1917 17284 : switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1918 : {
1919 10232 : case TS_NO:
1920 10232 : return TS_NO;
1921 3300 : case TS_YES:
1922 3300 : return lmatch;
1923 3752 : case TS_MAYBE:
1924 3752 : return TS_MAYBE;
1925 : }
1926 0 : break;
1927 :
1928 108852 : case OP_OR:
1929 108852 : lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1930 : flags, chkcond);
1931 108852 : if (lmatch == TS_YES)
1932 24120 : return TS_YES;
1933 84732 : switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1934 : {
1935 57508 : case TS_NO:
1936 57508 : return lmatch;
1937 7392 : case TS_YES:
1938 7392 : return TS_YES;
1939 19832 : case TS_MAYBE:
1940 19832 : return TS_MAYBE;
1941 : }
1942 0 : break;
1943 :
1944 234282 : case OP_PHRASE:
1945 :
1946 : /*
1947 : * If we get a MAYBE result, and the caller doesn't want that,
1948 : * convert it to NO. It would be more consistent, perhaps, to
1949 : * return the result of TS_phrase_execute() verbatim and then
1950 : * convert MAYBE results at the top of the recursion. But
1951 : * converting at the topmost phrase operator gives results that
1952 : * are bug-compatible with the old implementation, so do it like
1953 : * this for now.
1954 : */
1955 234282 : switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
1956 : {
1957 179280 : case TS_NO:
1958 179280 : return TS_NO;
1959 29318 : case TS_YES:
1960 29318 : return TS_YES;
1961 25684 : case TS_MAYBE:
1962 25684 : return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
1963 : }
1964 0 : break;
1965 :
1966 0 : default:
1967 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1968 : }
1969 :
1970 : /* not reachable, but keep compiler quiet */
1971 0 : return TS_NO;
1972 : }
1973 :
1974 : /*
1975 : * Detect whether a tsquery boolean expression requires any positive matches
1976 : * to values shown in the tsquery.
1977 : *
1978 : * This is needed to know whether a GIN index search requires full index scan.
1979 : * For example, 'x & !y' requires a match of x, so it's sufficient to scan
1980 : * entries for x; but 'x | !y' could match rows containing neither x nor y.
1981 : */
1982 : bool
1983 834 : tsquery_requires_match(QueryItem *curitem)
1984 : {
1985 : /* since this function recurses, it could be driven to stack overflow */
1986 834 : check_stack_depth();
1987 :
1988 834 : if (curitem->type == QI_VAL)
1989 396 : return true;
1990 :
1991 438 : switch (curitem->qoperator.oper)
1992 : {
1993 168 : case OP_NOT:
1994 :
1995 : /*
1996 : * Assume there are no required matches underneath a NOT. For
1997 : * some cases with nested NOTs, we could prove there's a required
1998 : * match, but it seems unlikely to be worth the trouble.
1999 : */
2000 168 : return false;
2001 :
2002 204 : case OP_PHRASE:
2003 :
2004 : /*
2005 : * Treat OP_PHRASE as OP_AND here
2006 : */
2007 : case OP_AND:
2008 : /* If either side requires a match, we're good */
2009 204 : if (tsquery_requires_match(curitem + curitem->qoperator.left))
2010 156 : return true;
2011 : else
2012 48 : return tsquery_requires_match(curitem + 1);
2013 :
2014 66 : case OP_OR:
2015 : /* Both sides must require a match */
2016 66 : if (tsquery_requires_match(curitem + curitem->qoperator.left))
2017 66 : return tsquery_requires_match(curitem + 1);
2018 : else
2019 0 : return false;
2020 :
2021 0 : default:
2022 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2023 : }
2024 :
2025 : /* not reachable, but keep compiler quiet */
2026 : return false;
2027 : }
2028 :
2029 : /*
2030 : * boolean operations
2031 : */
2032 : Datum
2033 60 : ts_match_qv(PG_FUNCTION_ARGS)
2034 : {
2035 60 : PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
2036 : PG_GETARG_DATUM(1),
2037 : PG_GETARG_DATUM(0)));
2038 : }
2039 :
2040 : Datum
2041 220080 : ts_match_vq(PG_FUNCTION_ARGS)
2042 : {
2043 220080 : TSVector val = PG_GETARG_TSVECTOR(0);
2044 220080 : TSQuery query = PG_GETARG_TSQUERY(1);
2045 : CHKVAL chkval;
2046 : bool result;
2047 :
2048 : /* empty query matches nothing */
2049 220080 : if (!query->size)
2050 : {
2051 0 : PG_FREE_IF_COPY(val, 0);
2052 0 : PG_FREE_IF_COPY(query, 1);
2053 0 : PG_RETURN_BOOL(false);
2054 : }
2055 :
2056 220080 : chkval.arrb = ARRPTR(val);
2057 220080 : chkval.arre = chkval.arrb + val->size;
2058 220080 : chkval.values = STRPTR(val);
2059 220080 : chkval.operand = GETOPERAND(query);
2060 220080 : result = TS_execute(GETQUERY(query),
2061 : &chkval,
2062 : TS_EXEC_EMPTY,
2063 : checkcondition_str);
2064 :
2065 220080 : PG_FREE_IF_COPY(val, 0);
2066 220080 : PG_FREE_IF_COPY(query, 1);
2067 220080 : PG_RETURN_BOOL(result);
2068 : }
2069 :
2070 : Datum
2071 0 : ts_match_tt(PG_FUNCTION_ARGS)
2072 : {
2073 : TSVector vector;
2074 : TSQuery query;
2075 : bool res;
2076 :
2077 0 : vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
2078 : PG_GETARG_DATUM(0)));
2079 0 : query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
2080 : PG_GETARG_DATUM(1)));
2081 :
2082 0 : res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
2083 : TSVectorGetDatum(vector),
2084 : TSQueryGetDatum(query)));
2085 :
2086 0 : pfree(vector);
2087 0 : pfree(query);
2088 :
2089 0 : PG_RETURN_BOOL(res);
2090 : }
2091 :
2092 : Datum
2093 0 : ts_match_tq(PG_FUNCTION_ARGS)
2094 : {
2095 : TSVector vector;
2096 0 : TSQuery query = PG_GETARG_TSQUERY(1);
2097 : bool res;
2098 :
2099 0 : vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
2100 : PG_GETARG_DATUM(0)));
2101 :
2102 0 : res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
2103 : TSVectorGetDatum(vector),
2104 : TSQueryGetDatum(query)));
2105 :
2106 0 : pfree(vector);
2107 0 : PG_FREE_IF_COPY(query, 1);
2108 :
2109 0 : PG_RETURN_BOOL(res);
2110 : }
2111 :
2112 : /*
2113 : * ts_stat statistic function support
2114 : */
2115 :
2116 :
2117 : /*
2118 : * Returns the number of positions in value 'wptr' within tsvector 'txt',
2119 : * that have a weight equal to one of the weights in 'weight' bitmask.
2120 : */
2121 : static int
2122 8178 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
2123 : {
2124 8178 : int len = POSDATALEN(txt, wptr);
2125 8178 : int num = 0;
2126 8178 : WordEntryPos *ptr = POSDATAPTR(txt, wptr);
2127 :
2128 16650 : while (len--)
2129 : {
2130 8472 : if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2131 12 : num++;
2132 8472 : ptr++;
2133 : }
2134 8178 : return num;
2135 : }
2136 :
/*
 * Compare the lexeme kept in StatEntry 'node' with the lexeme of WordEntry
 * 'entry', whose text lives in tsvector 'vec'.  Exact (non-prefix)
 * tsCompareString ordering is used.
 */
#define compareStatWord(node, entry, vec) \
	tsCompareString((node)->lexeme, (node)->lenlexeme, \
					STRPTR(vec) + (entry)->pos, (entry)->len, \
					false)
2141 :
2142 : static void
2143 345624 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
2144 : {
2145 345624 : WordEntry *we = ARRPTR(txt) + off;
2146 345624 : StatEntry *node = stat->root,
2147 345624 : *pnode = NULL;
2148 : int n,
2149 345624 : res = 0;
2150 345624 : uint32 depth = 1;
2151 :
2152 345624 : if (stat->weight == 0)
2153 172812 : n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2154 : else
2155 172812 : n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2156 :
2157 345624 : if (n == 0)
2158 172806 : return; /* nothing to insert */
2159 :
2160 1745442 : while (node)
2161 : {
2162 1738578 : res = compareStatWord(node, we, txt);
2163 :
2164 1738578 : if (res == 0)
2165 : {
2166 165954 : break;
2167 : }
2168 : else
2169 : {
2170 1572624 : pnode = node;
2171 1572624 : node = (res < 0) ? node->left : node->right;
2172 : }
2173 1572624 : depth++;
2174 : }
2175 :
2176 172818 : if (depth > stat->maxdepth)
2177 126 : stat->maxdepth = depth;
2178 :
2179 172818 : if (node == NULL)
2180 : {
2181 6864 : node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
2182 6864 : node->left = node->right = NULL;
2183 6864 : node->ndoc = 1;
2184 6864 : node->nentry = n;
2185 6864 : node->lenlexeme = we->len;
2186 6864 : memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2187 :
2188 6864 : if (pnode == NULL)
2189 : {
2190 12 : stat->root = node;
2191 : }
2192 : else
2193 : {
2194 6852 : if (res < 0)
2195 3372 : pnode->left = node;
2196 : else
2197 3480 : pnode->right = node;
2198 : }
2199 : }
2200 : else
2201 : {
2202 165954 : node->ndoc++;
2203 165954 : node->nentry += n;
2204 : }
2205 : }
2206 :
2207 : static void
2208 495384 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
2209 : uint32 low, uint32 high, uint32 offset)
2210 : {
2211 : uint32 pos;
2212 495384 : uint32 middle = (low + high) >> 1;
2213 :
2214 495384 : pos = (low + middle) >> 1;
2215 495384 : if (low != middle && pos >= offset && pos - offset < txt->size)
2216 170328 : insertStatEntry(persistentContext, stat, txt, pos - offset);
2217 495384 : pos = (high + middle + 1) >> 1;
2218 495384 : if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2219 169284 : insertStatEntry(persistentContext, stat, txt, pos - offset);
2220 :
2221 495384 : if (low != middle)
2222 247692 : chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
2223 495384 : if (high != middle + 1)
2224 241680 : chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2225 495384 : }
2226 :
2227 : /*
2228 : * This is written like a custom aggregate function, because the
2229 : * original plan was to do just that. Unfortunately, an aggregate function
2230 : * can't return a set, so that plan was abandoned. If that limitation is
2231 : * lifted in the future, ts_stat could be a real aggregate function so that
2232 : * you could use it like this:
2233 : *
2234 : * SELECT ts_stat(vector_column) FROM vector_table;
2235 : *
2236 : * where vector_column is a tsvector-type column in vector_table.
2237 : */
2238 :
2239 : static TSVectorStat *
2240 6108 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
2241 : {
2242 6108 : TSVector txt = DatumGetTSVector(data);
2243 : uint32 i,
2244 6108 : nbit = 0,
2245 : offset;
2246 :
2247 6108 : if (stat == NULL)
2248 : { /* Init in first */
2249 0 : stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2250 0 : stat->maxdepth = 1;
2251 : }
2252 :
2253 : /* simple check of correctness */
2254 6108 : if (txt == NULL || txt->size == 0)
2255 : {
2256 96 : if (txt && txt != (TSVector) DatumGetPointer(data))
2257 96 : pfree(txt);
2258 96 : return stat;
2259 : }
2260 :
2261 6012 : i = txt->size - 1;
2262 42720 : for (; i > 0; i >>= 1)
2263 36708 : nbit++;
2264 :
2265 6012 : nbit = 1 << nbit;
2266 6012 : offset = (nbit - txt->size) / 2;
2267 :
2268 6012 : insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2269 6012 : chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
2270 :
2271 6012 : return stat;
2272 : }
2273 :
2274 : static void
2275 12 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
2276 : TSVectorStat *stat)
2277 : {
2278 : TupleDesc tupdesc;
2279 : MemoryContext oldcontext;
2280 : StatEntry *node;
2281 :
2282 12 : funcctx->user_fctx = (void *) stat;
2283 :
2284 12 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2285 :
2286 12 : stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
2287 12 : stat->stackpos = 0;
2288 :
2289 12 : node = stat->root;
2290 : /* find leftmost value */
2291 12 : if (node == NULL)
2292 0 : stat->stack[stat->stackpos] = NULL;
2293 : else
2294 : for (;;)
2295 : {
2296 48 : stat->stack[stat->stackpos] = node;
2297 48 : if (node->left)
2298 : {
2299 36 : stat->stackpos++;
2300 36 : node = node->left;
2301 : }
2302 : else
2303 12 : break;
2304 : }
2305 : Assert(stat->stackpos <= stat->maxdepth);
2306 :
2307 12 : tupdesc = CreateTemplateTupleDesc(3);
2308 12 : TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
2309 : TEXTOID, -1, 0);
2310 12 : TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
2311 : INT4OID, -1, 0);
2312 12 : TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
2313 : INT4OID, -1, 0);
2314 12 : funcctx->tuple_desc = BlessTupleDesc(tupdesc);
2315 12 : funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2316 :
2317 12 : MemoryContextSwitchTo(oldcontext);
2318 12 : }
2319 :
2320 : static StatEntry *
2321 13728 : walkStatEntryTree(TSVectorStat *stat)
2322 : {
2323 13728 : StatEntry *node = stat->stack[stat->stackpos];
2324 :
2325 13728 : if (node == NULL)
2326 0 : return NULL;
2327 :
2328 13728 : if (node->ndoc != 0)
2329 : {
2330 : /* return entry itself: we already was at left sublink */
2331 3384 : return node;
2332 : }
2333 10344 : else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2334 : {
2335 : /* go on right sublink */
2336 3480 : stat->stackpos++;
2337 3480 : node = node->right;
2338 :
2339 : /* find most-left value */
2340 : for (;;)
2341 : {
2342 6816 : stat->stack[stat->stackpos] = node;
2343 6816 : if (node->left)
2344 : {
2345 3336 : stat->stackpos++;
2346 3336 : node = node->left;
2347 : }
2348 : else
2349 3480 : break;
2350 : }
2351 3480 : Assert(stat->stackpos <= stat->maxdepth);
2352 : }
2353 : else
2354 : {
2355 : /* we already return all left subtree, itself and right subtree */
2356 6864 : if (stat->stackpos == 0)
2357 12 : return NULL;
2358 :
2359 6852 : stat->stackpos--;
2360 6852 : return walkStatEntryTree(stat);
2361 : }
2362 :
2363 3480 : return node;
2364 : }
2365 :
2366 : static Datum
2367 6876 : ts_process_call(FuncCallContext *funcctx)
2368 : {
2369 : TSVectorStat *st;
2370 : StatEntry *entry;
2371 :
2372 6876 : st = (TSVectorStat *) funcctx->user_fctx;
2373 :
2374 6876 : entry = walkStatEntryTree(st);
2375 :
2376 6876 : if (entry != NULL)
2377 : {
2378 : Datum result;
2379 : char *values[3];
2380 : char ndoc[16];
2381 : char nentry[16];
2382 : HeapTuple tuple;
2383 :
2384 6864 : values[0] = palloc(entry->lenlexeme + 1);
2385 6864 : memcpy(values[0], entry->lexeme, entry->lenlexeme);
2386 6864 : (values[0])[entry->lenlexeme] = '\0';
2387 6864 : sprintf(ndoc, "%d", entry->ndoc);
2388 6864 : values[1] = ndoc;
2389 6864 : sprintf(nentry, "%d", entry->nentry);
2390 6864 : values[2] = nentry;
2391 :
2392 6864 : tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2393 6864 : result = HeapTupleGetDatum(tuple);
2394 :
2395 6864 : pfree(values[0]);
2396 :
2397 : /* mark entry as already visited */
2398 6864 : entry->ndoc = 0;
2399 :
2400 6864 : return result;
2401 : }
2402 :
2403 12 : return (Datum) 0;
2404 : }
2405 :
2406 : static TSVectorStat *
2407 12 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
2408 : {
2409 12 : char *query = text_to_cstring(txt);
2410 : TSVectorStat *stat;
2411 : bool isnull;
2412 : Portal portal;
2413 : SPIPlanPtr plan;
2414 :
2415 12 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2416 : /* internal error */
2417 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2418 :
2419 12 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2420 : /* internal error */
2421 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2422 :
2423 12 : SPI_cursor_fetch(portal, true, 100);
2424 :
2425 12 : if (SPI_tuptable == NULL ||
2426 12 : SPI_tuptable->tupdesc->natts != 1 ||
2427 12 : !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
2428 : TSVECTOROID))
2429 0 : ereport(ERROR,
2430 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2431 : errmsg("ts_stat query must return one tsvector column")));
2432 :
2433 12 : stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2434 12 : stat->maxdepth = 1;
2435 :
2436 12 : if (ws)
2437 : {
2438 : char *buf;
2439 :
2440 6 : buf = VARDATA_ANY(ws);
2441 18 : while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
2442 : {
2443 12 : if (pg_mblen(buf) == 1)
2444 : {
2445 12 : switch (*buf)
2446 : {
2447 6 : case 'A':
2448 : case 'a':
2449 6 : stat->weight |= 1 << 3;
2450 6 : break;
2451 6 : case 'B':
2452 : case 'b':
2453 6 : stat->weight |= 1 << 2;
2454 6 : break;
2455 0 : case 'C':
2456 : case 'c':
2457 0 : stat->weight |= 1 << 1;
2458 0 : break;
2459 0 : case 'D':
2460 : case 'd':
2461 0 : stat->weight |= 1;
2462 0 : break;
2463 0 : default:
2464 0 : stat->weight |= 0;
2465 : }
2466 0 : }
2467 12 : buf += pg_mblen(buf);
2468 : }
2469 : }
2470 :
2471 84 : while (SPI_processed > 0)
2472 : {
2473 : uint64 i;
2474 :
2475 6180 : for (i = 0; i < SPI_processed; i++)
2476 : {
2477 6108 : Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
2478 :
2479 6108 : if (!isnull)
2480 6108 : stat = ts_accum(persistentContext, stat, data);
2481 : }
2482 :
2483 72 : SPI_freetuptable(SPI_tuptable);
2484 72 : SPI_cursor_fetch(portal, true, 100);
2485 : }
2486 :
2487 12 : SPI_freetuptable(SPI_tuptable);
2488 12 : SPI_cursor_close(portal);
2489 12 : SPI_freeplan(plan);
2490 12 : pfree(query);
2491 :
2492 12 : return stat;
2493 : }
2494 :
2495 : Datum
2496 6864 : ts_stat1(PG_FUNCTION_ARGS)
2497 : {
2498 : FuncCallContext *funcctx;
2499 : Datum result;
2500 :
2501 6864 : if (SRF_IS_FIRSTCALL())
2502 : {
2503 : TSVectorStat *stat;
2504 6 : text *txt = PG_GETARG_TEXT_PP(0);
2505 :
2506 6 : funcctx = SRF_FIRSTCALL_INIT();
2507 6 : SPI_connect();
2508 6 : stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2509 6 : PG_FREE_IF_COPY(txt, 0);
2510 6 : ts_setup_firstcall(fcinfo, funcctx, stat);
2511 6 : SPI_finish();
2512 : }
2513 :
2514 6864 : funcctx = SRF_PERCALL_SETUP();
2515 6864 : if ((result = ts_process_call(funcctx)) != (Datum) 0)
2516 6858 : SRF_RETURN_NEXT(funcctx, result);
2517 6 : SRF_RETURN_DONE(funcctx);
2518 : }
2519 :
2520 : Datum
2521 12 : ts_stat2(PG_FUNCTION_ARGS)
2522 : {
2523 : FuncCallContext *funcctx;
2524 : Datum result;
2525 :
2526 12 : if (SRF_IS_FIRSTCALL())
2527 : {
2528 : TSVectorStat *stat;
2529 6 : text *txt = PG_GETARG_TEXT_PP(0);
2530 6 : text *ws = PG_GETARG_TEXT_PP(1);
2531 :
2532 6 : funcctx = SRF_FIRSTCALL_INIT();
2533 6 : SPI_connect();
2534 6 : stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2535 6 : PG_FREE_IF_COPY(txt, 0);
2536 6 : PG_FREE_IF_COPY(ws, 1);
2537 6 : ts_setup_firstcall(fcinfo, funcctx, stat);
2538 6 : SPI_finish();
2539 : }
2540 :
2541 12 : funcctx = SRF_PERCALL_SETUP();
2542 12 : if ((result = ts_process_call(funcctx)) != (Datum) 0)
2543 6 : SRF_RETURN_NEXT(funcctx, result);
2544 6 : SRF_RETURN_DONE(funcctx);
2545 : }
2546 :
2547 :
2548 : /*
2549 : * Triggers for automatic update of a tsvector column from text column(s)
2550 : *
2551 : * Trigger arguments are either
2552 : * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2553 : * name of tsvector col, name of regconfig col, name(s) of text col(s)
2554 : * ie, tsconfig can either be specified by name, or indirectly as the
2555 : * contents of a regconfig field in the row. If the name is used, it must
2556 : * be explicitly schema-qualified.
2557 : */
2558 : Datum
2559 18 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
2560 : {
2561 18 : return tsvector_update_trigger(fcinfo, false);
2562 : }
2563 :
2564 : Datum
2565 0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
2566 : {
2567 0 : return tsvector_update_trigger(fcinfo, true);
2568 : }
2569 :
2570 : static Datum
2571 18 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
2572 : {
2573 : TriggerData *trigdata;
2574 : Trigger *trigger;
2575 : Relation rel;
2576 18 : HeapTuple rettuple = NULL;
2577 : int tsvector_attr_num,
2578 : i;
2579 : ParsedText prs;
2580 : Datum datum;
2581 : bool isnull;
2582 : text *txt;
2583 : Oid cfgId;
2584 : bool update_needed;
2585 :
2586 : /* Check call context */
2587 18 : if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2588 0 : elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2589 :
2590 18 : trigdata = (TriggerData *) fcinfo->context;
2591 18 : if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2592 0 : elog(ERROR, "tsvector_update_trigger: must be fired for row");
2593 18 : if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2594 0 : elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2595 :
2596 18 : if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2597 : {
2598 12 : rettuple = trigdata->tg_trigtuple;
2599 12 : update_needed = true;
2600 : }
2601 6 : else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2602 : {
2603 6 : rettuple = trigdata->tg_newtuple;
2604 6 : update_needed = false; /* computed below */
2605 : }
2606 : else
2607 0 : elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2608 :
2609 18 : trigger = trigdata->tg_trigger;
2610 18 : rel = trigdata->tg_relation;
2611 :
2612 18 : if (trigger->tgnargs < 3)
2613 0 : elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2614 :
2615 : /* Find the target tsvector column */
2616 18 : tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2617 18 : if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
2618 0 : ereport(ERROR,
2619 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2620 : errmsg("tsvector column \"%s\" does not exist",
2621 : trigger->tgargs[0])));
2622 : /* This will effectively reject system columns, so no separate test: */
2623 18 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
2624 : TSVECTOROID))
2625 0 : ereport(ERROR,
2626 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2627 : errmsg("column \"%s\" is not of tsvector type",
2628 : trigger->tgargs[0])));
2629 :
2630 : /* Find the configuration to use */
2631 18 : if (config_column)
2632 : {
2633 : int config_attr_num;
2634 :
2635 0 : config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2636 0 : if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
2637 0 : ereport(ERROR,
2638 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2639 : errmsg("configuration column \"%s\" does not exist",
2640 : trigger->tgargs[1])));
2641 0 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
2642 : REGCONFIGOID))
2643 0 : ereport(ERROR,
2644 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2645 : errmsg("column \"%s\" is not of regconfig type",
2646 : trigger->tgargs[1])));
2647 :
2648 0 : datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2649 0 : if (isnull)
2650 0 : ereport(ERROR,
2651 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2652 : errmsg("configuration column \"%s\" must not be null",
2653 : trigger->tgargs[1])));
2654 0 : cfgId = DatumGetObjectId(datum);
2655 : }
2656 : else
2657 : {
2658 : List *names;
2659 :
2660 18 : names = stringToQualifiedNameList(trigger->tgargs[1]);
2661 : /* require a schema so that results are not search path dependent */
2662 18 : if (list_length(names) < 2)
2663 0 : ereport(ERROR,
2664 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2665 : errmsg("text search configuration name \"%s\" must be schema-qualified",
2666 : trigger->tgargs[1])));
2667 18 : cfgId = get_ts_config_oid(names, false);
2668 : }
2669 :
2670 : /* initialize parse state */
2671 18 : prs.lenwords = 32;
2672 18 : prs.curwords = 0;
2673 18 : prs.pos = 0;
2674 18 : prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
2675 :
2676 : /* find all words in indexable column(s) */
2677 36 : for (i = 2; i < trigger->tgnargs; i++)
2678 : {
2679 : int numattr;
2680 :
2681 18 : numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2682 18 : if (numattr == SPI_ERROR_NOATTRIBUTE)
2683 0 : ereport(ERROR,
2684 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2685 : errmsg("column \"%s\" does not exist",
2686 : trigger->tgargs[i])));
2687 18 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
2688 0 : ereport(ERROR,
2689 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2690 : errmsg("column \"%s\" is not of a character type",
2691 : trigger->tgargs[i])));
2692 :
2693 18 : if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
2694 6 : update_needed = true;
2695 :
2696 18 : datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2697 18 : if (isnull)
2698 6 : continue;
2699 :
2700 12 : txt = DatumGetTextPP(datum);
2701 :
2702 12 : parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
2703 :
2704 12 : if (txt != (text *) DatumGetPointer(datum))
2705 0 : pfree(txt);
2706 : }
2707 :
2708 18 : if (update_needed)
2709 : {
2710 : /* make tsvector value */
2711 18 : datum = TSVectorGetDatum(make_tsvector(&prs));
2712 18 : isnull = false;
2713 :
2714 : /* and insert it into tuple */
2715 18 : rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2716 : 1, &tsvector_attr_num,
2717 : &datum, &isnull);
2718 :
2719 18 : pfree(DatumGetPointer(datum));
2720 : }
2721 :
2722 18 : return PointerGetDatum(rettuple);
2723 : }
|