Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsvector_op.c
4 : * operations over tsvector
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsvector_op.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include <limits.h>
17 :
18 : #include "access/htup_details.h"
19 : #include "catalog/namespace.h"
20 : #include "catalog/pg_type.h"
21 : #include "commands/trigger.h"
22 : #include "common/int.h"
23 : #include "executor/spi.h"
24 : #include "funcapi.h"
25 : #include "lib/qunique.h"
26 : #include "mb/pg_wchar.h"
27 : #include "miscadmin.h"
28 : #include "parser/parse_coerce.h"
29 : #include "tsearch/ts_utils.h"
30 : #include "utils/array.h"
31 : #include "utils/builtins.h"
32 : #include "utils/regproc.h"
33 : #include "utils/rel.h"
34 :
35 :
/*
 * State handed to the query-execution callbacks when matching a tsquery
 * against a tsvector.
 *
 * "values" points at the tsvector's lexeme storage; WordEntry.pos offsets
 * are resolved relative to it.
 * NOTE(review): arrb/arre look like the begin/end bounds of the WordEntry
 * array and "operand" like the tsquery operand string area -- confirm
 * against the code that fills this struct.
 */
36 : typedef struct
37 : {
38 : WordEntry *arrb;
39 : WordEntry *arre;
40 : char *values;
41 : char *operand;
42 : } CHKVAL;
43 :
44 :
/*
 * One node of a binary tree (left/right children) holding a lexeme and two
 * counters. The lexeme bytes are stored inline after the fixed header, with
 * their length in lenlexeme.
 * NOTE(review): ndoc/nentry are presumably "number of documents" and
 * "number of occurrences" -- confirm against the code that updates them.
 */
45 : typedef struct StatEntry
46 : {
47 : uint32 ndoc; /* zero indicates that we were already here
48 : * while walking through the tree */
49 : uint32 nentry;
50 : struct StatEntry *left;
51 : struct StatEntry *right;
52 : uint32 lenlexeme;
53 : char lexeme[FLEXIBLE_ARRAY_MEMBER];
54 : } StatEntry;
55 :
/* Size of the fixed part of a StatEntry, i.e. everything before lexeme[] */
56 : #define STATENTRYHDRSZ (offsetof(StatEntry, lexeme))
57 :
/*
 * Container for a tree of StatEntry nodes (rooted at "root") together with
 * an explicit stack of node pointers and its current position.
 * NOTE(review): "weight" and "maxdepth" are not used in the visible part of
 * this file; presumably a weight filter and the deepest tree level seen --
 * confirm against the code that uses this struct.
 */
58 : typedef struct
59 : {
60 : int32 weight;
61 :
62 : uint32 maxdepth;
63 :
64 : StatEntry **stack;
65 : uint32 stackpos;
66 :
67 : StatEntry *root;
68 : } TSVectorStat;
69 :
70 :
/* Forward declarations of file-local routines used before their definition */
71 : static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
72 : uint32 flags,
73 : TSExecuteCallback chkcond);
74 : static bool TS_execute_locations_recurse(QueryItem *curitem,
75 : void *arg,
76 : TSExecuteCallback chkcond,
77 : List **locations);
78 : static int tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len);
79 : static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
80 :
81 :
82 : /*
83 : * Order: haspos, len, word, for all positions (pos, weight)
84 : */
85 : static int
86 233 : silly_cmp_tsvector(const TSVectorData *a, const TSVectorData *b)
87 : {
88 233 : if (VARSIZE(a) < VARSIZE(b))
89 0 : return -1;
90 233 : else if (VARSIZE(a) > VARSIZE(b))
91 0 : return 1;
92 233 : else if (a->size < b->size)
93 0 : return -1;
94 233 : else if (a->size > b->size)
95 0 : return 1;
96 : else
97 : {
98 233 : const WordEntry *aptr = ARRPTR(a);
99 233 : const WordEntry *bptr = ARRPTR(b);
100 233 : int i = 0;
101 : int res;
102 :
103 :
104 268 : for (i = 0; i < a->size; i++)
105 : {
106 235 : if (aptr->haspos != bptr->haspos)
107 : {
108 0 : return (aptr->haspos > bptr->haspos) ? -1 : 1;
109 : }
110 235 : else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
111 : {
112 200 : return res;
113 : }
114 35 : else if (aptr->haspos)
115 : {
116 32 : WordEntryPos *ap = POSDATAPTR(a, aptr);
117 32 : WordEntryPos *bp = POSDATAPTR(b, bptr);
118 : int j;
119 :
120 32 : if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
121 0 : return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
122 :
123 64 : for (j = 0; j < POSDATALEN(a, aptr); j++)
124 : {
125 32 : if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
126 : {
127 0 : return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
128 : }
129 32 : else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
130 : {
131 0 : return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
132 : }
133 32 : ap++, bp++;
134 : }
135 : }
136 :
137 35 : aptr++;
138 35 : bptr++;
139 : }
140 : }
141 :
142 33 : return 0;
143 : }
144 :
/*
 * Generate a SQL-callable comparison function named tsvector_<type>.
 *
 * The generated function fetches two tsvector arguments, compares them with
 * silly_cmp_tsvector(), frees any detoasted copies, and returns the value of
 * "res action 0" through PG_RETURN_<ret>. For the boolean operators "action"
 * is the matching C relational operator; for "cmp" it is "+", so the raw
 * comparison result is returned unchanged.
 *
 * The trailing extern declaration exists only to absorb the semicolon that
 * follows each macro invocation.
 */
145 : #define TSVECTORCMPFUNC( type, action, ret ) \
146 : Datum \
147 : tsvector_##type(PG_FUNCTION_ARGS) \
148 : { \
149 : TSVector a = PG_GETARG_TSVECTOR(0); \
150 : TSVector b = PG_GETARG_TSVECTOR(1); \
151 : int res = silly_cmp_tsvector(a, b); \
152 : PG_FREE_IF_COPY(a,0); \
153 : PG_FREE_IF_COPY(b,1); \
154 : PG_RETURN_##ret( res action 0 ); \
155 : } \
156 : /* keep compiler quiet - no extra ; */ \
157 : extern int no_such_variable
158 :
/* Instantiate the six boolean comparison operators and the 3-way cmp */
159 0 : TSVECTORCMPFUNC(lt, <, BOOL);
160 0 : TSVECTORCMPFUNC(le, <=, BOOL);
161 1 : TSVECTORCMPFUNC(eq, ==, BOOL);
162 0 : TSVECTORCMPFUNC(ge, >=, BOOL);
163 0 : TSVECTORCMPFUNC(gt, >, BOOL);
164 0 : TSVECTORCMPFUNC(ne, !=, BOOL);
165 232 : TSVECTORCMPFUNC(cmp, +, INT32);
166 :
167 : Datum
168 73 : tsvector_strip(PG_FUNCTION_ARGS)
169 : {
170 73 : TSVector in = PG_GETARG_TSVECTOR(0);
171 : TSVector out;
172 : int i,
173 73 : len = 0;
174 73 : WordEntry *arrin = ARRPTR(in),
175 : *arrout;
176 : char *cur;
177 :
178 261 : for (i = 0; i < in->size; i++)
179 188 : len += arrin[i].len;
180 :
181 73 : len = CALCDATASIZE(in->size, len);
182 73 : out = (TSVector) palloc0(len);
183 73 : SET_VARSIZE(out, len);
184 73 : out->size = in->size;
185 73 : arrout = ARRPTR(out);
186 73 : cur = STRPTR(out);
187 261 : for (i = 0; i < in->size; i++)
188 : {
189 188 : memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
190 188 : arrout[i].haspos = 0;
191 188 : arrout[i].len = arrin[i].len;
192 188 : arrout[i].pos = cur - STRPTR(out);
193 188 : cur += arrout[i].len;
194 : }
195 :
196 73 : PG_FREE_IF_COPY(in, 0);
197 73 : PG_RETURN_POINTER(out);
198 : }
199 :
200 : Datum
201 7 : tsvector_length(PG_FUNCTION_ARGS)
202 : {
203 7 : TSVector in = PG_GETARG_TSVECTOR(0);
204 7 : int32 ret = in->size;
205 :
206 7 : PG_FREE_IF_COPY(in, 0);
207 7 : PG_RETURN_INT32(ret);
208 : }
209 :
210 : Datum
211 10 : tsvector_setweight(PG_FUNCTION_ARGS)
212 : {
213 10 : TSVector in = PG_GETARG_TSVECTOR(0);
214 10 : char cw = PG_GETARG_CHAR(1);
215 : TSVector out;
216 : int i,
217 : j;
218 : WordEntry *entry;
219 : WordEntryPos *p;
220 10 : int w = 0;
221 :
222 10 : switch (cw)
223 : {
224 0 : case 'A':
225 : case 'a':
226 0 : w = 3;
227 0 : break;
228 0 : case 'B':
229 : case 'b':
230 0 : w = 2;
231 0 : break;
232 10 : case 'C':
233 : case 'c':
234 10 : w = 1;
235 10 : break;
236 0 : case 'D':
237 : case 'd':
238 0 : w = 0;
239 0 : break;
240 0 : default:
241 : /* internal error */
242 0 : elog(ERROR, "unrecognized weight: %d", cw);
243 : }
244 :
245 10 : out = (TSVector) palloc(VARSIZE(in));
246 10 : memcpy(out, in, VARSIZE(in));
247 10 : entry = ARRPTR(out);
248 10 : i = out->size;
249 50 : while (i--)
250 : {
251 40 : if ((j = POSDATALEN(out, entry)) != 0)
252 : {
253 40 : p = POSDATAPTR(out, entry);
254 140 : while (j--)
255 : {
256 100 : WEP_SETWEIGHT(*p, w);
257 100 : p++;
258 : }
259 : }
260 40 : entry++;
261 : }
262 :
263 10 : PG_FREE_IF_COPY(in, 0);
264 10 : PG_RETURN_POINTER(out);
265 : }
266 :
267 : /*
268 : * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
269 : *
270 : * Assign weight w to elements of tsin that are listed in lexemes.
271 : */
272 : Datum
273 20 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
274 : {
275 20 : TSVector tsin = PG_GETARG_TSVECTOR(0);
276 20 : char char_weight = PG_GETARG_CHAR(1);
277 20 : ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2);
278 :
279 : TSVector tsout;
280 : int i,
281 : j,
282 : nlexemes,
283 : weight;
284 : WordEntry *entry;
285 : Datum *dlexemes;
286 : bool *nulls;
287 :
288 20 : switch (char_weight)
289 : {
290 0 : case 'A':
291 : case 'a':
292 0 : weight = 3;
293 0 : break;
294 0 : case 'B':
295 : case 'b':
296 0 : weight = 2;
297 0 : break;
298 20 : case 'C':
299 : case 'c':
300 20 : weight = 1;
301 20 : break;
302 0 : case 'D':
303 : case 'd':
304 0 : weight = 0;
305 0 : break;
306 0 : default:
307 : /* internal error */
308 0 : elog(ERROR, "unrecognized weight: %c", char_weight);
309 : }
310 :
311 20 : tsout = (TSVector) palloc(VARSIZE(tsin));
312 20 : memcpy(tsout, tsin, VARSIZE(tsin));
313 20 : entry = ARRPTR(tsout);
314 :
315 20 : deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlexemes);
316 :
317 : /*
318 : * Assuming that lexemes array is significantly shorter than tsvector we
319 : * can iterate through lexemes performing binary search of each lexeme
320 : * from lexemes in tsvector.
321 : */
322 60 : for (i = 0; i < nlexemes; i++)
323 : {
324 : char *lex;
325 : int lex_len,
326 : lex_pos;
327 :
328 : /* Ignore null array elements, they surely don't match */
329 40 : if (nulls[i])
330 5 : continue;
331 :
332 35 : lex = VARDATA(DatumGetPointer(dlexemes[i]));
333 35 : lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
334 35 : lex_pos = tsvector_bsearch(tsout, lex, lex_len);
335 :
336 35 : if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
337 : {
338 20 : WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
339 :
340 65 : while (j--)
341 : {
342 45 : WEP_SETWEIGHT(*p, weight);
343 45 : p++;
344 : }
345 : }
346 : }
347 :
348 20 : PG_FREE_IF_COPY(tsin, 0);
349 20 : PG_FREE_IF_COPY(lexemes, 2);
350 :
351 20 : PG_RETURN_POINTER(tsout);
352 : }
353 :
/*
 * Compare the lexemes of two WordEntry items with tsCompareString() in
 * non-prefix mode. pa/pb are the base pointers that a->pos / b->pos are
 * offsets into; a and b are pointers to the entries themselves.
 */
354 : #define compareEntry(pa, a, pb, b) \
355 : tsCompareString((pa) + (a)->pos, (a)->len, \
356 : (pb) + (b)->pos, (b)->len, \
357 : false)
358 :
359 : /*
360 : * Add positions from src to dest after offsetting them by maxpos.
361 : * Return the number added (might be less than expected due to overflow)
362 : */
363 : static int32
364 10 : add_pos(TSVector src, WordEntry *srcptr,
365 : TSVector dest, WordEntry *destptr,
366 : int32 maxpos)
367 : {
368 10 : uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
369 : int i;
370 10 : uint16 slen = POSDATALEN(src, srcptr),
371 : startlen;
372 10 : WordEntryPos *spos = POSDATAPTR(src, srcptr),
373 10 : *dpos = POSDATAPTR(dest, destptr);
374 :
375 10 : if (!destptr->haspos)
376 0 : *clen = 0;
377 :
378 10 : startlen = *clen;
379 10 : for (i = 0;
380 20 : i < slen && *clen < MAXNUMPOS &&
381 10 : (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
382 10 : i++)
383 : {
384 10 : WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
385 10 : WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
386 10 : (*clen)++;
387 : }
388 :
389 10 : if (*clen != startlen)
390 10 : destptr->haspos = 1;
391 10 : return *clen - startlen;
392 : }
393 :
394 : /*
395 : * Perform binary search of given lexeme in TSVector.
396 : * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
397 : * found.
398 : */
399 : static int
400 165 : tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len)
401 : {
402 165 : const WordEntry *arrin = ARRPTR(tsv);
403 165 : int StopLow = 0,
404 165 : StopHigh = tsv->size,
405 : StopMiddle,
406 : cmp;
407 :
408 435 : while (StopLow < StopHigh)
409 : {
410 385 : StopMiddle = (StopLow + StopHigh) / 2;
411 :
412 385 : cmp = tsCompareString(lexeme, lexeme_len,
413 385 : STRPTR(tsv) + arrin[StopMiddle].pos,
414 385 : arrin[StopMiddle].len,
415 : false);
416 :
417 385 : if (cmp < 0)
418 180 : StopHigh = StopMiddle;
419 205 : else if (cmp > 0)
420 90 : StopLow = StopMiddle + 1;
421 : else /* found it */
422 115 : return StopMiddle;
423 : }
424 :
425 50 : return -1;
426 : }
427 :
/*
 * qsort comparator functions
 */

/* Order two ints ascending; both arguments point at int values. */
static int
compare_int(const void *va, const void *vb)
{
	const int  *lhs = (const int *) va;
	const int  *rhs = (const int *) vb;

	return pg_cmp_s32(*lhs, *rhs);
}
440 :
441 : static int
442 85 : compare_text_lexemes(const void *va, const void *vb)
443 : {
444 85 : Datum a = *((const Datum *) va);
445 85 : Datum b = *((const Datum *) vb);
446 85 : char *alex = VARDATA_ANY(DatumGetPointer(a));
447 85 : int alex_len = VARSIZE_ANY_EXHDR(DatumGetPointer(a));
448 85 : char *blex = VARDATA_ANY(DatumGetPointer(b));
449 85 : int blex_len = VARSIZE_ANY_EXHDR(DatumGetPointer(b));
450 :
451 85 : return tsCompareString(alex, alex_len, blex, blex_len, false);
452 : }
453 :
454 : /*
455 : * Internal routine to delete lexemes from TSVector by array of offsets.
456 : *
457 : * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
458 : * int indices_count -- size of that array
459 : *
460 : * Returns new TSVector without given lexemes along with their positions
461 : * and weights.
462 : */
463 : static TSVector
464 55 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
465 : int indices_count)
466 : {
467 : TSVector tsout;
468 55 : WordEntry *arrin = ARRPTR(tsv),
469 : *arrout;
470 55 : char *data = STRPTR(tsv),
471 : *dataout;
472 : int i, /* index in arrin */
473 : j, /* index in arrout */
474 : k, /* index in indices_to_delete */
475 : curoff; /* index in dataout area */
476 :
477 : /*
478 : * Sort the filter array to simplify membership checks below. Also, get
479 : * rid of any duplicate entries, so that we can assume that indices_count
480 : * is exactly equal to the number of lexemes that will be removed.
481 : */
482 55 : if (indices_count > 1)
483 : {
484 25 : qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
485 25 : indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
486 : compare_int);
487 : }
488 :
489 : /*
490 : * Here we overestimate tsout size, since we don't know how much space is
491 : * used by the deleted lexeme(s). We will set exact size below.
492 : */
493 55 : tsout = (TSVector) palloc0(VARSIZE(tsv));
494 :
495 : /* This count must be correct because STRPTR(tsout) relies on it. */
496 55 : tsout->size = tsv->size - indices_count;
497 :
498 : /*
499 : * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
500 : */
501 55 : arrout = ARRPTR(tsout);
502 55 : dataout = STRPTR(tsout);
503 55 : curoff = 0;
504 330 : for (i = j = k = 0; i < tsv->size; i++)
505 : {
506 : /*
507 : * If current i is present in indices_to_delete, skip this lexeme.
508 : * Since indices_to_delete is already sorted, we only need to check
509 : * the current (k'th) entry.
510 : */
511 275 : if (k < indices_count && i == indices_to_delete[k])
512 : {
513 80 : k++;
514 80 : continue;
515 : }
516 :
517 : /* Copy lexeme and its positions and weights */
518 195 : memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
519 195 : arrout[j].haspos = arrin[i].haspos;
520 195 : arrout[j].len = arrin[i].len;
521 195 : arrout[j].pos = curoff;
522 195 : curoff += arrin[i].len;
523 195 : if (arrin[i].haspos)
524 : {
525 130 : int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
526 130 : + sizeof(uint16);
527 :
528 130 : curoff = SHORTALIGN(curoff);
529 130 : memcpy(dataout + curoff,
530 130 : STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
531 : len);
532 130 : curoff += len;
533 : }
534 :
535 195 : j++;
536 : }
537 :
538 : /*
539 : * k should now be exactly equal to indices_count. If it isn't then the
540 : * caller provided us with indices outside of [0, tsv->size) range and
541 : * estimation of tsout's size is wrong.
542 : */
543 : Assert(k == indices_count);
544 :
545 55 : SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
546 55 : return tsout;
547 : }
548 :
549 : /*
550 : * Delete given lexeme from tsvector.
551 : * Implementation of user-level ts_delete(tsvector, text).
552 : */
553 : Datum
554 30 : tsvector_delete_str(PG_FUNCTION_ARGS)
555 : {
556 30 : TSVector tsin = PG_GETARG_TSVECTOR(0),
557 : tsout;
558 30 : text *tlexeme = PG_GETARG_TEXT_PP(1);
559 30 : char *lexeme = VARDATA_ANY(tlexeme);
560 30 : int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
561 : skip_index;
562 :
563 30 : if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
564 10 : PG_RETURN_POINTER(tsin);
565 :
566 20 : tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
567 :
568 20 : PG_FREE_IF_COPY(tsin, 0);
569 20 : PG_FREE_IF_COPY(tlexeme, 1);
570 20 : PG_RETURN_POINTER(tsout);
571 : }
572 :
573 : /*
574 : * Delete given array of lexemes from tsvector.
575 : * Implementation of user-level ts_delete(tsvector, text[]).
576 : */
577 : Datum
578 35 : tsvector_delete_arr(PG_FUNCTION_ARGS)
579 : {
580 35 : TSVector tsin = PG_GETARG_TSVECTOR(0),
581 : tsout;
582 35 : ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1);
583 : int i,
584 : nlex,
585 : skip_count,
586 : *skip_indices;
587 : Datum *dlexemes;
588 : bool *nulls;
589 :
590 35 : deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlex);
591 :
592 : /*
593 : * In typical use case array of lexemes to delete is relatively small. So
594 : * here we optimize things for that scenario: iterate through lexarr
595 : * performing binary search of each lexeme from lexarr in tsvector.
596 : */
597 35 : skip_indices = palloc0(nlex * sizeof(int));
598 140 : for (i = skip_count = 0; i < nlex; i++)
599 : {
600 : char *lex;
601 : int lex_len,
602 : lex_pos;
603 :
604 : /* Ignore null array elements, they surely don't match */
605 105 : if (nulls[i])
606 5 : continue;
607 :
608 100 : lex = VARDATA(DatumGetPointer(dlexemes[i]));
609 100 : lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
610 100 : lex_pos = tsvector_bsearch(tsin, lex, lex_len);
611 :
612 100 : if (lex_pos >= 0)
613 65 : skip_indices[skip_count++] = lex_pos;
614 : }
615 :
616 35 : tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
617 :
618 35 : pfree(skip_indices);
619 35 : PG_FREE_IF_COPY(tsin, 0);
620 35 : PG_FREE_IF_COPY(lexemes, 1);
621 :
622 35 : PG_RETURN_POINTER(tsout);
623 : }
624 :
625 : /*
626 : * Expand tsvector as table with following columns:
627 : * lexeme: lexeme text
628 : * positions: integer array of lexeme positions
629 : * weights: char array of weights corresponding to positions
630 : */
631 : Datum
632 120 : tsvector_unnest(PG_FUNCTION_ARGS)
633 : {
634 : FuncCallContext *funcctx;
635 : TSVector tsin;
636 :
637 120 : if (SRF_IS_FIRSTCALL())
638 : {
639 : MemoryContext oldcontext;
640 : TupleDesc tupdesc;
641 :
642 20 : funcctx = SRF_FIRSTCALL_INIT();
643 20 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
644 :
645 20 : tupdesc = CreateTemplateTupleDesc(3);
646 20 : TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
647 : TEXTOID, -1, 0);
648 20 : TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
649 : INT2ARRAYOID, -1, 0);
650 20 : TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
651 : TEXTARRAYOID, -1, 0);
652 20 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
653 0 : elog(ERROR, "return type must be a row type");
654 20 : TupleDescFinalize(tupdesc);
655 20 : funcctx->tuple_desc = tupdesc;
656 :
657 20 : funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
658 :
659 20 : MemoryContextSwitchTo(oldcontext);
660 : }
661 :
662 120 : funcctx = SRF_PERCALL_SETUP();
663 120 : tsin = (TSVector) funcctx->user_fctx;
664 :
665 120 : if (funcctx->call_cntr < tsin->size)
666 : {
667 100 : WordEntry *arrin = ARRPTR(tsin);
668 100 : char *data = STRPTR(tsin);
669 : HeapTuple tuple;
670 : int j,
671 100 : i = funcctx->call_cntr;
672 100 : bool nulls[] = {false, false, false};
673 : Datum values[3];
674 :
675 100 : values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
676 :
677 100 : if (arrin[i].haspos)
678 : {
679 : WordEntryPosVector *posv;
680 : Datum *positions;
681 : Datum *weights;
682 : char weight;
683 :
684 : /*
685 : * Internally tsvector stores position and weight in the same
686 : * uint16 (2 bits for weight, 14 for position). Here we extract
687 : * that in two separate arrays.
688 : */
689 60 : posv = _POSVECPTR(tsin, arrin + i);
690 60 : positions = palloc(posv->npos * sizeof(Datum));
691 60 : weights = palloc(posv->npos * sizeof(Datum));
692 168 : for (j = 0; j < posv->npos; j++)
693 : {
694 108 : positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
695 108 : weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
696 108 : weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
697 : 1));
698 : }
699 :
700 60 : values[1] = PointerGetDatum(construct_array_builtin(positions, posv->npos, INT2OID));
701 60 : values[2] = PointerGetDatum(construct_array_builtin(weights, posv->npos, TEXTOID));
702 : }
703 : else
704 : {
705 40 : nulls[1] = nulls[2] = true;
706 : }
707 :
708 100 : tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
709 100 : SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
710 : }
711 : else
712 : {
713 20 : SRF_RETURN_DONE(funcctx);
714 : }
715 : }
716 :
717 : /*
718 : * Convert tsvector to array of lexemes.
719 : */
720 : Datum
721 10 : tsvector_to_array(PG_FUNCTION_ARGS)
722 : {
723 10 : TSVector tsin = PG_GETARG_TSVECTOR(0);
724 10 : WordEntry *arrin = ARRPTR(tsin);
725 : Datum *elements;
726 : int i;
727 : ArrayType *array;
728 :
729 10 : elements = palloc(tsin->size * sizeof(Datum));
730 :
731 60 : for (i = 0; i < tsin->size; i++)
732 : {
733 50 : elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
734 50 : arrin[i].len));
735 : }
736 :
737 10 : array = construct_array_builtin(elements, tsin->size, TEXTOID);
738 :
739 10 : pfree(elements);
740 10 : PG_FREE_IF_COPY(tsin, 0);
741 10 : PG_RETURN_POINTER(array);
742 : }
743 :
744 : /*
745 : * Build tsvector from array of lexemes.
746 : */
747 : Datum
748 18 : array_to_tsvector(PG_FUNCTION_ARGS)
749 : {
750 18 : ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
751 : TSVector tsout;
752 : Datum *dlexemes;
753 : WordEntry *arrout;
754 : bool *nulls;
755 : int nitems,
756 : i,
757 : tslen,
758 18 : datalen = 0;
759 : char *cur;
760 :
761 18 : deconstruct_array_builtin(v, TEXTOID, &dlexemes, &nulls, &nitems);
762 :
763 : /*
764 : * Reject nulls and zero length strings (maybe we should just ignore them,
765 : * instead?)
766 : */
767 95 : for (i = 0; i < nitems; i++)
768 : {
769 85 : if (nulls[i])
770 4 : ereport(ERROR,
771 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
772 : errmsg("lexeme array may not contain nulls")));
773 :
774 81 : if (VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ == 0)
775 4 : ereport(ERROR,
776 : (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
777 : errmsg("lexeme array may not contain empty strings")));
778 : }
779 :
780 : /* Sort and de-dup, because this is required for a valid tsvector. */
781 10 : if (nitems > 1)
782 : {
783 10 : qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
784 10 : nitems = qunique(dlexemes, nitems, sizeof(Datum),
785 : compare_text_lexemes);
786 : }
787 :
788 : /* Calculate space needed for surviving lexemes. */
789 50 : for (i = 0; i < nitems; i++)
790 40 : datalen += VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
791 10 : tslen = CALCDATASIZE(nitems, datalen);
792 :
793 : /* Allocate and fill tsvector. */
794 10 : tsout = (TSVector) palloc0(tslen);
795 10 : SET_VARSIZE(tsout, tslen);
796 10 : tsout->size = nitems;
797 :
798 10 : arrout = ARRPTR(tsout);
799 10 : cur = STRPTR(tsout);
800 50 : for (i = 0; i < nitems; i++)
801 : {
802 40 : char *lex = VARDATA(DatumGetPointer(dlexemes[i]));
803 40 : int lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
804 :
805 40 : memcpy(cur, lex, lex_len);
806 40 : arrout[i].haspos = 0;
807 40 : arrout[i].len = lex_len;
808 40 : arrout[i].pos = cur - STRPTR(tsout);
809 40 : cur += lex_len;
810 : }
811 :
812 10 : PG_FREE_IF_COPY(v, 0);
813 10 : PG_RETURN_POINTER(tsout);
814 : }
815 :
816 : /*
817 : * ts_filter(): keep only lexemes with given weights in tsvector.
818 : */
819 : Datum
820 14 : tsvector_filter(PG_FUNCTION_ARGS)
821 : {
822 14 : TSVector tsin = PG_GETARG_TSVECTOR(0),
823 : tsout;
824 14 : ArrayType *weights = PG_GETARG_ARRAYTYPE_P(1);
825 14 : WordEntry *arrin = ARRPTR(tsin),
826 : *arrout;
827 14 : char *datain = STRPTR(tsin),
828 : *dataout;
829 : Datum *dweights;
830 : bool *nulls;
831 : int nweights;
832 : int i,
833 : j;
834 14 : int cur_pos = 0;
835 14 : char mask = 0;
836 :
837 14 : deconstruct_array_builtin(weights, CHAROID, &dweights, &nulls, &nweights);
838 :
839 32 : for (i = 0; i < nweights; i++)
840 : {
841 : char char_weight;
842 :
843 22 : if (nulls[i])
844 4 : ereport(ERROR,
845 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
846 : errmsg("weight array may not contain nulls")));
847 :
848 18 : char_weight = DatumGetChar(dweights[i]);
849 18 : switch (char_weight)
850 : {
851 14 : case 'A':
852 : case 'a':
853 14 : mask = mask | 8;
854 14 : break;
855 4 : case 'B':
856 : case 'b':
857 4 : mask = mask | 4;
858 4 : break;
859 0 : case 'C':
860 : case 'c':
861 0 : mask = mask | 2;
862 0 : break;
863 0 : case 'D':
864 : case 'd':
865 0 : mask = mask | 1;
866 0 : break;
867 0 : default:
868 0 : ereport(ERROR,
869 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
870 : errmsg("unrecognized weight: \"%c\"", char_weight)));
871 : }
872 : }
873 :
874 10 : tsout = (TSVector) palloc0(VARSIZE(tsin));
875 10 : tsout->size = tsin->size;
876 10 : arrout = ARRPTR(tsout);
877 10 : dataout = STRPTR(tsout);
878 :
879 90 : for (i = j = 0; i < tsin->size; i++)
880 : {
881 : WordEntryPosVector *posvin,
882 : *posvout;
883 80 : int npos = 0;
884 : int k;
885 :
886 80 : if (!arrin[i].haspos)
887 25 : continue;
888 :
889 55 : posvin = _POSVECPTR(tsin, arrin + i);
890 55 : posvout = (WordEntryPosVector *)
891 55 : (dataout + SHORTALIGN(cur_pos + arrin[i].len));
892 :
893 110 : for (k = 0; k < posvin->npos; k++)
894 : {
895 55 : if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
896 25 : posvout->pos[npos++] = posvin->pos[k];
897 : }
898 :
899 : /* if no satisfactory positions found, skip lexeme */
900 55 : if (!npos)
901 30 : continue;
902 :
903 25 : arrout[j].haspos = true;
904 25 : arrout[j].len = arrin[i].len;
905 25 : arrout[j].pos = cur_pos;
906 :
907 25 : memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
908 25 : posvout->npos = npos;
909 25 : cur_pos += SHORTALIGN(arrin[i].len);
910 25 : cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
911 : sizeof(uint16);
912 25 : j++;
913 : }
914 :
915 10 : tsout->size = j;
916 10 : if (dataout != STRPTR(tsout))
917 10 : memmove(STRPTR(tsout), dataout, cur_pos);
918 :
919 10 : SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
920 :
921 10 : PG_FREE_IF_COPY(tsin, 0);
922 10 : PG_RETURN_POINTER(tsout);
923 : }
924 :
/*
 * Concatenate two tsvectors.
 *
 * The two entry arrays are merged in lexeme order (as defined by
 * compareEntry). A lexeme found in only one input is copied to the output;
 * a lexeme found in both gets a single output entry holding in1's positions
 * followed by in2's. Every position taken from in2 is shifted by the
 * largest position found in in1; that shifting is done by add_pos(), which
 * may store fewer positions than the source has, or none at all.
 *
 * Raises an error if the resulting lexeme/position data exceeds MAXSTRPOS
 * bytes.
 */
925 : Datum
926 9 : tsvector_concat(PG_FUNCTION_ARGS)
927 : {
928 9 : TSVector in1 = PG_GETARG_TSVECTOR(0);
929 9 : TSVector in2 = PG_GETARG_TSVECTOR(1);
930 : TSVector out;
931 : WordEntry *ptr;
932 : WordEntry *ptr1,
933 : *ptr2;
934 : WordEntryPos *p;
935 9 : int maxpos = 0,
936 : i,
937 : j,
938 : i1,
939 : i2,
940 : dataoff,
941 : output_bytes,
942 : output_size;
943 : char *data,
944 : *data1,
945 : *data2;
946 :
947 : /* Get max position in in1; we'll need this to offset in2's positions */
948 9 : ptr = ARRPTR(in1);
949 9 : i = in1->size;
950 23 : while (i--)
951 : {
952 14 : if ((j = POSDATALEN(in1, ptr)) != 0)
953 : {
954 14 : p = POSDATAPTR(in1, ptr);
955 28 : while (j--)
956 : {
957 14 : if (WEP_GETPOS(*p) > maxpos)
958 9 : maxpos = WEP_GETPOS(*p);
959 14 : p++;
960 : }
961 : }
962 14 : ptr++;
963 : }
964 :
/* Set up merge cursors: ptrN walks inN's entries, iN counts those left */
965 9 : ptr1 = ARRPTR(in1);
966 9 : ptr2 = ARRPTR(in2);
967 9 : data1 = STRPTR(in1);
968 9 : data2 = STRPTR(in2);
969 9 : i1 = in1->size;
970 9 : i2 = in2->size;
971 :
972 : /*
973 : * Conservative estimate of space needed. We might need all the data in
974 : * both inputs, and conceivably add a pad byte before position data for
975 : * each item where there was none before.
976 : */
977 9 : output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
978 :
979 9 : out = (TSVector) palloc0(output_bytes);
980 9 : SET_VARSIZE(out, output_bytes);
981 :
982 : /*
983 : * We must make out->size valid so that STRPTR(out) is sensible. We'll
984 : * collapse out any unused space at the end.
985 : */
986 9 : out->size = in1->size + in2->size;
987 :
988 9 : ptr = ARRPTR(out);
989 9 : data = STRPTR(out);
990 9 : dataoff = 0;
/* Main merge: runs while both inputs still have entries to consume */
991 23 : while (i1 && i2)
992 : {
993 14 : int cmp = compareEntry(data1, ptr1, data2, ptr2);
994 :
995 14 : if (cmp < 0)
996 : { /* in1 first */
/* in1's positions are copied verbatim (count word + positions) */
997 5 : ptr->haspos = ptr1->haspos;
998 5 : ptr->len = ptr1->len;
999 5 : memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1000 5 : ptr->pos = dataoff;
1001 5 : dataoff += ptr1->len;
1002 5 : if (ptr->haspos)
1003 : {
1004 5 : dataoff = SHORTALIGN(dataoff);
1005 5 : memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1006 5 : dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1007 : }
1008 :
1009 5 : ptr++;
1010 5 : ptr1++;
1011 5 : i1--;
1012 : }
1013 9 : else if (cmp > 0)
1014 : { /* in2 first */
/*
 * in2's positions go through add_pos() so they get shifted by maxpos;
 * if it stored none, the output entry ends up without positions.
 */
1015 4 : ptr->haspos = ptr2->haspos;
1016 4 : ptr->len = ptr2->len;
1017 4 : memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1018 4 : ptr->pos = dataoff;
1019 4 : dataoff += ptr2->len;
1020 4 : if (ptr->haspos)
1021 : {
1022 0 : int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1023 :
1024 0 : if (addlen == 0)
1025 0 : ptr->haspos = 0;
1026 : else
1027 : {
1028 0 : dataoff = SHORTALIGN(dataoff);
1029 0 : dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1030 : }
1031 : }
1032 :
1033 4 : ptr++;
1034 4 : ptr2++;
1035 4 : i2--;
1036 : }
1037 : else
1038 : {
/*
 * Same lexeme in both inputs: emit it once. When in1 has positions they
 * are copied first and in2's are appended after them, so only the added
 * positions (no second count word) are accounted for in dataoff.
 */
1039 5 : ptr->haspos = ptr1->haspos | ptr2->haspos;
1040 5 : ptr->len = ptr1->len;
1041 5 : memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1042 5 : ptr->pos = dataoff;
1043 5 : dataoff += ptr1->len;
1044 5 : if (ptr->haspos)
1045 : {
1046 5 : if (ptr1->haspos)
1047 : {
1048 5 : dataoff = SHORTALIGN(dataoff);
1049 5 : memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1050 5 : dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1051 5 : if (ptr2->haspos)
1052 5 : dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1053 : }
1054 : else /* must have ptr2->haspos */
1055 : {
1056 0 : int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1057 :
1058 0 : if (addlen == 0)
1059 0 : ptr->haspos = 0;
1060 : else
1061 : {
1062 0 : dataoff = SHORTALIGN(dataoff);
1063 0 : dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1064 : }
1065 : }
1066 : }
1067 :
1068 5 : ptr++;
1069 5 : ptr1++;
1070 5 : ptr2++;
1071 5 : i1--;
1072 5 : i2--;
1073 : }
1074 : }
1075 :
/* Drain whatever is left of in1 (same handling as the "in1 first" case) */
1076 13 : while (i1)
1077 : {
1078 4 : ptr->haspos = ptr1->haspos;
1079 4 : ptr->len = ptr1->len;
1080 4 : memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1081 4 : ptr->pos = dataoff;
1082 4 : dataoff += ptr1->len;
1083 4 : if (ptr->haspos)
1084 : {
1085 4 : dataoff = SHORTALIGN(dataoff);
1086 4 : memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1087 4 : dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1088 : }
1089 :
1090 4 : ptr++;
1091 4 : ptr1++;
1092 4 : i1--;
1093 : }
1094 :
/* Drain whatever is left of in2 (same handling as the "in2 first" case) */
1095 14 : while (i2)
1096 : {
1097 5 : ptr->haspos = ptr2->haspos;
1098 5 : ptr->len = ptr2->len;
1099 5 : memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1100 5 : ptr->pos = dataoff;
1101 5 : dataoff += ptr2->len;
1102 5 : if (ptr->haspos)
1103 : {
1104 5 : int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1105 :
1106 5 : if (addlen == 0)
1107 0 : ptr->haspos = 0;
1108 : else
1109 : {
1110 5 : dataoff = SHORTALIGN(dataoff);
1111 5 : dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1112 : }
1113 : }
1114 :
1115 5 : ptr++;
1116 5 : ptr2++;
1117 5 : i2--;
1118 : }
1119 :
1120 : /*
1121 : * Instead of checking each offset individually, we check for overflow of
1122 : * pos fields once at the end.
1123 : */
1124 9 : if (dataoff > MAXSTRPOS)
1125 0 : ereport(ERROR,
1126 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1127 : errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1128 :
1129 : /*
1130 : * Adjust sizes (asserting that we didn't overrun the original estimates)
1131 : * and collapse out any unused array entries.
1132 : */
1133 9 : output_size = ptr - ARRPTR(out);
1134 : Assert(output_size <= out->size);
1135 9 : out->size = output_size;
/*
 * STRPTR(out) depends on out->size, so if entries were merged the data
 * area now starts earlier than "data"; slide the bytes down to match.
 */
1136 9 : if (data != STRPTR(out))
1137 5 : memmove(STRPTR(out), data, dataoff);
1138 9 : output_bytes = CALCDATASIZE(out->size, dataoff);
1139 : Assert(output_bytes <= VARSIZE(out));
1140 9 : SET_VARSIZE(out, output_bytes);
1141 :
1142 9 : PG_FREE_IF_COPY(in1, 0);
1143 9 : PG_FREE_IF_COPY(in2, 1);
1144 9 : PG_RETURN_POINTER(out);
1145 : }
1146 :
1147 : /*
1148 : * Compare two strings by tsvector rules.
1149 : *
1150 : * if prefix = true then it returns zero value iff b has prefix a
1151 : */
1152 : int32
1153 4194184 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1154 : {
1155 : int cmp;
1156 :
1157 4194184 : if (lena == 0)
1158 : {
1159 30 : if (prefix)
1160 0 : cmp = 0; /* empty string is prefix of anything */
1161 : else
1162 30 : cmp = (lenb > 0) ? -1 : 0;
1163 : }
1164 4194154 : else if (lenb == 0)
1165 : {
1166 0 : cmp = (lena > 0) ? 1 : 0;
1167 : }
1168 : else
1169 : {
1170 4194154 : cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
1171 :
1172 4194154 : if (prefix)
1173 : {
1174 11021 : if (cmp == 0 && lena > lenb)
1175 0 : cmp = 1; /* a is longer, so not a prefix of b */
1176 : }
1177 4183133 : else if (cmp == 0 && lena != lenb)
1178 : {
1179 21676 : cmp = (lena < lenb) ? -1 : 1;
1180 : }
1181 : }
1182 :
1183 4194184 : return cmp;
1184 : }
1185 :
1186 : /*
1187 : * Check weight info or/and fill 'data' with the required positions
1188 : */
1189 : static TSTernaryValue
1190 45576 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
1191 : ExecPhraseData *data)
1192 : {
1193 45576 : TSTernaryValue result = TS_NO;
1194 :
1195 : Assert(data == NULL || data->npos == 0);
1196 :
1197 45576 : if (entry->haspos)
1198 : {
1199 : WordEntryPosVector *posvec;
1200 :
1201 : /*
1202 : * We can't use the _POSVECPTR macro here because the pointer to the
1203 : * tsvector's lexeme storage is already contained in chkval->values.
1204 : */
1205 3160 : posvec = (WordEntryPosVector *)
1206 3160 : (chkval->values + SHORTALIGN(entry->pos + entry->len));
1207 :
1208 3160 : if (val->weight && data)
1209 40 : {
1210 40 : WordEntryPos *posvec_iter = posvec->pos;
1211 : WordEntryPos *dptr;
1212 :
1213 : /*
1214 : * Filter position information by weights
1215 : */
1216 40 : dptr = data->pos = palloc_array(WordEntryPos, posvec->npos);
1217 40 : data->allocated = true;
1218 :
1219 : /* Is there a position with a matching weight? */
1220 80 : while (posvec_iter < posvec->pos + posvec->npos)
1221 : {
1222 : /* If true, append this position to the data->pos */
1223 40 : if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1224 : {
1225 20 : *dptr = WEP_GETPOS(*posvec_iter);
1226 20 : dptr++;
1227 : }
1228 :
1229 40 : posvec_iter++;
1230 : }
1231 :
1232 40 : data->npos = dptr - data->pos;
1233 :
1234 40 : if (data->npos > 0)
1235 20 : result = TS_YES;
1236 : else
1237 : {
1238 20 : pfree(data->pos);
1239 20 : data->pos = NULL;
1240 20 : data->allocated = false;
1241 : }
1242 : }
1243 3120 : else if (val->weight)
1244 : {
1245 332 : WordEntryPos *posvec_iter = posvec->pos;
1246 :
1247 : /* Is there a position with a matching weight? */
1248 503 : while (posvec_iter < posvec->pos + posvec->npos)
1249 : {
1250 372 : if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1251 : {
1252 201 : result = TS_YES;
1253 201 : break; /* no need to go further */
1254 : }
1255 :
1256 171 : posvec_iter++;
1257 : }
1258 : }
1259 2788 : else if (data)
1260 : {
1261 1645 : data->npos = posvec->npos;
1262 1645 : data->pos = posvec->pos;
1263 1645 : data->allocated = false;
1264 1645 : result = TS_YES;
1265 : }
1266 : else
1267 : {
1268 : /* simplest case: no weight check, positions not needed */
1269 1143 : result = TS_YES;
1270 : }
1271 : }
1272 : else
1273 : {
1274 : /*
1275 : * Position info is lacking, so if the caller requires it, we can only
1276 : * say that maybe there is a match.
1277 : *
1278 : * Notice, however, that we *don't* check val->weight here.
1279 : * Historically, stripped tsvectors are considered to match queries
1280 : * whether or not the query has a weight restriction; that's a little
1281 : * dubious but we'll preserve the behavior.
1282 : */
1283 42416 : if (data)
1284 15385 : result = TS_MAYBE;
1285 : else
1286 27031 : result = TS_YES;
1287 : }
1288 :
1289 45576 : return result;
1290 : }
1291 :
/*
 * TS_execute callback for matching a tsquery operand to plain tsvector data
 *
 * checkval: a CHKVAL describing the tsvector entry array (arrb..arre), its
 *			 lexeme storage (values) and the query's operand storage (operand)
 * val:		 the query operand to look up
 * data:	 if not NULL, receives the positions of the match(es)
 *
 * The entry array is binary-searched for the operand using tsCompareString().
 * For a prefix operand, the entries following the search point are scanned
 * as well, and their positions are merged into one sorted, unique array.
 *
 * Returns whatever checkclass_str() reports for the matching entry (or the
 * combination of its results over all prefix matches); TS_NO if nothing
 * matched.
 */
static TSTernaryValue
checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
	CHKVAL	   *chkval = (CHKVAL *) checkval;
	WordEntry  *StopLow = chkval->arrb;
	WordEntry  *StopHigh = chkval->arre;
	WordEntry  *StopMiddle = StopHigh;
	TSTernaryValue res = TS_NO;

	/* Loop invariant: StopLow <= val < StopHigh */
	while (StopLow < StopHigh)
	{
		int			difference;

		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
		difference = tsCompareString(chkval->operand + val->distance,
									 val->length,
									 chkval->values + StopMiddle->pos,
									 StopMiddle->len,
									 false);

		if (difference == 0)
		{
			/* Check weight info & fill 'data' with positions */
			res = checkclass_str(chkval, StopMiddle, val, data);
			break;
		}
		else if (difference > 0)
			StopLow = StopMiddle + 1;
		else
			StopHigh = StopMiddle;
	}

	/*
	 * If it's a prefix search, we should also consider lexemes that the
	 * search term is a prefix of (which will necessarily immediately follow
	 * the place we found in the above loop).  But we can skip them if there
	 * was a definite match on the exact term AND the caller doesn't need
	 * position info.
	 */
	if (val->prefix && (res != TS_YES || data))
	{
		WordEntryPos *allpos = NULL;	/* accumulated positions of all matches */
		int			npos = 0,	/* number of positions stored in allpos */
					totalpos = 0;	/* allocated length of allpos */

		/*
		 * adjust start position for corner case: the search loop ended
		 * without an exact match, so begin scanning at the point where it
		 * converged rather than at the last midpoint probed
		 */
		if (StopLow >= StopHigh)
			StopMiddle = StopHigh;

		/* we don't try to re-use any data from the initial match */
		if (data)
		{
			if (data->allocated)
				pfree(data->pos);
			data->pos = NULL;
			data->allocated = false;
			data->npos = 0;
		}
		res = TS_NO;

		/*
		 * Scan forward while entries still have the operand as a prefix.  If
		 * positions aren't needed, we can stop at the first definite match.
		 */
		while ((res != TS_YES || data) &&
			   StopMiddle < chkval->arre &&
			   tsCompareString(chkval->operand + val->distance,
							   val->length,
							   chkval->values + StopMiddle->pos,
							   StopMiddle->len,
							   true) == 0)
		{
			TSTernaryValue subres;

			subres = checkclass_str(chkval, StopMiddle, val, data);

			if (subres != TS_NO)
			{
				if (data)
				{
					/*
					 * We need to join position information
					 */
					if (subres == TS_MAYBE)
					{
						/*
						 * No position info for this match, so we must report
						 * MAYBE overall.
						 */
						res = TS_MAYBE;
						/* forget any previous positions */
						npos = 0;
						/* don't leak storage */
						if (allpos)
							pfree(allpos);
						break;
					}

					/*
					 * Enlarge allpos (starting at 256 entries, then
					 * doubling) until this match's positions fit
					 */
					while (npos + data->npos > totalpos)
					{
						if (totalpos == 0)
						{
							totalpos = 256;
							allpos = palloc_array(WordEntryPos, totalpos);
						}
						else
						{
							totalpos *= 2;
							allpos = repalloc_array(allpos, WordEntryPos, totalpos);
						}
					}

					/* append this match's positions to the accumulated set */
					memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
					npos += data->npos;

					/* don't leak storage from individual matches */
					if (data->allocated)
						pfree(data->pos);
					data->pos = NULL;
					data->allocated = false;
					/* it's important to reset data->npos before next loop */
					data->npos = 0;
				}
				else
				{
					/* Don't need positions, just handle YES/MAYBE */
					if (subres == TS_YES || res == TS_NO)
						res = subres;
				}
			}

			StopMiddle++;
		}

		if (data && npos > 0)
		{
			/* Sort and make unique array of found positions */
			data->pos = allpos;
			qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
			data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
								 compareWordEntryPos);
			data->allocated = true;
			res = TS_YES;
		}
	}

	return res;
}
1440 :
1441 : /*
1442 : * Compute output position list for a tsquery operator in phrase mode.
1443 : *
1444 : * Merge the position lists in Ldata and Rdata as specified by "emit",
1445 : * returning the result list into *data. The input position lists must be
1446 : * sorted and unique, and the output will be as well.
1447 : *
1448 : * data: pointer to initially-all-zeroes output struct, or NULL
1449 : * Ldata, Rdata: input position lists
1450 : * emit: bitmask of TSPO_XXX flags
1451 : * Loffset: offset to be added to Ldata positions before comparing/outputting
1452 : * Roffset: offset to be added to Rdata positions before comparing/outputting
1453 : * max_npos: maximum possible required size of output position array
1454 : *
1455 : * Loffset and Roffset should not be negative, else we risk trying to output
1456 : * negative positions, which won't fit into WordEntryPos.
1457 : *
1458 : * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
1459 : * we return it as TSTernaryValue.
1460 : *
1461 : * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
1462 : * returns TS_YES if any positions would have been emitted.
1463 : */
1464 : #define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
1465 : #define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
1466 : #define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
1467 :
1468 : static TSTernaryValue
1469 20111 : TS_phrase_output(ExecPhraseData *data,
1470 : ExecPhraseData *Ldata,
1471 : ExecPhraseData *Rdata,
1472 : int emit,
1473 : int Loffset,
1474 : int Roffset,
1475 : int max_npos)
1476 : {
1477 : int Lindex,
1478 : Rindex;
1479 :
1480 : /* Loop until both inputs are exhausted */
1481 20111 : Lindex = Rindex = 0;
1482 20903 : while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1483 : {
1484 : int Lpos,
1485 : Rpos;
1486 1748 : int output_pos = 0;
1487 :
1488 : /*
1489 : * Fetch current values to compare. WEP_GETPOS() is needed because
1490 : * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1491 : */
1492 1748 : if (Lindex < Ldata->npos)
1493 1288 : Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1494 : else
1495 : {
1496 : /* L array exhausted, so we're done if R_ONLY isn't set */
1497 460 : if (!(emit & TSPO_R_ONLY))
1498 113 : break;
1499 347 : Lpos = INT_MAX;
1500 : }
1501 1635 : if (Rindex < Rdata->npos)
1502 1445 : Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1503 : else
1504 : {
1505 : /* R array exhausted, so we're done if L_ONLY isn't set */
1506 190 : if (!(emit & TSPO_L_ONLY))
1507 122 : break;
1508 68 : Rpos = INT_MAX;
1509 : }
1510 :
1511 : /* Merge-join the two input lists */
1512 1513 : if (Lpos < Rpos)
1513 : {
1514 : /* Lpos is not matched in Rdata, should we output it? */
1515 365 : if (emit & TSPO_L_ONLY)
1516 116 : output_pos = Lpos;
1517 365 : Lindex++;
1518 : }
1519 1148 : else if (Lpos == Rpos)
1520 : {
1521 : /* Lpos and Rpos match ... should we output it? */
1522 621 : if (emit & TSPO_BOTH)
1523 553 : output_pos = Rpos;
1524 621 : Lindex++;
1525 621 : Rindex++;
1526 : }
1527 : else /* Lpos > Rpos */
1528 : {
1529 : /* Rpos is not matched in Ldata, should we output it? */
1530 527 : if (emit & TSPO_R_ONLY)
1531 376 : output_pos = Rpos;
1532 527 : Rindex++;
1533 : }
1534 :
1535 1513 : if (output_pos > 0)
1536 : {
1537 1045 : if (data)
1538 : {
1539 : /* Store position, first allocating output array if needed */
1540 324 : if (data->pos == NULL)
1541 : {
1542 261 : data->pos = (WordEntryPos *)
1543 261 : palloc(max_npos * sizeof(WordEntryPos));
1544 261 : data->allocated = true;
1545 : }
1546 324 : data->pos[data->npos++] = output_pos;
1547 : }
1548 : else
1549 : {
1550 : /*
1551 : * Exact positions not needed, so return TS_YES as soon as we
1552 : * know there is at least one.
1553 : */
1554 721 : return TS_YES;
1555 : }
1556 : }
1557 : }
1558 :
1559 19390 : if (data && data->npos > 0)
1560 : {
1561 : /* Let's assert we didn't overrun the array */
1562 : Assert(data->npos <= max_npos);
1563 261 : return TS_YES;
1564 : }
1565 19129 : return TS_NO;
1566 : }
1567 :
/*
 * Execute tsquery at or below an OP_PHRASE operator.
 *
 * This handles tsquery execution at recursion levels where we need to care
 * about match locations.
 *
 * In addition to the same arguments used for TS_execute, the caller may pass
 * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
 * match position info on success.  data == NULL if no position data need be
 * returned.
 * Note: the function assumes data != NULL for operators other than OP_PHRASE.
 * This is OK because an outside call always starts from an OP_PHRASE node,
 * and all internal recursion cases pass data != NULL.
 *
 * The detailed semantics of the match data, given that the function returned
 * TS_YES (successful match), are:
 *
 * npos > 0, negate = false:
 *	 query is matched at specified position(s) (and only those positions)
 * npos > 0, negate = true:
 *	 query is matched at all positions *except* specified position(s)
 * npos = 0, negate = true:
 *	 query is matched at all positions
 * npos = 0, negate = false:
 *	 disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
 *
 * Successful matches also return a "width" value which is the match width in
 * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
 * and is the sum of the phrase operator distances for phrase matches.  Note
 * that when width > 0, the listed positions represent the ends of matches not
 * the starts.  (This unintuitive rule is needed to avoid possibly generating
 * negative positions, which wouldn't fit into the WordEntryPos arrays.)
 *
 * If the TSExecuteCallback function reports that an operand is present
 * but fails to provide position(s) for it, we will return TS_MAYBE when
 * it is possible but not certain that the query is matched.
 *
 * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
 * negate = false (which is the state initialized by the caller); but the
 * "width" output in such cases is undefined.
 */
static TSTernaryValue
TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
				  TSExecuteCallback chkcond,
				  ExecPhraseData *data)
{
	ExecPhraseData Ldata,
				Rdata;
	TSTernaryValue lmatch,
				rmatch;
	int			Loffset,
				Roffset,
				maxwidth;

	/* since this function recurses, it could be driven to stack overflow */
	check_stack_depth();

	/* ... and let's check for query cancel while we're at it */
	CHECK_FOR_INTERRUPTS();

	/* A plain operand is resolved by the callback, which also fills *data */
	if (curitem->type == QI_VAL)
		return chkcond(arg, (QueryOperand *) curitem, data);

	/*
	 * In the cases below, the operand at curitem + 1 is evaluated as the
	 * right-hand (or, for NOT, the only) input, and the one at
	 * curitem + curitem->qoperator.left as the left-hand input.
	 */
	switch (curitem->qoperator.oper)
	{
		case OP_NOT:

			/*
			 * We need not touch data->width, since a NOT operation does not
			 * change the match width.
			 */
			if (flags & TS_EXEC_SKIP_NOT)
			{
				/* with SKIP_NOT, report NOT as "match everywhere" */
				Assert(data->npos == 0 && !data->negate);
				data->negate = true;
				return TS_YES;
			}
			switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
			{
				case TS_NO:
					/* change "match nowhere" to "match everywhere" */
					Assert(data->npos == 0 && !data->negate);
					data->negate = true;
					return TS_YES;
				case TS_YES:
					if (data->npos > 0)
					{
						/* we have some positions, invert negate flag */
						data->negate = !data->negate;
						return TS_YES;
					}
					else if (data->negate)
					{
						/* change "match everywhere" to "match nowhere" */
						data->negate = false;
						return TS_NO;
					}
					/* Should not get here if result was TS_YES */
					Assert(false);
					break;
				case TS_MAYBE:
					/* match positions are, and remain, uncertain */
					return TS_MAYBE;
			}
			break;

		case OP_PHRASE:
		case OP_AND:
			/* sub-results must start out all-zeroes, per the header comment */
			memset(&Ldata, 0, sizeof(Ldata));
			memset(&Rdata, 0, sizeof(Rdata));

			/* evaluate left input; a definite failure short-circuits */
			lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
									   arg, flags, chkcond, &Ldata);
			if (lmatch == TS_NO)
				return TS_NO;

			/* likewise for the right input */
			rmatch = TS_phrase_execute(curitem + 1,
									   arg, flags, chkcond, &Rdata);
			if (rmatch == TS_NO)
				return TS_NO;

			/*
			 * If either operand has no position information, then we can't
			 * return reliable position data, only a MAYBE result.
			 */
			if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
				return TS_MAYBE;

			if (curitem->qoperator.oper == OP_PHRASE)
			{
				/*
				 * Compute Loffset and Roffset suitable for phrase match, and
				 * compute overall width of whole phrase match.
				 *
				 * Left positions are shifted by the operator distance plus
				 * the right input's width before being compared with the
				 * (unshifted) right positions.
				 */
				Loffset = curitem->qoperator.distance + Rdata.width;
				Roffset = 0;
				if (data)
					data->width = curitem->qoperator.distance +
						Ldata.width + Rdata.width;
			}
			else
			{
				/*
				 * For OP_AND, set output width and alignment like OP_OR (see
				 * comment below)
				 */
				maxwidth = Max(Ldata.width, Rdata.width);
				Loffset = maxwidth - Ldata.width;
				Roffset = maxwidth - Rdata.width;
				if (data)
					data->width = maxwidth;
			}

			/*
			 * Combine the two position lists; how depends on which inputs
			 * are in "negated" form.
			 */
			if (Ldata.negate && Rdata.negate)
			{
				/* !L & !R: treat as !(L | R) */
				(void) TS_phrase_output(data, &Ldata, &Rdata,
										TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
										Loffset, Roffset,
										Ldata.npos + Rdata.npos);
				if (data)
					data->negate = true;
				return TS_YES;
			}
			else if (Ldata.negate)
			{
				/* !L & R */
				return TS_phrase_output(data, &Ldata, &Rdata,
										TSPO_R_ONLY,
										Loffset, Roffset,
										Rdata.npos);
			}
			else if (Rdata.negate)
			{
				/* L & !R */
				return TS_phrase_output(data, &Ldata, &Rdata,
										TSPO_L_ONLY,
										Loffset, Roffset,
										Ldata.npos);
			}
			else
			{
				/* straight AND */
				return TS_phrase_output(data, &Ldata, &Rdata,
										TSPO_BOTH,
										Loffset, Roffset,
										Min(Ldata.npos, Rdata.npos));
			}

		case OP_OR:
			/* sub-results must start out all-zeroes, per the header comment */
			memset(&Ldata, 0, sizeof(Ldata));
			memset(&Rdata, 0, sizeof(Rdata));

			/* unlike AND, both inputs are always evaluated */
			lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
									   arg, flags, chkcond, &Ldata);
			rmatch = TS_phrase_execute(curitem + 1,
									   arg, flags, chkcond, &Rdata);

			if (lmatch == TS_NO && rmatch == TS_NO)
				return TS_NO;

			/*
			 * If either operand has no position information, then we can't
			 * return reliable position data, only a MAYBE result.
			 */
			if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
				return TS_MAYBE;

			/*
			 * Cope with undefined output width from failed submatch.  (This
			 * takes less code than trying to ensure that all failure returns
			 * set data->width to zero.)
			 */
			if (lmatch == TS_NO)
				Ldata.width = 0;
			if (rmatch == TS_NO)
				Rdata.width = 0;

			/*
			 * For OP_AND and OP_OR, report the width of the wider of the two
			 * inputs, and align the narrower input's positions to the right
			 * end of that width.  This rule deals at least somewhat
			 * reasonably with cases like "x <-> (y | z <-> q)".
			 */
			maxwidth = Max(Ldata.width, Rdata.width);
			Loffset = maxwidth - Ldata.width;
			Roffset = maxwidth - Rdata.width;
			/* note: data is dereferenced unconditionally in this case */
			data->width = maxwidth;

			if (Ldata.negate && Rdata.negate)
			{
				/* !L | !R: treat as !(L & R) */
				(void) TS_phrase_output(data, &Ldata, &Rdata,
										TSPO_BOTH,
										Loffset, Roffset,
										Min(Ldata.npos, Rdata.npos));
				data->negate = true;
				return TS_YES;
			}
			else if (Ldata.negate)
			{
				/* !L | R: treat as !(L & !R) */
				(void) TS_phrase_output(data, &Ldata, &Rdata,
										TSPO_L_ONLY,
										Loffset, Roffset,
										Ldata.npos);
				data->negate = true;
				return TS_YES;
			}
			else if (Rdata.negate)
			{
				/* L | !R: treat as !(!L & R) */
				(void) TS_phrase_output(data, &Ldata, &Rdata,
										TSPO_R_ONLY,
										Loffset, Roffset,
										Rdata.npos);
				data->negate = true;
				return TS_YES;
			}
			else
			{
				/* straight OR */
				return TS_phrase_output(data, &Ldata, &Rdata,
										TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
										Loffset, Roffset,
										Ldata.npos + Rdata.npos);
			}

		default:
			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
	}

	/* not reachable, but keep compiler quiet */
	return TS_NO;
}
1844 :
1845 :
1846 : /*
1847 : * Evaluate tsquery boolean expression.
1848 : *
1849 : * curitem: current tsquery item (initially, the first one)
1850 : * arg: opaque value to pass through to callback function
1851 : * flags: bitmask of flag bits shown in ts_utils.h
1852 : * chkcond: callback function to check whether a primitive value is present
1853 : */
1854 : bool
1855 347150 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1856 : TSExecuteCallback chkcond)
1857 : {
1858 : /*
1859 : * If we get TS_MAYBE from the recursion, return true. We could only see
1860 : * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
1861 : * need to check again.
1862 : */
1863 347150 : return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
1864 : }
1865 :
1866 : /*
1867 : * Evaluate tsquery boolean expression.
1868 : *
1869 : * This is the same as TS_execute except that TS_MAYBE is returned as-is.
1870 : */
1871 : TSTernaryValue
1872 24628 : TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
1873 : TSExecuteCallback chkcond)
1874 : {
1875 24628 : return TS_execute_recurse(curitem, arg, flags, chkcond);
1876 : }
1877 :
1878 : /*
1879 : * TS_execute recursion for operators above any phrase operator. Here we do
1880 : * not need to worry about lexeme positions. As soon as we hit an OP_PHRASE
1881 : * operator, we pass it off to TS_phrase_execute which does worry.
1882 : */
1883 : static TSTernaryValue
1884 703791 : TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
1885 : TSExecuteCallback chkcond)
1886 : {
1887 : TSTernaryValue lmatch;
1888 :
1889 : /* since this function recurses, it could be driven to stack overflow */
1890 703791 : check_stack_depth();
1891 :
1892 : /* ... and let's check for query cancel while we're at it */
1893 703791 : CHECK_FOR_INTERRUPTS();
1894 :
1895 703791 : if (curitem->type == QI_VAL)
1896 282764 : return chkcond(arg, (QueryOperand *) curitem,
1897 : NULL /* don't need position info */ );
1898 :
1899 421027 : switch (curitem->qoperator.oper)
1900 : {
1901 135491 : case OP_NOT:
1902 135491 : if (flags & TS_EXEC_SKIP_NOT)
1903 0 : return TS_YES;
1904 135491 : switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1905 : {
1906 127831 : case TS_NO:
1907 127831 : return TS_YES;
1908 3264 : case TS_YES:
1909 3264 : return TS_NO;
1910 4396 : case TS_MAYBE:
1911 4396 : return TS_MAYBE;
1912 : }
1913 0 : break;
1914 :
1915 55853 : case OP_AND:
1916 55853 : lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1917 : flags, chkcond);
1918 55853 : if (lmatch == TS_NO)
1919 44366 : return TS_NO;
1920 11487 : switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1921 : {
1922 6743 : case TS_NO:
1923 6743 : return TS_NO;
1924 2236 : case TS_YES:
1925 2236 : return lmatch;
1926 2508 : case TS_MAYBE:
1927 2508 : return TS_MAYBE;
1928 : }
1929 0 : break;
1930 :
1931 72663 : case OP_OR:
1932 72663 : lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1933 : flags, chkcond);
1934 72663 : if (lmatch == TS_YES)
1935 16144 : return TS_YES;
1936 56519 : switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1937 : {
1938 38338 : case TS_NO:
1939 38338 : return lmatch;
1940 4962 : case TS_YES:
1941 4962 : return TS_YES;
1942 13219 : case TS_MAYBE:
1943 13219 : return TS_MAYBE;
1944 : }
1945 0 : break;
1946 :
1947 157020 : case OP_PHRASE:
1948 :
1949 : /*
1950 : * If we get a MAYBE result, and the caller doesn't want that,
1951 : * convert it to NO. It would be more consistent, perhaps, to
1952 : * return the result of TS_phrase_execute() verbatim and then
1953 : * convert MAYBE results at the top of the recursion. But
1954 : * converting at the topmost phrase operator gives results that
1955 : * are bug-compatible with the old implementation, so do it like
1956 : * this for now.
1957 : */
1958 157020 : switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
1959 : {
1960 120247 : case TS_NO:
1961 120247 : return TS_NO;
1962 19601 : case TS_YES:
1963 19601 : return TS_YES;
1964 17172 : case TS_MAYBE:
1965 17172 : return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
1966 : }
1967 0 : break;
1968 :
1969 0 : default:
1970 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1971 : }
1972 :
1973 : /* not reachable, but keep compiler quiet */
1974 0 : return TS_NO;
1975 : }
1976 :
1977 : /*
1978 : * Evaluate tsquery and report locations of matching terms.
1979 : *
1980 : * This is like TS_execute except that it returns match locations not just
1981 : * success/failure status. The callback function is required to provide
1982 : * position data (we report failure if it doesn't).
1983 : *
1984 : * On successful match, the result is a List of ExecPhraseData structs, one
1985 : * for each AND'ed term or phrase operator in the query. Each struct includes
1986 : * a sorted array of lexeme positions matching that term. (Recall that for
1987 : * phrase operators, the match includes width+1 lexemes, and the recorded
1988 : * position is that of the rightmost lexeme.)
1989 : *
1990 : * OR subexpressions are handled by union'ing their match locations into a
1991 : * single List element, which is valid since any of those locations contains
1992 : * a match. However, when some of the OR'ed terms are phrase operators, we
1993 : * report the maximum width of any of the OR'ed terms, making such cases
1994 : * slightly imprecise in the conservative direction. (For example, if the
1995 : * tsquery is "(A <-> B) | C", an occurrence of C in the data would be
1996 : * reported as though it includes the lexeme to the left of C.)
1997 : *
1998 : * Locations of NOT subexpressions are not reported. (Obviously, there can
1999 : * be no successful NOT matches at top level, or the match would have failed.
2000 : * So this amounts to ignoring NOTs underneath ORs.)
2001 : *
2002 : * The result is NIL if no match, or if position data was not returned.
2003 : *
2004 : * Arguments are the same as for TS_execute, although flags is currently
2005 : * vestigial since none of the defined bits are sensible here.
2006 : */
2007 : List *
2008 281 : TS_execute_locations(QueryItem *curitem, void *arg,
2009 : uint32 flags,
2010 : TSExecuteCallback chkcond)
2011 : {
2012 : List *result;
2013 :
2014 : /* No flags supported, as yet */
2015 : Assert(flags == TS_EXEC_EMPTY);
2016 281 : if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
2017 106 : return result;
2018 175 : return NIL;
2019 : }
2020 :
2021 : /*
2022 : * TS_execute_locations recursion for operators above any phrase operator.
2023 : * OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
2024 : */
2025 : static bool
2026 827 : TS_execute_locations_recurse(QueryItem *curitem, void *arg,
2027 : TSExecuteCallback chkcond,
2028 : List **locations)
2029 : {
2030 : bool lmatch,
2031 : rmatch;
2032 : List *llocations,
2033 : *rlocations;
2034 : ExecPhraseData *data;
2035 :
2036 : /* since this function recurses, it could be driven to stack overflow */
2037 827 : check_stack_depth();
2038 :
2039 : /* ... and let's check for query cancel while we're at it */
2040 827 : CHECK_FOR_INTERRUPTS();
2041 :
2042 : /* Default locations result is empty */
2043 827 : *locations = NIL;
2044 :
2045 827 : if (curitem->type == QI_VAL)
2046 : {
2047 347 : data = palloc0_object(ExecPhraseData);
2048 347 : if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
2049 : {
2050 172 : *locations = list_make1(data);
2051 172 : return true;
2052 : }
2053 175 : pfree(data);
2054 175 : return false;
2055 : }
2056 :
2057 480 : switch (curitem->qoperator.oper)
2058 : {
2059 10 : case OP_NOT:
2060 10 : if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
2061 : &llocations))
2062 0 : return true; /* we don't pass back any locations */
2063 10 : return false;
2064 :
2065 400 : case OP_AND:
2066 400 : if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
2067 : arg, chkcond,
2068 : &llocations))
2069 304 : return false;
2070 96 : if (!TS_execute_locations_recurse(curitem + 1,
2071 : arg, chkcond,
2072 : &rlocations))
2073 41 : return false;
2074 55 : *locations = list_concat(llocations, rlocations);
2075 55 : return true;
2076 :
2077 20 : case OP_OR:
2078 20 : lmatch = TS_execute_locations_recurse(curitem + curitem->qoperator.left,
2079 : arg, chkcond,
2080 : &llocations);
2081 20 : rmatch = TS_execute_locations_recurse(curitem + 1,
2082 : arg, chkcond,
2083 : &rlocations);
2084 20 : if (lmatch || rmatch)
2085 : {
2086 : /*
2087 : * We generate an AND'able location struct from each
2088 : * combination of sub-matches, following the disjunctive law
2089 : * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
2090 : *
2091 : * However, if either input didn't produce locations (i.e., it
2092 : * failed or was a NOT), we must just return the other list.
2093 : */
2094 20 : if (llocations == NIL)
2095 0 : *locations = rlocations;
2096 20 : else if (rlocations == NIL)
2097 10 : *locations = llocations;
2098 : else
2099 : {
2100 : ListCell *ll;
2101 :
2102 20 : foreach(ll, llocations)
2103 : {
2104 10 : ExecPhraseData *ldata = (ExecPhraseData *) lfirst(ll);
2105 : ListCell *lr;
2106 :
2107 20 : foreach(lr, rlocations)
2108 : {
2109 10 : ExecPhraseData *rdata = (ExecPhraseData *) lfirst(lr);
2110 :
2111 10 : data = palloc0_object(ExecPhraseData);
2112 10 : (void) TS_phrase_output(data, ldata, rdata,
2113 : TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
2114 : 0, 0,
2115 10 : ldata->npos + rdata->npos);
2116 : /* Report the larger width, as explained above. */
2117 10 : data->width = Max(ldata->width, rdata->width);
2118 10 : *locations = lappend(*locations, data);
2119 : }
2120 : }
2121 : }
2122 :
2123 20 : return true;
2124 : }
2125 0 : return false;
2126 :
2127 50 : case OP_PHRASE:
2128 : /* We can hand this off to TS_phrase_execute */
2129 50 : data = palloc0_object(ExecPhraseData);
2130 50 : if (TS_phrase_execute(curitem, arg, TS_EXEC_EMPTY, chkcond,
2131 : data) == TS_YES)
2132 : {
2133 50 : if (!data->negate)
2134 50 : *locations = list_make1(data);
2135 50 : return true;
2136 : }
2137 0 : pfree(data);
2138 0 : return false;
2139 :
2140 0 : default:
2141 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2142 : }
2143 :
2144 : /* not reachable, but keep compiler quiet */
2145 : return false;
2146 : }
2147 :
2148 : /*
2149 : * Detect whether a tsquery boolean expression requires any positive matches
2150 : * to values shown in the tsquery.
2151 : *
2152 : * This is needed to know whether a GIN index search requires full index scan.
2153 : * For example, 'x & !y' requires a match of x, so it's sufficient to scan
2154 : * entries for x; but 'x | !y' could match rows containing neither x nor y.
2155 : */
2156 : bool
2157 631 : tsquery_requires_match(QueryItem *curitem)
2158 : {
2159 : /* since this function recurses, it could be driven to stack overflow */
2160 631 : check_stack_depth();
2161 :
2162 631 : if (curitem->type == QI_VAL)
2163 301 : return true;
2164 :
2165 330 : switch (curitem->qoperator.oper)
2166 : {
2167 127 : case OP_NOT:
2168 :
2169 : /*
2170 : * Assume there are no required matches underneath a NOT. For
2171 : * some cases with nested NOTs, we could prove there's a required
2172 : * match, but it seems unlikely to be worth the trouble.
2173 : */
2174 127 : return false;
2175 :
2176 153 : case OP_PHRASE:
2177 :
2178 : /*
2179 : * Treat OP_PHRASE as OP_AND here
2180 : */
2181 : case OP_AND:
2182 : /* If either side requires a match, we're good */
2183 153 : if (tsquery_requires_match(curitem + curitem->qoperator.left))
2184 117 : return true;
2185 : else
2186 36 : return tsquery_requires_match(curitem + 1);
2187 :
2188 50 : case OP_OR:
2189 : /* Both sides must require a match */
2190 50 : if (tsquery_requires_match(curitem + curitem->qoperator.left))
2191 50 : return tsquery_requires_match(curitem + 1);
2192 : else
2193 0 : return false;
2194 :
2195 0 : default:
2196 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2197 : }
2198 :
2199 : /* not reachable, but keep compiler quiet */
2200 : return false;
2201 : }
2202 :
2203 : /*
2204 : * boolean operations
2205 : */
2206 : Datum
2207 40 : ts_match_qv(PG_FUNCTION_ARGS)
2208 : {
2209 40 : PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
2210 : PG_GETARG_DATUM(1),
2211 : PG_GETARG_DATUM(0)));
2212 : }
2213 :
2214 : Datum
2215 146808 : ts_match_vq(PG_FUNCTION_ARGS)
2216 : {
2217 146808 : TSVector val = PG_GETARG_TSVECTOR(0);
2218 146808 : TSQuery query = PG_GETARG_TSQUERY(1);
2219 : CHKVAL chkval;
2220 : bool result;
2221 :
2222 : /* empty query matches nothing */
2223 146808 : if (!query->size)
2224 : {
2225 0 : PG_FREE_IF_COPY(val, 0);
2226 0 : PG_FREE_IF_COPY(query, 1);
2227 0 : PG_RETURN_BOOL(false);
2228 : }
2229 :
2230 146808 : chkval.arrb = ARRPTR(val);
2231 146808 : chkval.arre = chkval.arrb + val->size;
2232 146808 : chkval.values = STRPTR(val);
2233 146808 : chkval.operand = GETOPERAND(query);
2234 146808 : result = TS_execute(GETQUERY(query),
2235 : &chkval,
2236 : TS_EXEC_EMPTY,
2237 : checkcondition_str);
2238 :
2239 146808 : PG_FREE_IF_COPY(val, 0);
2240 146808 : PG_FREE_IF_COPY(query, 1);
2241 146808 : PG_RETURN_BOOL(result);
2242 : }
2243 :
2244 : Datum
2245 0 : ts_match_tt(PG_FUNCTION_ARGS)
2246 : {
2247 : TSVector vector;
2248 : TSQuery query;
2249 : bool res;
2250 :
2251 0 : vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
2252 : PG_GETARG_DATUM(0)));
2253 0 : query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
2254 : PG_GETARG_DATUM(1)));
2255 :
2256 0 : res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
2257 : TSVectorGetDatum(vector),
2258 : TSQueryGetDatum(query)));
2259 :
2260 0 : pfree(vector);
2261 0 : pfree(query);
2262 :
2263 0 : PG_RETURN_BOOL(res);
2264 : }
2265 :
2266 : Datum
2267 0 : ts_match_tq(PG_FUNCTION_ARGS)
2268 : {
2269 : TSVector vector;
2270 0 : TSQuery query = PG_GETARG_TSQUERY(1);
2271 : bool res;
2272 :
2273 0 : vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
2274 : PG_GETARG_DATUM(0)));
2275 :
2276 0 : res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
2277 : TSVectorGetDatum(vector),
2278 : TSQueryGetDatum(query)));
2279 :
2280 0 : pfree(vector);
2281 0 : PG_FREE_IF_COPY(query, 1);
2282 :
2283 0 : PG_RETURN_BOOL(res);
2284 : }
2285 :
2286 : /*
2287 : * ts_stat statistic function support
2288 : */
2289 :
2290 :
2291 : /*
2292 : * Returns the number of positions in value 'wptr' within tsvector 'txt',
2293 : * that have a weight equal to one of the weights in 'weight' bitmask.
2294 : */
2295 : static int
2296 5452 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
2297 : {
2298 5452 : int len = POSDATALEN(txt, wptr);
2299 5452 : int num = 0;
2300 5452 : WordEntryPos *ptr = POSDATAPTR(txt, wptr);
2301 :
2302 11100 : while (len--)
2303 : {
2304 5648 : if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2305 8 : num++;
2306 5648 : ptr++;
2307 : }
2308 5452 : return num;
2309 : }
2310 :
/*
 * Compare the lexeme stored in StatEntry 'a' with the lexeme of WordEntry
 * 'e', whose bytes start at offset e->pos in the string area of tsvector
 * 't'.  Expands to the tsCompareString() result unchanged; insertStatEntry()
 * treats zero as "same lexeme" and uses the sign to pick a subtree.
 * NOTE(review): the trailing 'false' is presumably tsCompareString's
 * prefix-match flag -- confirm against its definition.
 */
#define compareStatWord(a,e,t) \
	tsCompareString((a)->lexeme, (a)->lenlexeme, \
					STRPTR(t) + (e)->pos, (e)->len, \
					false)
2315 :
2316 : static void
2317 230416 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
2318 : {
2319 230416 : WordEntry *we = ARRPTR(txt) + off;
2320 230416 : StatEntry *node = stat->root,
2321 230416 : *pnode = NULL;
2322 : int n,
2323 230416 : res = 0;
2324 230416 : uint32 depth = 1;
2325 :
2326 230416 : if (stat->weight == 0)
2327 115208 : n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2328 : else
2329 115208 : n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2330 :
2331 230416 : if (n == 0)
2332 115204 : return; /* nothing to insert */
2333 :
2334 1163592 : while (node)
2335 : {
2336 1159016 : res = compareStatWord(node, we, txt);
2337 :
2338 1159016 : if (res == 0)
2339 : {
2340 110636 : break;
2341 : }
2342 : else
2343 : {
2344 1048380 : pnode = node;
2345 1048380 : node = (res < 0) ? node->left : node->right;
2346 : }
2347 1048380 : depth++;
2348 : }
2349 :
2350 115212 : if (depth > stat->maxdepth)
2351 84 : stat->maxdepth = depth;
2352 :
2353 115212 : if (node == NULL)
2354 : {
2355 4576 : node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
2356 4576 : node->left = node->right = NULL;
2357 4576 : node->ndoc = 1;
2358 4576 : node->nentry = n;
2359 4576 : node->lenlexeme = we->len;
2360 4576 : memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2361 :
2362 4576 : if (pnode == NULL)
2363 : {
2364 8 : stat->root = node;
2365 : }
2366 : else
2367 : {
2368 4568 : if (res < 0)
2369 2254 : pnode->left = node;
2370 : else
2371 2314 : pnode->right = node;
2372 : }
2373 : }
2374 : else
2375 : {
2376 110636 : node->ndoc++;
2377 110636 : node->nentry += n;
2378 : }
2379 : }
2380 :
2381 : static void
2382 330256 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
2383 : uint32 low, uint32 high, uint32 offset)
2384 : {
2385 : uint32 pos;
2386 330256 : uint32 middle = (low + high) >> 1;
2387 :
2388 330256 : pos = (low + middle) >> 1;
2389 330256 : if (low != middle && pos >= offset && pos - offset < txt->size)
2390 113552 : insertStatEntry(persistentContext, stat, txt, pos - offset);
2391 330256 : pos = (high + middle + 1) >> 1;
2392 330256 : if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2393 112856 : insertStatEntry(persistentContext, stat, txt, pos - offset);
2394 :
2395 330256 : if (low != middle)
2396 165128 : chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
2397 330256 : if (high != middle + 1)
2398 161120 : chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2399 330256 : }
2400 :
2401 : /*
2402 : * This is written like a custom aggregate function, because the
2403 : * original plan was to do just that. Unfortunately, an aggregate function
2404 : * can't return a set, so that plan was abandoned. If that limitation is
2405 : * lifted in the future, ts_stat could be a real aggregate function so that
2406 : * you could use it like this:
2407 : *
2408 : * SELECT ts_stat(vector_column) FROM vector_table;
2409 : *
2410 : * where vector_column is a tsvector-type column in vector_table.
2411 : */
2412 :
2413 : static TSVectorStat *
2414 4072 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
2415 : {
2416 4072 : TSVector txt = DatumGetTSVector(data);
2417 : uint32 i,
2418 4072 : nbit = 0,
2419 : offset;
2420 :
2421 4072 : if (stat == NULL)
2422 : { /* Init in first */
2423 0 : stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2424 0 : stat->maxdepth = 1;
2425 : }
2426 :
2427 : /* simple check of correctness */
2428 4072 : if (txt == NULL || txt->size == 0)
2429 : {
2430 64 : if (txt && txt != (TSVector) DatumGetPointer(data))
2431 64 : pfree(txt);
2432 64 : return stat;
2433 : }
2434 :
2435 4008 : i = txt->size - 1;
2436 28480 : for (; i > 0; i >>= 1)
2437 24472 : nbit++;
2438 :
2439 4008 : nbit = 1 << nbit;
2440 4008 : offset = (nbit - txt->size) / 2;
2441 :
2442 4008 : insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2443 4008 : chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
2444 :
2445 4008 : return stat;
2446 : }
2447 :
2448 : static void
2449 8 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
2450 : TSVectorStat *stat)
2451 : {
2452 : TupleDesc tupdesc;
2453 : MemoryContext oldcontext;
2454 : StatEntry *node;
2455 :
2456 8 : funcctx->user_fctx = stat;
2457 :
2458 8 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2459 :
2460 8 : stat->stack = palloc0_array(StatEntry *, stat->maxdepth + 1);
2461 8 : stat->stackpos = 0;
2462 :
2463 8 : node = stat->root;
2464 : /* find leftmost value */
2465 8 : if (node == NULL)
2466 0 : stat->stack[stat->stackpos] = NULL;
2467 : else
2468 : for (;;)
2469 : {
2470 32 : stat->stack[stat->stackpos] = node;
2471 32 : if (node->left)
2472 : {
2473 24 : stat->stackpos++;
2474 24 : node = node->left;
2475 : }
2476 : else
2477 8 : break;
2478 : }
2479 : Assert(stat->stackpos <= stat->maxdepth);
2480 :
2481 8 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2482 0 : elog(ERROR, "return type must be a row type");
2483 8 : funcctx->tuple_desc = tupdesc;
2484 8 : funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2485 :
2486 8 : MemoryContextSwitchTo(oldcontext);
2487 8 : }
2488 :
2489 : static StatEntry *
2490 9152 : walkStatEntryTree(TSVectorStat *stat)
2491 : {
2492 9152 : StatEntry *node = stat->stack[stat->stackpos];
2493 :
2494 9152 : if (node == NULL)
2495 0 : return NULL;
2496 :
2497 9152 : if (node->ndoc != 0)
2498 : {
2499 : /* return entry itself: we already was at left sublink */
2500 2262 : return node;
2501 : }
2502 6890 : else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2503 : {
2504 : /* go on right sublink */
2505 2314 : stat->stackpos++;
2506 2314 : node = node->right;
2507 :
2508 : /* find most-left value */
2509 : for (;;)
2510 : {
2511 4544 : stat->stack[stat->stackpos] = node;
2512 4544 : if (node->left)
2513 : {
2514 2230 : stat->stackpos++;
2515 2230 : node = node->left;
2516 : }
2517 : else
2518 2314 : break;
2519 : }
2520 2314 : Assert(stat->stackpos <= stat->maxdepth);
2521 : }
2522 : else
2523 : {
2524 : /* we already return all left subtree, itself and right subtree */
2525 4576 : if (stat->stackpos == 0)
2526 8 : return NULL;
2527 :
2528 4568 : stat->stackpos--;
2529 4568 : return walkStatEntryTree(stat);
2530 : }
2531 :
2532 2314 : return node;
2533 : }
2534 :
2535 : static Datum
2536 4584 : ts_process_call(FuncCallContext *funcctx)
2537 : {
2538 : TSVectorStat *st;
2539 : StatEntry *entry;
2540 :
2541 4584 : st = (TSVectorStat *) funcctx->user_fctx;
2542 :
2543 4584 : entry = walkStatEntryTree(st);
2544 :
2545 4584 : if (entry != NULL)
2546 : {
2547 : Datum result;
2548 : char *values[3];
2549 : char ndoc[16];
2550 : char nentry[16];
2551 : HeapTuple tuple;
2552 :
2553 4576 : values[0] = palloc(entry->lenlexeme + 1);
2554 4576 : memcpy(values[0], entry->lexeme, entry->lenlexeme);
2555 4576 : (values[0])[entry->lenlexeme] = '\0';
2556 4576 : sprintf(ndoc, "%d", entry->ndoc);
2557 4576 : values[1] = ndoc;
2558 4576 : sprintf(nentry, "%d", entry->nentry);
2559 4576 : values[2] = nentry;
2560 :
2561 4576 : tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2562 4576 : result = HeapTupleGetDatum(tuple);
2563 :
2564 4576 : pfree(values[0]);
2565 :
2566 : /* mark entry as already visited */
2567 4576 : entry->ndoc = 0;
2568 :
2569 4576 : return result;
2570 : }
2571 :
2572 8 : return (Datum) 0;
2573 : }
2574 :
2575 : static TSVectorStat *
2576 8 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
2577 : {
2578 8 : char *query = text_to_cstring(txt);
2579 : TSVectorStat *stat;
2580 : bool isnull;
2581 : Portal portal;
2582 : SPIPlanPtr plan;
2583 :
2584 8 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2585 : /* internal error */
2586 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2587 :
2588 8 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2589 : /* internal error */
2590 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2591 :
2592 8 : SPI_cursor_fetch(portal, true, 100);
2593 :
2594 8 : if (SPI_tuptable == NULL ||
2595 8 : SPI_tuptable->tupdesc->natts != 1 ||
2596 8 : !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
2597 : TSVECTOROID))
2598 0 : ereport(ERROR,
2599 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2600 : errmsg("ts_stat query must return one tsvector column")));
2601 :
2602 8 : stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2603 8 : stat->maxdepth = 1;
2604 :
2605 8 : if (ws)
2606 : {
2607 : char *buf;
2608 : const char *end;
2609 :
2610 4 : buf = VARDATA_ANY(ws);
2611 4 : end = buf + VARSIZE_ANY_EXHDR(ws);
2612 12 : while (buf < end)
2613 : {
2614 8 : int len = pg_mblen_range(buf, end);
2615 :
2616 8 : if (len == 1)
2617 : {
2618 8 : switch (*buf)
2619 : {
2620 4 : case 'A':
2621 : case 'a':
2622 4 : stat->weight |= 1 << 3;
2623 4 : break;
2624 4 : case 'B':
2625 : case 'b':
2626 4 : stat->weight |= 1 << 2;
2627 4 : break;
2628 0 : case 'C':
2629 : case 'c':
2630 0 : stat->weight |= 1 << 1;
2631 0 : break;
2632 0 : case 'D':
2633 : case 'd':
2634 0 : stat->weight |= 1;
2635 0 : break;
2636 0 : default:
2637 0 : stat->weight |= 0;
2638 : }
2639 : }
2640 8 : buf += len;
2641 : }
2642 : }
2643 :
2644 56 : while (SPI_processed > 0)
2645 : {
2646 : uint64 i;
2647 :
2648 4120 : for (i = 0; i < SPI_processed; i++)
2649 : {
2650 4072 : Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
2651 :
2652 4072 : if (!isnull)
2653 4072 : stat = ts_accum(persistentContext, stat, data);
2654 : }
2655 :
2656 48 : SPI_freetuptable(SPI_tuptable);
2657 48 : SPI_cursor_fetch(portal, true, 100);
2658 : }
2659 :
2660 8 : SPI_freetuptable(SPI_tuptable);
2661 8 : SPI_cursor_close(portal);
2662 8 : SPI_freeplan(plan);
2663 8 : pfree(query);
2664 :
2665 8 : return stat;
2666 : }
2667 :
2668 : Datum
2669 4576 : ts_stat1(PG_FUNCTION_ARGS)
2670 : {
2671 : FuncCallContext *funcctx;
2672 : Datum result;
2673 :
2674 4576 : if (SRF_IS_FIRSTCALL())
2675 : {
2676 : TSVectorStat *stat;
2677 4 : text *txt = PG_GETARG_TEXT_PP(0);
2678 :
2679 4 : funcctx = SRF_FIRSTCALL_INIT();
2680 4 : SPI_connect();
2681 4 : stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2682 4 : PG_FREE_IF_COPY(txt, 0);
2683 4 : ts_setup_firstcall(fcinfo, funcctx, stat);
2684 4 : SPI_finish();
2685 : }
2686 :
2687 4576 : funcctx = SRF_PERCALL_SETUP();
2688 4576 : if ((result = ts_process_call(funcctx)) != (Datum) 0)
2689 4572 : SRF_RETURN_NEXT(funcctx, result);
2690 4 : SRF_RETURN_DONE(funcctx);
2691 : }
2692 :
2693 : Datum
2694 8 : ts_stat2(PG_FUNCTION_ARGS)
2695 : {
2696 : FuncCallContext *funcctx;
2697 : Datum result;
2698 :
2699 8 : if (SRF_IS_FIRSTCALL())
2700 : {
2701 : TSVectorStat *stat;
2702 4 : text *txt = PG_GETARG_TEXT_PP(0);
2703 4 : text *ws = PG_GETARG_TEXT_PP(1);
2704 :
2705 4 : funcctx = SRF_FIRSTCALL_INIT();
2706 4 : SPI_connect();
2707 4 : stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2708 4 : PG_FREE_IF_COPY(txt, 0);
2709 4 : PG_FREE_IF_COPY(ws, 1);
2710 4 : ts_setup_firstcall(fcinfo, funcctx, stat);
2711 4 : SPI_finish();
2712 : }
2713 :
2714 8 : funcctx = SRF_PERCALL_SETUP();
2715 8 : if ((result = ts_process_call(funcctx)) != (Datum) 0)
2716 4 : SRF_RETURN_NEXT(funcctx, result);
2717 4 : SRF_RETURN_DONE(funcctx);
2718 : }
2719 :
2720 :
2721 : /*
2722 : * Triggers for automatic update of a tsvector column from text column(s)
2723 : *
2724 : * Trigger arguments are either
2725 : * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2726 : * name of tsvector col, name of regconfig col, name(s) of text col(s)
2727 : * ie, tsconfig can either be specified by name, or indirectly as the
2728 : * contents of a regconfig field in the row. If the name is used, it must
2729 : * be explicitly schema-qualified.
2730 : */
2731 : Datum
2732 12 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
2733 : {
2734 12 : return tsvector_update_trigger(fcinfo, false);
2735 : }
2736 :
2737 : Datum
2738 0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
2739 : {
2740 0 : return tsvector_update_trigger(fcinfo, true);
2741 : }
2742 :
2743 : static Datum
2744 12 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
2745 : {
2746 : TriggerData *trigdata;
2747 : Trigger *trigger;
2748 : Relation rel;
2749 12 : HeapTuple rettuple = NULL;
2750 : int tsvector_attr_num,
2751 : i;
2752 : ParsedText prs;
2753 : Datum datum;
2754 : bool isnull;
2755 : text *txt;
2756 : Oid cfgId;
2757 : bool update_needed;
2758 :
2759 : /* Check call context */
2760 12 : if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2761 0 : elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2762 :
2763 12 : trigdata = (TriggerData *) fcinfo->context;
2764 12 : if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2765 0 : elog(ERROR, "tsvector_update_trigger: must be fired for row");
2766 12 : if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2767 0 : elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2768 :
2769 12 : if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2770 : {
2771 8 : rettuple = trigdata->tg_trigtuple;
2772 8 : update_needed = true;
2773 : }
2774 4 : else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2775 : {
2776 4 : rettuple = trigdata->tg_newtuple;
2777 4 : update_needed = false; /* computed below */
2778 : }
2779 : else
2780 0 : elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2781 :
2782 12 : trigger = trigdata->tg_trigger;
2783 12 : rel = trigdata->tg_relation;
2784 :
2785 12 : if (trigger->tgnargs < 3)
2786 0 : elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2787 :
2788 : /* Find the target tsvector column */
2789 12 : tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2790 12 : if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
2791 0 : ereport(ERROR,
2792 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2793 : errmsg("tsvector column \"%s\" does not exist",
2794 : trigger->tgargs[0])));
2795 : /* This will effectively reject system columns, so no separate test: */
2796 12 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
2797 : TSVECTOROID))
2798 0 : ereport(ERROR,
2799 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2800 : errmsg("column \"%s\" is not of tsvector type",
2801 : trigger->tgargs[0])));
2802 :
2803 : /* Find the configuration to use */
2804 12 : if (config_column)
2805 : {
2806 : int config_attr_num;
2807 :
2808 0 : config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2809 0 : if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
2810 0 : ereport(ERROR,
2811 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2812 : errmsg("configuration column \"%s\" does not exist",
2813 : trigger->tgargs[1])));
2814 0 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
2815 : REGCONFIGOID))
2816 0 : ereport(ERROR,
2817 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2818 : errmsg("column \"%s\" is not of regconfig type",
2819 : trigger->tgargs[1])));
2820 :
2821 0 : datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2822 0 : if (isnull)
2823 0 : ereport(ERROR,
2824 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2825 : errmsg("configuration column \"%s\" must not be null",
2826 : trigger->tgargs[1])));
2827 0 : cfgId = DatumGetObjectId(datum);
2828 : }
2829 : else
2830 : {
2831 : List *names;
2832 :
2833 12 : names = stringToQualifiedNameList(trigger->tgargs[1], NULL);
2834 : /* require a schema so that results are not search path dependent */
2835 12 : if (list_length(names) < 2)
2836 0 : ereport(ERROR,
2837 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2838 : errmsg("text search configuration name \"%s\" must be schema-qualified",
2839 : trigger->tgargs[1])));
2840 12 : cfgId = get_ts_config_oid(names, false);
2841 : }
2842 :
2843 : /* initialize parse state */
2844 12 : prs.lenwords = 32;
2845 12 : prs.curwords = 0;
2846 12 : prs.pos = 0;
2847 12 : prs.words = palloc_array(ParsedWord, prs.lenwords);
2848 :
2849 : /* find all words in indexable column(s) */
2850 24 : for (i = 2; i < trigger->tgnargs; i++)
2851 : {
2852 : int numattr;
2853 :
2854 12 : numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2855 12 : if (numattr == SPI_ERROR_NOATTRIBUTE)
2856 0 : ereport(ERROR,
2857 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2858 : errmsg("column \"%s\" does not exist",
2859 : trigger->tgargs[i])));
2860 12 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
2861 0 : ereport(ERROR,
2862 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2863 : errmsg("column \"%s\" is not of a character type",
2864 : trigger->tgargs[i])));
2865 :
2866 12 : if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
2867 4 : update_needed = true;
2868 :
2869 12 : datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2870 12 : if (isnull)
2871 4 : continue;
2872 :
2873 8 : txt = DatumGetTextPP(datum);
2874 :
2875 8 : parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
2876 :
2877 8 : if (txt != (text *) DatumGetPointer(datum))
2878 0 : pfree(txt);
2879 : }
2880 :
2881 12 : if (update_needed)
2882 : {
2883 : /* make tsvector value */
2884 12 : datum = TSVectorGetDatum(make_tsvector(&prs));
2885 12 : isnull = false;
2886 :
2887 : /* and insert it into tuple */
2888 12 : rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2889 : 1, &tsvector_attr_num,
2890 : &datum, &isnull);
2891 :
2892 12 : pfree(DatumGetPointer(datum));
2893 : }
2894 :
2895 12 : return PointerGetDatum(rettuple);
2896 : }
|