Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsvector_op.c
4 : * operations over tsvector
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsvector_op.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include <limits.h>
17 :
18 : #include "access/htup_details.h"
19 : #include "catalog/namespace.h"
20 : #include "catalog/pg_type.h"
21 : #include "commands/trigger.h"
22 : #include "common/int.h"
23 : #include "executor/spi.h"
24 : #include "funcapi.h"
25 : #include "lib/qunique.h"
26 : #include "mb/pg_wchar.h"
27 : #include "miscadmin.h"
28 : #include "parser/parse_coerce.h"
29 : #include "tsearch/ts_utils.h"
30 : #include "utils/array.h"
31 : #include "utils/builtins.h"
32 : #include "utils/regproc.h"
33 : #include "utils/rel.h"
34 :
35 :
36 : typedef struct
37 : {
38 : WordEntry *arrb;
39 : WordEntry *arre;
40 : char *values;
41 : char *operand;
42 : } CHKVAL;
43 :
44 :
45 : typedef struct StatEntry
46 : {
47 : uint32 ndoc; /* zero indicates that we were already here
48 : * while walking through the tree */
49 : uint32 nentry;
50 : struct StatEntry *left;
51 : struct StatEntry *right;
52 : uint32 lenlexeme;
53 : char lexeme[FLEXIBLE_ARRAY_MEMBER];
54 : } StatEntry;
55 :
56 : #define STATENTRYHDRSZ (offsetof(StatEntry, lexeme))
57 :
58 : typedef struct
59 : {
60 : int32 weight;
61 :
62 : uint32 maxdepth;
63 :
64 : StatEntry **stack;
65 : uint32 stackpos;
66 :
67 : StatEntry *root;
68 : } TSVectorStat;
69 :
70 :
71 : static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
72 : uint32 flags,
73 : TSExecuteCallback chkcond);
74 : static bool TS_execute_locations_recurse(QueryItem *curitem,
75 : void *arg,
76 : TSExecuteCallback chkcond,
77 : List **locations);
78 : static int tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len);
79 : static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
80 :
81 :
82 : /*
83 : * Order: haspos, len, word, for all positions (pos, weight)
84 : */
85 : static int
86 233 : silly_cmp_tsvector(const TSVectorData *a, const TSVectorData *b)
87 : {
88 233 : if (VARSIZE(a) < VARSIZE(b))
89 0 : return -1;
90 233 : else if (VARSIZE(a) > VARSIZE(b))
91 0 : return 1;
92 233 : else if (a->size < b->size)
93 0 : return -1;
94 233 : else if (a->size > b->size)
95 0 : return 1;
96 : else
97 : {
98 233 : const WordEntry *aptr = ARRPTR(a);
99 233 : const WordEntry *bptr = ARRPTR(b);
100 233 : int i = 0;
101 : int res;
102 :
103 :
104 268 : for (i = 0; i < a->size; i++)
105 : {
106 235 : if (aptr->haspos != bptr->haspos)
107 : {
108 0 : return (aptr->haspos > bptr->haspos) ? -1 : 1;
109 : }
110 235 : else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
111 : {
112 200 : return res;
113 : }
114 35 : else if (aptr->haspos)
115 : {
116 32 : WordEntryPos *ap = POSDATAPTR(a, aptr);
117 32 : WordEntryPos *bp = POSDATAPTR(b, bptr);
118 : int j;
119 :
120 32 : if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
121 0 : return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
122 :
123 64 : for (j = 0; j < POSDATALEN(a, aptr); j++)
124 : {
125 32 : if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
126 : {
127 0 : return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
128 : }
129 32 : else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
130 : {
131 0 : return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
132 : }
133 32 : ap++, bp++;
134 : }
135 : }
136 :
137 35 : aptr++;
138 35 : bptr++;
139 : }
140 : }
141 :
142 33 : return 0;
143 : }
144 :
145 : #define TSVECTORCMPFUNC( type, action, ret ) \
146 : Datum \
147 : tsvector_##type(PG_FUNCTION_ARGS) \
148 : { \
149 : TSVector a = PG_GETARG_TSVECTOR(0); \
150 : TSVector b = PG_GETARG_TSVECTOR(1); \
151 : int res = silly_cmp_tsvector(a, b); \
152 : PG_FREE_IF_COPY(a,0); \
153 : PG_FREE_IF_COPY(b,1); \
154 : PG_RETURN_##ret( res action 0 ); \
155 : } \
156 : /* keep compiler quiet - no extra ; */ \
157 : extern int no_such_variable
158 :
159 0 : TSVECTORCMPFUNC(lt, <, BOOL);
160 0 : TSVECTORCMPFUNC(le, <=, BOOL);
161 1 : TSVECTORCMPFUNC(eq, ==, BOOL);
162 0 : TSVECTORCMPFUNC(ge, >=, BOOL);
163 0 : TSVECTORCMPFUNC(gt, >, BOOL);
164 0 : TSVECTORCMPFUNC(ne, !=, BOOL);
165 232 : TSVECTORCMPFUNC(cmp, +, INT32);
166 :
167 : Datum
168 73 : tsvector_strip(PG_FUNCTION_ARGS)
169 : {
170 73 : TSVector in = PG_GETARG_TSVECTOR(0);
171 : TSVector out;
172 : int i,
173 73 : len = 0;
174 73 : WordEntry *arrin = ARRPTR(in),
175 : *arrout;
176 : char *cur;
177 :
178 261 : for (i = 0; i < in->size; i++)
179 188 : len += arrin[i].len;
180 :
181 73 : len = CALCDATASIZE(in->size, len);
182 73 : out = (TSVector) palloc0(len);
183 73 : SET_VARSIZE(out, len);
184 73 : out->size = in->size;
185 73 : arrout = ARRPTR(out);
186 73 : cur = STRPTR(out);
187 261 : for (i = 0; i < in->size; i++)
188 : {
189 188 : memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
190 188 : arrout[i].haspos = 0;
191 188 : arrout[i].len = arrin[i].len;
192 188 : arrout[i].pos = cur - STRPTR(out);
193 188 : cur += arrout[i].len;
194 : }
195 :
196 73 : PG_FREE_IF_COPY(in, 0);
197 73 : PG_RETURN_POINTER(out);
198 : }
199 :
200 : Datum
201 7 : tsvector_length(PG_FUNCTION_ARGS)
202 : {
203 7 : TSVector in = PG_GETARG_TSVECTOR(0);
204 7 : int32 ret = in->size;
205 :
206 7 : PG_FREE_IF_COPY(in, 0);
207 7 : PG_RETURN_INT32(ret);
208 : }
209 :
210 : static int
211 48 : parse_weight(char cw)
212 : {
213 : int w;
214 :
215 48 : switch (cw)
216 : {
217 14 : case 'A':
218 : case 'a':
219 14 : w = 3;
220 14 : break;
221 4 : case 'B':
222 : case 'b':
223 4 : w = 2;
224 4 : break;
225 30 : case 'C':
226 : case 'c':
227 30 : w = 1;
228 30 : break;
229 0 : case 'D':
230 : case 'd':
231 0 : w = 0;
232 0 : break;
233 0 : default:
234 : /* Avoid printing non-ASCII bytes, else we have encoding issues */
235 0 : if (cw >= ' ' && cw < 0x7f)
236 0 : ereport(ERROR,
237 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
238 : errmsg("unrecognized weight: \"%c\"", cw)));
239 : else /* use \ooo format, like charout() */
240 0 : ereport(ERROR,
241 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
242 : errmsg("unrecognized weight: \"\\%03o\"",
243 : (unsigned char) cw)));
244 : }
245 48 : return w;
246 : }
247 :
248 :
249 : Datum
250 10 : tsvector_setweight(PG_FUNCTION_ARGS)
251 : {
252 10 : TSVector in = PG_GETARG_TSVECTOR(0);
253 10 : char cw = PG_GETARG_CHAR(1);
254 : TSVector out;
255 : int i,
256 : j;
257 : WordEntry *entry;
258 : WordEntryPos *p;
259 10 : int w = parse_weight(cw);
260 :
261 10 : out = (TSVector) palloc(VARSIZE(in));
262 10 : memcpy(out, in, VARSIZE(in));
263 10 : entry = ARRPTR(out);
264 10 : i = out->size;
265 50 : while (i--)
266 : {
267 40 : if ((j = POSDATALEN(out, entry)) != 0)
268 : {
269 40 : p = POSDATAPTR(out, entry);
270 140 : while (j--)
271 : {
272 100 : WEP_SETWEIGHT(*p, w);
273 100 : p++;
274 : }
275 : }
276 40 : entry++;
277 : }
278 :
279 10 : PG_FREE_IF_COPY(in, 0);
280 10 : PG_RETURN_POINTER(out);
281 : }
282 :
283 : /*
284 : * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
285 : *
286 : * Assign weight w to elements of tsin that are listed in lexemes.
287 : */
288 : Datum
289 20 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
290 : {
291 20 : TSVector tsin = PG_GETARG_TSVECTOR(0);
292 20 : char char_weight = PG_GETARG_CHAR(1);
293 20 : ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2);
294 :
295 : TSVector tsout;
296 : int i,
297 : j,
298 : nlexemes,
299 : weight;
300 : WordEntry *entry;
301 : Datum *dlexemes;
302 : bool *nulls;
303 :
304 20 : weight = parse_weight(char_weight);
305 :
306 20 : tsout = (TSVector) palloc(VARSIZE(tsin));
307 20 : memcpy(tsout, tsin, VARSIZE(tsin));
308 20 : entry = ARRPTR(tsout);
309 :
310 20 : deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlexemes);
311 :
312 : /*
313 : * Assuming that lexemes array is significantly shorter than tsvector we
314 : * can iterate through lexemes performing binary search of each lexeme
315 : * from lexemes in tsvector.
316 : */
317 60 : for (i = 0; i < nlexemes; i++)
318 : {
319 : char *lex;
320 : int lex_len,
321 : lex_pos;
322 :
323 : /* Ignore null array elements, they surely don't match */
324 40 : if (nulls[i])
325 5 : continue;
326 :
327 35 : lex = VARDATA(DatumGetPointer(dlexemes[i]));
328 35 : lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
329 35 : lex_pos = tsvector_bsearch(tsout, lex, lex_len);
330 :
331 35 : if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
332 : {
333 20 : WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
334 :
335 65 : while (j--)
336 : {
337 45 : WEP_SETWEIGHT(*p, weight);
338 45 : p++;
339 : }
340 : }
341 : }
342 :
343 20 : PG_FREE_IF_COPY(tsin, 0);
344 20 : PG_FREE_IF_COPY(lexemes, 2);
345 :
346 20 : PG_RETURN_POINTER(tsout);
347 : }
348 :
/*
 * Compare the lexemes of two WordEntry items (a, b) whose string areas start
 * at pa and pb respectively; full (non-prefix) comparison.
 */
#define compareEntry(pa, a, pb, b) \
	tsCompareString((pa) + (a)->pos, \
					(a)->len, \
					(pb) + (b)->pos, \
					(b)->len, \
					false)
353 :
354 : /*
355 : * Add positions from src to dest after offsetting them by maxpos.
356 : * Return the number added (might be less than expected due to overflow)
357 : */
358 : static int32
359 10 : add_pos(TSVector src, WordEntry *srcptr,
360 : TSVector dest, WordEntry *destptr,
361 : int32 maxpos)
362 : {
363 10 : uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
364 : int i;
365 10 : uint16 slen = POSDATALEN(src, srcptr),
366 : startlen;
367 10 : WordEntryPos *spos = POSDATAPTR(src, srcptr),
368 10 : *dpos = POSDATAPTR(dest, destptr);
369 :
370 10 : if (!destptr->haspos)
371 0 : *clen = 0;
372 :
373 10 : startlen = *clen;
374 10 : for (i = 0;
375 20 : i < slen && *clen < MAXNUMPOS &&
376 10 : (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
377 10 : i++)
378 : {
379 10 : WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
380 10 : WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
381 10 : (*clen)++;
382 : }
383 :
384 10 : if (*clen != startlen)
385 10 : destptr->haspos = 1;
386 10 : return *clen - startlen;
387 : }
388 :
389 : /*
390 : * Perform binary search of given lexeme in TSVector.
391 : * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
392 : * found.
393 : */
394 : static int
395 165 : tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len)
396 : {
397 165 : const WordEntry *arrin = ARRPTR(tsv);
398 165 : int StopLow = 0,
399 165 : StopHigh = tsv->size,
400 : StopMiddle,
401 : cmp;
402 :
403 435 : while (StopLow < StopHigh)
404 : {
405 385 : StopMiddle = (StopLow + StopHigh) / 2;
406 :
407 385 : cmp = tsCompareString(lexeme, lexeme_len,
408 385 : STRPTR(tsv) + arrin[StopMiddle].pos,
409 385 : arrin[StopMiddle].len,
410 : false);
411 :
412 385 : if (cmp < 0)
413 180 : StopHigh = StopMiddle;
414 205 : else if (cmp > 0)
415 90 : StopLow = StopMiddle + 1;
416 : else /* found it */
417 115 : return StopMiddle;
418 : }
419 :
420 50 : return -1;
421 : }
422 :
/*
 * qsort comparator functions
 */

/* Order two ints ascending; va and vb point at the int values. */
static int
compare_int(const void *va, const void *vb)
{
	const int  *lhs = (const int *) va;
	const int  *rhs = (const int *) vb;

	return pg_cmp_s32(*lhs, *rhs);
}
435 :
436 : static int
437 85 : compare_text_lexemes(const void *va, const void *vb)
438 : {
439 85 : Datum a = *((const Datum *) va);
440 85 : Datum b = *((const Datum *) vb);
441 85 : char *alex = VARDATA_ANY(DatumGetPointer(a));
442 85 : int alex_len = VARSIZE_ANY_EXHDR(DatumGetPointer(a));
443 85 : char *blex = VARDATA_ANY(DatumGetPointer(b));
444 85 : int blex_len = VARSIZE_ANY_EXHDR(DatumGetPointer(b));
445 :
446 85 : return tsCompareString(alex, alex_len, blex, blex_len, false);
447 : }
448 :
449 : /*
450 : * Internal routine to delete lexemes from TSVector by array of offsets.
451 : *
452 : * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
453 : * int indices_count -- size of that array
454 : *
455 : * Returns new TSVector without given lexemes along with their positions
456 : * and weights.
457 : */
458 : static TSVector
459 55 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
460 : int indices_count)
461 : {
462 : TSVector tsout;
463 55 : WordEntry *arrin = ARRPTR(tsv),
464 : *arrout;
465 55 : char *data = STRPTR(tsv),
466 : *dataout;
467 : int i, /* index in arrin */
468 : j, /* index in arrout */
469 : k, /* index in indices_to_delete */
470 : curoff; /* index in dataout area */
471 :
472 : /*
473 : * Sort the filter array to simplify membership checks below. Also, get
474 : * rid of any duplicate entries, so that we can assume that indices_count
475 : * is exactly equal to the number of lexemes that will be removed.
476 : */
477 55 : if (indices_count > 1)
478 : {
479 25 : qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
480 25 : indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
481 : compare_int);
482 : }
483 :
484 : /*
485 : * Here we overestimate tsout size, since we don't know how much space is
486 : * used by the deleted lexeme(s). We will set exact size below.
487 : */
488 55 : tsout = (TSVector) palloc0(VARSIZE(tsv));
489 :
490 : /* This count must be correct because STRPTR(tsout) relies on it. */
491 55 : tsout->size = tsv->size - indices_count;
492 :
493 : /*
494 : * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
495 : */
496 55 : arrout = ARRPTR(tsout);
497 55 : dataout = STRPTR(tsout);
498 55 : curoff = 0;
499 330 : for (i = j = k = 0; i < tsv->size; i++)
500 : {
501 : /*
502 : * If current i is present in indices_to_delete, skip this lexeme.
503 : * Since indices_to_delete is already sorted, we only need to check
504 : * the current (k'th) entry.
505 : */
506 275 : if (k < indices_count && i == indices_to_delete[k])
507 : {
508 80 : k++;
509 80 : continue;
510 : }
511 :
512 : /* Copy lexeme and its positions and weights */
513 195 : memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
514 195 : arrout[j].haspos = arrin[i].haspos;
515 195 : arrout[j].len = arrin[i].len;
516 195 : arrout[j].pos = curoff;
517 195 : curoff += arrin[i].len;
518 195 : if (arrin[i].haspos)
519 : {
520 130 : int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
521 130 : + sizeof(uint16);
522 :
523 130 : curoff = SHORTALIGN(curoff);
524 130 : memcpy(dataout + curoff,
525 130 : STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
526 : len);
527 130 : curoff += len;
528 : }
529 :
530 195 : j++;
531 : }
532 :
533 : /*
534 : * k should now be exactly equal to indices_count. If it isn't then the
535 : * caller provided us with indices outside of [0, tsv->size) range and
536 : * estimation of tsout's size is wrong.
537 : */
538 : Assert(k == indices_count);
539 :
540 55 : SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
541 55 : return tsout;
542 : }
543 :
544 : /*
545 : * Delete given lexeme from tsvector.
546 : * Implementation of user-level ts_delete(tsvector, text).
547 : */
548 : Datum
549 30 : tsvector_delete_str(PG_FUNCTION_ARGS)
550 : {
551 30 : TSVector tsin = PG_GETARG_TSVECTOR(0),
552 : tsout;
553 30 : text *tlexeme = PG_GETARG_TEXT_PP(1);
554 30 : char *lexeme = VARDATA_ANY(tlexeme);
555 30 : int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
556 : skip_index;
557 :
558 30 : if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
559 10 : PG_RETURN_POINTER(tsin);
560 :
561 20 : tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
562 :
563 20 : PG_FREE_IF_COPY(tsin, 0);
564 20 : PG_FREE_IF_COPY(tlexeme, 1);
565 20 : PG_RETURN_POINTER(tsout);
566 : }
567 :
568 : /*
569 : * Delete given array of lexemes from tsvector.
570 : * Implementation of user-level ts_delete(tsvector, text[]).
571 : */
572 : Datum
573 35 : tsvector_delete_arr(PG_FUNCTION_ARGS)
574 : {
575 35 : TSVector tsin = PG_GETARG_TSVECTOR(0),
576 : tsout;
577 35 : ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1);
578 : int i,
579 : nlex,
580 : skip_count,
581 : *skip_indices;
582 : Datum *dlexemes;
583 : bool *nulls;
584 :
585 35 : deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlex);
586 :
587 : /*
588 : * In typical use case array of lexemes to delete is relatively small. So
589 : * here we optimize things for that scenario: iterate through lexarr
590 : * performing binary search of each lexeme from lexarr in tsvector.
591 : */
592 35 : skip_indices = palloc0(nlex * sizeof(int));
593 140 : for (i = skip_count = 0; i < nlex; i++)
594 : {
595 : char *lex;
596 : int lex_len,
597 : lex_pos;
598 :
599 : /* Ignore null array elements, they surely don't match */
600 105 : if (nulls[i])
601 5 : continue;
602 :
603 100 : lex = VARDATA(DatumGetPointer(dlexemes[i]));
604 100 : lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
605 100 : lex_pos = tsvector_bsearch(tsin, lex, lex_len);
606 :
607 100 : if (lex_pos >= 0)
608 65 : skip_indices[skip_count++] = lex_pos;
609 : }
610 :
611 35 : tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
612 :
613 35 : pfree(skip_indices);
614 35 : PG_FREE_IF_COPY(tsin, 0);
615 35 : PG_FREE_IF_COPY(lexemes, 1);
616 :
617 35 : PG_RETURN_POINTER(tsout);
618 : }
619 :
620 : /*
621 : * Expand tsvector as table with following columns:
622 : * lexeme: lexeme text
623 : * positions: integer array of lexeme positions
624 : * weights: char array of weights corresponding to positions
625 : */
626 : Datum
627 120 : tsvector_unnest(PG_FUNCTION_ARGS)
628 : {
629 : FuncCallContext *funcctx;
630 : TSVector tsin;
631 :
632 120 : if (SRF_IS_FIRSTCALL())
633 : {
634 : MemoryContext oldcontext;
635 : TupleDesc tupdesc;
636 :
637 20 : funcctx = SRF_FIRSTCALL_INIT();
638 20 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
639 :
640 20 : tupdesc = CreateTemplateTupleDesc(3);
641 20 : TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
642 : TEXTOID, -1, 0);
643 20 : TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
644 : INT2ARRAYOID, -1, 0);
645 20 : TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
646 : TEXTARRAYOID, -1, 0);
647 20 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
648 0 : elog(ERROR, "return type must be a row type");
649 20 : TupleDescFinalize(tupdesc);
650 20 : funcctx->tuple_desc = tupdesc;
651 :
652 20 : funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
653 :
654 20 : MemoryContextSwitchTo(oldcontext);
655 : }
656 :
657 120 : funcctx = SRF_PERCALL_SETUP();
658 120 : tsin = (TSVector) funcctx->user_fctx;
659 :
660 120 : if (funcctx->call_cntr < tsin->size)
661 : {
662 100 : WordEntry *arrin = ARRPTR(tsin);
663 100 : char *data = STRPTR(tsin);
664 : HeapTuple tuple;
665 : int j,
666 100 : i = funcctx->call_cntr;
667 100 : bool nulls[] = {false, false, false};
668 : Datum values[3];
669 :
670 100 : values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
671 :
672 100 : if (arrin[i].haspos)
673 : {
674 : WordEntryPosVector *posv;
675 : Datum *positions;
676 : Datum *weights;
677 : char weight;
678 :
679 : /*
680 : * Internally tsvector stores position and weight in the same
681 : * uint16 (2 bits for weight, 14 for position). Here we extract
682 : * that in two separate arrays.
683 : */
684 60 : posv = _POSVECPTR(tsin, arrin + i);
685 60 : positions = palloc(posv->npos * sizeof(Datum));
686 60 : weights = palloc(posv->npos * sizeof(Datum));
687 168 : for (j = 0; j < posv->npos; j++)
688 : {
689 108 : positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
690 108 : weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
691 108 : weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
692 : 1));
693 : }
694 :
695 60 : values[1] = PointerGetDatum(construct_array_builtin(positions, posv->npos, INT2OID));
696 60 : values[2] = PointerGetDatum(construct_array_builtin(weights, posv->npos, TEXTOID));
697 : }
698 : else
699 : {
700 40 : nulls[1] = nulls[2] = true;
701 : }
702 :
703 100 : tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
704 100 : SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
705 : }
706 : else
707 : {
708 20 : SRF_RETURN_DONE(funcctx);
709 : }
710 : }
711 :
712 : /*
713 : * Convert tsvector to array of lexemes.
714 : */
715 : Datum
716 10 : tsvector_to_array(PG_FUNCTION_ARGS)
717 : {
718 10 : TSVector tsin = PG_GETARG_TSVECTOR(0);
719 10 : WordEntry *arrin = ARRPTR(tsin);
720 : Datum *elements;
721 : int i;
722 : ArrayType *array;
723 :
724 10 : elements = palloc(tsin->size * sizeof(Datum));
725 :
726 60 : for (i = 0; i < tsin->size; i++)
727 : {
728 50 : elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
729 : arrin[i].len));
730 : }
731 :
732 10 : array = construct_array_builtin(elements, tsin->size, TEXTOID);
733 :
734 10 : pfree(elements);
735 10 : PG_FREE_IF_COPY(tsin, 0);
736 10 : PG_RETURN_POINTER(array);
737 : }
738 :
739 : /*
740 : * Build tsvector from array of lexemes.
741 : */
742 : Datum
743 18 : array_to_tsvector(PG_FUNCTION_ARGS)
744 : {
745 18 : ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
746 : TSVector tsout;
747 : Datum *dlexemes;
748 : WordEntry *arrout;
749 : bool *nulls;
750 : int nitems,
751 : i,
752 : tslen,
753 18 : datalen = 0;
754 : char *cur;
755 :
756 18 : deconstruct_array_builtin(v, TEXTOID, &dlexemes, &nulls, &nitems);
757 :
758 : /*
759 : * Reject nulls and zero length strings (maybe we should just ignore them,
760 : * instead?)
761 : */
762 95 : for (i = 0; i < nitems; i++)
763 : {
764 85 : if (nulls[i])
765 4 : ereport(ERROR,
766 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
767 : errmsg("lexeme array may not contain nulls")));
768 :
769 81 : if (VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ == 0)
770 4 : ereport(ERROR,
771 : (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
772 : errmsg("lexeme array may not contain empty strings")));
773 : }
774 :
775 : /* Sort and de-dup, because this is required for a valid tsvector. */
776 10 : if (nitems > 1)
777 : {
778 10 : qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
779 10 : nitems = qunique(dlexemes, nitems, sizeof(Datum),
780 : compare_text_lexemes);
781 : }
782 :
783 : /* Calculate space needed for surviving lexemes. */
784 50 : for (i = 0; i < nitems; i++)
785 40 : datalen += VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
786 10 : tslen = CALCDATASIZE(nitems, datalen);
787 :
788 : /* Allocate and fill tsvector. */
789 10 : tsout = (TSVector) palloc0(tslen);
790 10 : SET_VARSIZE(tsout, tslen);
791 10 : tsout->size = nitems;
792 :
793 10 : arrout = ARRPTR(tsout);
794 10 : cur = STRPTR(tsout);
795 50 : for (i = 0; i < nitems; i++)
796 : {
797 40 : char *lex = VARDATA(DatumGetPointer(dlexemes[i]));
798 40 : int lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
799 :
800 40 : memcpy(cur, lex, lex_len);
801 40 : arrout[i].haspos = 0;
802 40 : arrout[i].len = lex_len;
803 40 : arrout[i].pos = cur - STRPTR(tsout);
804 40 : cur += lex_len;
805 : }
806 :
807 10 : PG_FREE_IF_COPY(v, 0);
808 10 : PG_RETURN_POINTER(tsout);
809 : }
810 :
811 : /*
812 : * ts_filter(): keep only lexemes with given weights in tsvector.
813 : */
814 : Datum
815 14 : tsvector_filter(PG_FUNCTION_ARGS)
816 : {
817 14 : TSVector tsin = PG_GETARG_TSVECTOR(0),
818 : tsout;
819 14 : ArrayType *weights = PG_GETARG_ARRAYTYPE_P(1);
820 14 : WordEntry *arrin = ARRPTR(tsin),
821 : *arrout;
822 14 : char *datain = STRPTR(tsin),
823 : *dataout;
824 : Datum *dweights;
825 : bool *nulls;
826 : int nweights;
827 : int i,
828 : j;
829 14 : int cur_pos = 0;
830 14 : char mask = 0;
831 :
832 14 : deconstruct_array_builtin(weights, CHAROID, &dweights, &nulls, &nweights);
833 :
834 32 : for (i = 0; i < nweights; i++)
835 : {
836 : char char_weight;
837 :
838 22 : if (nulls[i])
839 4 : ereport(ERROR,
840 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
841 : errmsg("weight array may not contain nulls")));
842 :
843 18 : char_weight = DatumGetChar(dweights[i]);
844 18 : mask |= 1 << parse_weight(char_weight);
845 : }
846 :
847 10 : tsout = (TSVector) palloc0(VARSIZE(tsin));
848 10 : tsout->size = tsin->size;
849 10 : arrout = ARRPTR(tsout);
850 10 : dataout = STRPTR(tsout);
851 :
852 90 : for (i = j = 0; i < tsin->size; i++)
853 : {
854 : WordEntryPosVector *posvin,
855 : *posvout;
856 80 : int npos = 0;
857 : int k;
858 :
859 80 : if (!arrin[i].haspos)
860 25 : continue;
861 :
862 55 : posvin = _POSVECPTR(tsin, arrin + i);
863 55 : posvout = (WordEntryPosVector *)
864 55 : (dataout + SHORTALIGN(cur_pos + arrin[i].len));
865 :
866 110 : for (k = 0; k < posvin->npos; k++)
867 : {
868 55 : if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
869 25 : posvout->pos[npos++] = posvin->pos[k];
870 : }
871 :
872 : /* if no satisfactory positions found, skip lexeme */
873 55 : if (!npos)
874 30 : continue;
875 :
876 25 : arrout[j].haspos = true;
877 25 : arrout[j].len = arrin[i].len;
878 25 : arrout[j].pos = cur_pos;
879 :
880 25 : memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
881 25 : posvout->npos = npos;
882 25 : cur_pos += SHORTALIGN(arrin[i].len);
883 25 : cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
884 : sizeof(uint16);
885 25 : j++;
886 : }
887 :
888 10 : tsout->size = j;
889 10 : if (dataout != STRPTR(tsout))
890 10 : memmove(STRPTR(tsout), dataout, cur_pos);
891 :
892 10 : SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
893 :
894 10 : PG_FREE_IF_COPY(tsin, 0);
895 10 : PG_RETURN_POINTER(tsout);
896 : }
897 :
898 : Datum
899 9 : tsvector_concat(PG_FUNCTION_ARGS)
900 : {
901 9 : TSVector in1 = PG_GETARG_TSVECTOR(0);
902 9 : TSVector in2 = PG_GETARG_TSVECTOR(1);
903 : TSVector out;
904 : WordEntry *ptr;
905 : WordEntry *ptr1,
906 : *ptr2;
907 : WordEntryPos *p;
908 9 : int maxpos = 0,
909 : i,
910 : j,
911 : i1,
912 : i2,
913 : dataoff,
914 : output_bytes,
915 : output_size;
916 : char *data,
917 : *data1,
918 : *data2;
919 :
920 : /* Get max position in in1; we'll need this to offset in2's positions */
921 9 : ptr = ARRPTR(in1);
922 9 : i = in1->size;
923 23 : while (i--)
924 : {
925 14 : if ((j = POSDATALEN(in1, ptr)) != 0)
926 : {
927 14 : p = POSDATAPTR(in1, ptr);
928 28 : while (j--)
929 : {
930 14 : if (WEP_GETPOS(*p) > maxpos)
931 9 : maxpos = WEP_GETPOS(*p);
932 14 : p++;
933 : }
934 : }
935 14 : ptr++;
936 : }
937 :
938 9 : ptr1 = ARRPTR(in1);
939 9 : ptr2 = ARRPTR(in2);
940 9 : data1 = STRPTR(in1);
941 9 : data2 = STRPTR(in2);
942 9 : i1 = in1->size;
943 9 : i2 = in2->size;
944 :
945 : /*
946 : * Conservative estimate of space needed. We might need all the data in
947 : * both inputs, and conceivably add a pad byte before position data for
948 : * each item where there was none before.
949 : */
950 9 : output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
951 :
952 9 : out = (TSVector) palloc0(output_bytes);
953 9 : SET_VARSIZE(out, output_bytes);
954 :
955 : /*
956 : * We must make out->size valid so that STRPTR(out) is sensible. We'll
957 : * collapse out any unused space at the end.
958 : */
959 9 : out->size = in1->size + in2->size;
960 :
961 9 : ptr = ARRPTR(out);
962 9 : data = STRPTR(out);
963 9 : dataoff = 0;
964 23 : while (i1 && i2)
965 : {
966 14 : int cmp = compareEntry(data1, ptr1, data2, ptr2);
967 :
968 14 : if (cmp < 0)
969 : { /* in1 first */
970 5 : ptr->haspos = ptr1->haspos;
971 5 : ptr->len = ptr1->len;
972 5 : memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
973 5 : ptr->pos = dataoff;
974 5 : dataoff += ptr1->len;
975 5 : if (ptr->haspos)
976 : {
977 5 : dataoff = SHORTALIGN(dataoff);
978 5 : memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
979 5 : dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
980 : }
981 :
982 5 : ptr++;
983 5 : ptr1++;
984 5 : i1--;
985 : }
986 9 : else if (cmp > 0)
987 : { /* in2 first */
988 4 : ptr->haspos = ptr2->haspos;
989 4 : ptr->len = ptr2->len;
990 4 : memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
991 4 : ptr->pos = dataoff;
992 4 : dataoff += ptr2->len;
993 4 : if (ptr->haspos)
994 : {
995 0 : int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
996 :
997 0 : if (addlen == 0)
998 0 : ptr->haspos = 0;
999 : else
1000 : {
1001 0 : dataoff = SHORTALIGN(dataoff);
1002 0 : dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1003 : }
1004 : }
1005 :
1006 4 : ptr++;
1007 4 : ptr2++;
1008 4 : i2--;
1009 : }
1010 : else
1011 : {
1012 5 : ptr->haspos = ptr1->haspos | ptr2->haspos;
1013 5 : ptr->len = ptr1->len;
1014 5 : memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1015 5 : ptr->pos = dataoff;
1016 5 : dataoff += ptr1->len;
1017 5 : if (ptr->haspos)
1018 : {
1019 5 : if (ptr1->haspos)
1020 : {
1021 5 : dataoff = SHORTALIGN(dataoff);
1022 5 : memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1023 5 : dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1024 5 : if (ptr2->haspos)
1025 5 : dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1026 : }
1027 : else /* must have ptr2->haspos */
1028 : {
1029 0 : int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1030 :
1031 0 : if (addlen == 0)
1032 0 : ptr->haspos = 0;
1033 : else
1034 : {
1035 0 : dataoff = SHORTALIGN(dataoff);
1036 0 : dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1037 : }
1038 : }
1039 : }
1040 :
1041 5 : ptr++;
1042 5 : ptr1++;
1043 5 : ptr2++;
1044 5 : i1--;
1045 5 : i2--;
1046 : }
1047 : }
1048 :
1049 13 : while (i1)
1050 : {
1051 4 : ptr->haspos = ptr1->haspos;
1052 4 : ptr->len = ptr1->len;
1053 4 : memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1054 4 : ptr->pos = dataoff;
1055 4 : dataoff += ptr1->len;
1056 4 : if (ptr->haspos)
1057 : {
1058 4 : dataoff = SHORTALIGN(dataoff);
1059 4 : memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1060 4 : dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1061 : }
1062 :
1063 4 : ptr++;
1064 4 : ptr1++;
1065 4 : i1--;
1066 : }
1067 :
1068 14 : while (i2)
1069 : {
1070 5 : ptr->haspos = ptr2->haspos;
1071 5 : ptr->len = ptr2->len;
1072 5 : memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1073 5 : ptr->pos = dataoff;
1074 5 : dataoff += ptr2->len;
1075 5 : if (ptr->haspos)
1076 : {
1077 5 : int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1078 :
1079 5 : if (addlen == 0)
1080 0 : ptr->haspos = 0;
1081 : else
1082 : {
1083 5 : dataoff = SHORTALIGN(dataoff);
1084 5 : dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1085 : }
1086 : }
1087 :
1088 5 : ptr++;
1089 5 : ptr2++;
1090 5 : i2--;
1091 : }
1092 :
1093 : /*
1094 : * Instead of checking each offset individually, we check for overflow of
1095 : * pos fields once at the end.
1096 : */
1097 9 : if (dataoff > MAXSTRPOS)
1098 0 : ereport(ERROR,
1099 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1100 : errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1101 :
1102 : /*
1103 : * Adjust sizes (asserting that we didn't overrun the original estimates)
1104 : * and collapse out any unused array entries.
1105 : */
1106 9 : output_size = ptr - ARRPTR(out);
1107 : Assert(output_size <= out->size);
1108 9 : out->size = output_size;
1109 9 : if (data != STRPTR(out))
1110 5 : memmove(STRPTR(out), data, dataoff);
1111 9 : output_bytes = CALCDATASIZE(out->size, dataoff);
1112 : Assert(output_bytes <= VARSIZE(out));
1113 9 : SET_VARSIZE(out, output_bytes);
1114 :
1115 9 : PG_FREE_IF_COPY(in1, 0);
1116 9 : PG_FREE_IF_COPY(in2, 1);
1117 9 : PG_RETURN_POINTER(out);
1118 : }
1119 :
1120 : /*
1121 : * Compare two strings by tsvector rules.
1122 : *
1123 : * if prefix = true then it returns zero value iff b has prefix a
1124 : */
1125 : int32
1126 4195612 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1127 : {
1128 : int cmp;
1129 :
1130 4195612 : if (lena == 0)
1131 : {
1132 30 : if (prefix)
1133 0 : cmp = 0; /* empty string is prefix of anything */
1134 : else
1135 30 : cmp = (lenb > 0) ? -1 : 0;
1136 : }
1137 4195582 : else if (lenb == 0)
1138 : {
1139 0 : cmp = (lena > 0) ? 1 : 0;
1140 : }
1141 : else
1142 : {
1143 4195582 : cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
1144 :
1145 4195582 : if (prefix)
1146 : {
1147 11021 : if (cmp == 0 && lena > lenb)
1148 0 : cmp = 1; /* a is longer, so not a prefix of b */
1149 : }
1150 4184561 : else if (cmp == 0 && lena != lenb)
1151 : {
1152 21675 : cmp = (lena < lenb) ? -1 : 1;
1153 : }
1154 : }
1155 :
1156 4195612 : return cmp;
1157 : }
1158 :
1159 : /*
1160 : * Check weight info or/and fill 'data' with the required positions
1161 : */
1162 : static TSTernaryValue
1163 45576 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
1164 : ExecPhraseData *data)
1165 : {
1166 45576 : TSTernaryValue result = TS_NO;
1167 :
1168 : Assert(data == NULL || data->npos == 0);
1169 :
1170 45576 : if (entry->haspos)
1171 : {
1172 : WordEntryPosVector *posvec;
1173 :
1174 : /*
1175 : * We can't use the _POSVECPTR macro here because the pointer to the
1176 : * tsvector's lexeme storage is already contained in chkval->values.
1177 : */
1178 3160 : posvec = (WordEntryPosVector *)
1179 3160 : (chkval->values + SHORTALIGN(entry->pos + entry->len));
1180 :
1181 3160 : if (val->weight && data)
1182 40 : {
1183 40 : WordEntryPos *posvec_iter = posvec->pos;
1184 : WordEntryPos *dptr;
1185 :
1186 : /*
1187 : * Filter position information by weights
1188 : */
1189 40 : dptr = data->pos = palloc_array(WordEntryPos, posvec->npos);
1190 40 : data->allocated = true;
1191 :
1192 : /* Is there a position with a matching weight? */
1193 80 : while (posvec_iter < posvec->pos + posvec->npos)
1194 : {
1195 : /* If true, append this position to the data->pos */
1196 40 : if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1197 : {
1198 20 : *dptr = WEP_GETPOS(*posvec_iter);
1199 20 : dptr++;
1200 : }
1201 :
1202 40 : posvec_iter++;
1203 : }
1204 :
1205 40 : data->npos = dptr - data->pos;
1206 :
1207 40 : if (data->npos > 0)
1208 20 : result = TS_YES;
1209 : else
1210 : {
1211 20 : pfree(data->pos);
1212 20 : data->pos = NULL;
1213 20 : data->allocated = false;
1214 : }
1215 : }
1216 3120 : else if (val->weight)
1217 : {
1218 332 : WordEntryPos *posvec_iter = posvec->pos;
1219 :
1220 : /* Is there a position with a matching weight? */
1221 503 : while (posvec_iter < posvec->pos + posvec->npos)
1222 : {
1223 372 : if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1224 : {
1225 201 : result = TS_YES;
1226 201 : break; /* no need to go further */
1227 : }
1228 :
1229 171 : posvec_iter++;
1230 : }
1231 : }
1232 2788 : else if (data)
1233 : {
1234 1645 : data->npos = posvec->npos;
1235 1645 : data->pos = posvec->pos;
1236 1645 : data->allocated = false;
1237 1645 : result = TS_YES;
1238 : }
1239 : else
1240 : {
1241 : /* simplest case: no weight check, positions not needed */
1242 1143 : result = TS_YES;
1243 : }
1244 : }
1245 : else
1246 : {
1247 : /*
1248 : * Position info is lacking, so if the caller requires it, we can only
1249 : * say that maybe there is a match.
1250 : *
1251 : * Notice, however, that we *don't* check val->weight here.
1252 : * Historically, stripped tsvectors are considered to match queries
1253 : * whether or not the query has a weight restriction; that's a little
1254 : * dubious but we'll preserve the behavior.
1255 : */
1256 42416 : if (data)
1257 15385 : result = TS_MAYBE;
1258 : else
1259 27031 : result = TS_YES;
1260 : }
1261 :
1262 45576 : return result;
1263 : }
1264 :
1265 : /*
1266 : * TS_execute callback for matching a tsquery operand to plain tsvector data
1267 : */
1268 : static TSTernaryValue
1269 189554 : checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
1270 : {
1271 189554 : CHKVAL *chkval = (CHKVAL *) checkval;
1272 189554 : WordEntry *StopLow = chkval->arrb;
1273 189554 : WordEntry *StopHigh = chkval->arre;
1274 189554 : WordEntry *StopMiddle = StopHigh;
1275 189554 : TSTernaryValue res = TS_NO;
1276 :
1277 : /* Loop invariant: StopLow <= val < StopHigh */
1278 1191598 : while (StopLow < StopHigh)
1279 : {
1280 : int difference;
1281 :
1282 1037564 : StopMiddle = StopLow + (StopHigh - StopLow) / 2;
1283 1037564 : difference = tsCompareString(chkval->operand + val->distance,
1284 1037564 : val->length,
1285 1037564 : chkval->values + StopMiddle->pos,
1286 1037564 : StopMiddle->len,
1287 : false);
1288 :
1289 1037564 : if (difference == 0)
1290 : {
1291 : /* Check weight info & fill 'data' with positions */
1292 35520 : res = checkclass_str(chkval, StopMiddle, val, data);
1293 35520 : break;
1294 : }
1295 1002044 : else if (difference > 0)
1296 565083 : StopLow = StopMiddle + 1;
1297 : else
1298 436961 : StopHigh = StopMiddle;
1299 : }
1300 :
1301 : /*
1302 : * If it's a prefix search, we should also consider lexemes that the
1303 : * search term is a prefix of (which will necessarily immediately follow
1304 : * the place we found in the above loop). But we can skip them if there
1305 : * was a definite match on the exact term AND the caller doesn't need
1306 : * position info.
1307 : */
1308 189554 : if (val->prefix && (res != TS_YES || data))
1309 : {
1310 11040 : WordEntryPos *allpos = NULL;
1311 11040 : int npos = 0,
1312 11040 : totalpos = 0;
1313 :
1314 : /* adjust start position for corner case */
1315 11040 : if (StopLow >= StopHigh)
1316 11030 : StopMiddle = StopHigh;
1317 :
1318 : /* we don't try to re-use any data from the initial match */
1319 11040 : if (data)
1320 : {
1321 30 : if (data->allocated)
1322 0 : pfree(data->pos);
1323 30 : data->pos = NULL;
1324 30 : data->allocated = false;
1325 30 : data->npos = 0;
1326 : }
1327 11040 : res = TS_NO;
1328 :
1329 21011 : while ((res != TS_YES || data) &&
1330 31751 : StopMiddle < chkval->arre &&
1331 10655 : tsCompareString(chkval->operand + val->distance,
1332 10655 : val->length,
1333 10655 : chkval->values + StopMiddle->pos,
1334 10655 : StopMiddle->len,
1335 : true) == 0)
1336 : {
1337 : TSTernaryValue subres;
1338 :
1339 10056 : subres = checkclass_str(chkval, StopMiddle, val, data);
1340 :
1341 10056 : if (subres != TS_NO)
1342 : {
1343 10006 : if (data)
1344 : {
1345 : /*
1346 : * We need to join position information
1347 : */
1348 35 : if (subres == TS_MAYBE)
1349 : {
1350 : /*
1351 : * No position info for this match, so we must report
1352 : * MAYBE overall.
1353 : */
1354 0 : res = TS_MAYBE;
1355 : /* forget any previous positions */
1356 0 : npos = 0;
1357 : /* don't leak storage */
1358 0 : if (allpos)
1359 0 : pfree(allpos);
1360 0 : break;
1361 : }
1362 :
1363 65 : while (npos + data->npos > totalpos)
1364 : {
1365 30 : if (totalpos == 0)
1366 : {
1367 30 : totalpos = 256;
1368 30 : allpos = palloc_array(WordEntryPos, totalpos);
1369 : }
1370 : else
1371 : {
1372 0 : totalpos *= 2;
1373 0 : allpos = repalloc_array(allpos, WordEntryPos, totalpos);
1374 : }
1375 : }
1376 :
1377 35 : memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1378 35 : npos += data->npos;
1379 :
1380 : /* don't leak storage from individual matches */
1381 35 : if (data->allocated)
1382 20 : pfree(data->pos);
1383 35 : data->pos = NULL;
1384 35 : data->allocated = false;
1385 : /* it's important to reset data->npos before next loop */
1386 35 : data->npos = 0;
1387 : }
1388 : else
1389 : {
1390 : /* Don't need positions, just handle YES/MAYBE */
1391 9971 : if (subres == TS_YES || res == TS_NO)
1392 9971 : res = subres;
1393 : }
1394 : }
1395 :
1396 10056 : StopMiddle++;
1397 : }
1398 :
1399 11040 : if (data && npos > 0)
1400 : {
1401 : /* Sort and make unique array of found positions */
1402 30 : data->pos = allpos;
1403 30 : qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1404 30 : data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
1405 : compareWordEntryPos);
1406 30 : data->allocated = true;
1407 30 : res = TS_YES;
1408 : }
1409 : }
1410 :
1411 189554 : return res;
1412 : }
1413 :
1414 : /*
1415 : * Compute output position list for a tsquery operator in phrase mode.
1416 : *
1417 : * Merge the position lists in Ldata and Rdata as specified by "emit",
1418 : * returning the result list into *data. The input position lists must be
1419 : * sorted and unique, and the output will be as well.
1420 : *
1421 : * data: pointer to initially-all-zeroes output struct, or NULL
1422 : * Ldata, Rdata: input position lists
1423 : * emit: bitmask of TSPO_XXX flags
1424 : * Loffset: offset to be added to Ldata positions before comparing/outputting
1425 : * Roffset: offset to be added to Rdata positions before comparing/outputting
1426 : * max_npos: maximum possible required size of output position array
1427 : *
1428 : * Loffset and Roffset should not be negative, else we risk trying to output
1429 : * negative positions, which won't fit into WordEntryPos.
1430 : *
1431 : * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
1432 : * we return it as TSTernaryValue.
1433 : *
1434 : * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
1435 : * returns TS_YES if any positions would have been emitted.
1436 : */
1437 : #define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
1438 : #define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
1439 : #define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
1440 :
1441 : static TSTernaryValue
1442 20112 : TS_phrase_output(ExecPhraseData *data,
1443 : ExecPhraseData *Ldata,
1444 : ExecPhraseData *Rdata,
1445 : int emit,
1446 : int Loffset,
1447 : int Roffset,
1448 : int max_npos)
1449 : {
1450 : int Lindex,
1451 : Rindex;
1452 :
1453 : /* Loop until both inputs are exhausted */
1454 20112 : Lindex = Rindex = 0;
1455 20904 : while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1456 : {
1457 : int Lpos,
1458 : Rpos;
1459 1748 : int output_pos = 0;
1460 :
1461 : /*
1462 : * Fetch current values to compare. WEP_GETPOS() is needed because
1463 : * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1464 : */
1465 1748 : if (Lindex < Ldata->npos)
1466 1288 : Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1467 : else
1468 : {
1469 : /* L array exhausted, so we're done if R_ONLY isn't set */
1470 460 : if (!(emit & TSPO_R_ONLY))
1471 113 : break;
1472 347 : Lpos = INT_MAX;
1473 : }
1474 1635 : if (Rindex < Rdata->npos)
1475 1445 : Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1476 : else
1477 : {
1478 : /* R array exhausted, so we're done if L_ONLY isn't set */
1479 190 : if (!(emit & TSPO_L_ONLY))
1480 122 : break;
1481 68 : Rpos = INT_MAX;
1482 : }
1483 :
1484 : /* Merge-join the two input lists */
1485 1513 : if (Lpos < Rpos)
1486 : {
1487 : /* Lpos is not matched in Rdata, should we output it? */
1488 365 : if (emit & TSPO_L_ONLY)
1489 116 : output_pos = Lpos;
1490 365 : Lindex++;
1491 : }
1492 1148 : else if (Lpos == Rpos)
1493 : {
1494 : /* Lpos and Rpos match ... should we output it? */
1495 621 : if (emit & TSPO_BOTH)
1496 553 : output_pos = Rpos;
1497 621 : Lindex++;
1498 621 : Rindex++;
1499 : }
1500 : else /* Lpos > Rpos */
1501 : {
1502 : /* Rpos is not matched in Ldata, should we output it? */
1503 527 : if (emit & TSPO_R_ONLY)
1504 376 : output_pos = Rpos;
1505 527 : Rindex++;
1506 : }
1507 :
1508 1513 : if (output_pos > 0)
1509 : {
1510 1045 : if (data)
1511 : {
1512 : /* Store position, first allocating output array if needed */
1513 324 : if (data->pos == NULL)
1514 : {
1515 261 : data->pos = (WordEntryPos *)
1516 261 : palloc(max_npos * sizeof(WordEntryPos));
1517 261 : data->allocated = true;
1518 : }
1519 324 : data->pos[data->npos++] = output_pos;
1520 : }
1521 : else
1522 : {
1523 : /*
1524 : * Exact positions not needed, so return TS_YES as soon as we
1525 : * know there is at least one.
1526 : */
1527 721 : return TS_YES;
1528 : }
1529 : }
1530 : }
1531 :
1532 19391 : if (data && data->npos > 0)
1533 : {
1534 : /* Let's assert we didn't overrun the array */
1535 : Assert(data->npos <= max_npos);
1536 261 : return TS_YES;
1537 : }
1538 19130 : return TS_NO;
1539 : }
1540 :
1541 : /*
1542 : * Execute tsquery at or below an OP_PHRASE operator.
1543 : *
1544 : * This handles tsquery execution at recursion levels where we need to care
1545 : * about match locations.
1546 : *
1547 : * In addition to the same arguments used for TS_execute, the caller may pass
1548 : * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1549 : * match position info on success. data == NULL if no position data need be
1550 : * returned.
1551 : * Note: the function assumes data != NULL for operators other than OP_PHRASE.
1552 : * This is OK because an outside call always starts from an OP_PHRASE node,
1553 : * and all internal recursion cases pass data != NULL.
1554 : *
1555 : * The detailed semantics of the match data, given that the function returned
1556 : * TS_YES (successful match), are:
1557 : *
1558 : * npos > 0, negate = false:
1559 : * query is matched at specified position(s) (and only those positions)
1560 : * npos > 0, negate = true:
1561 : * query is matched at all positions *except* specified position(s)
1562 : * npos = 0, negate = true:
1563 : * query is matched at all positions
1564 : * npos = 0, negate = false:
1565 : * disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
1566 : *
1567 : * Successful matches also return a "width" value which is the match width in
1568 : * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
1569 : * and is the sum of the phrase operator distances for phrase matches. Note
1570 : * that when width > 0, the listed positions represent the ends of matches not
1571 : * the starts. (This unintuitive rule is needed to avoid possibly generating
1572 : * negative positions, which wouldn't fit into the WordEntryPos arrays.)
1573 : *
1574 : * If the TSExecuteCallback function reports that an operand is present
1575 : * but fails to provide position(s) for it, we will return TS_MAYBE when
1576 : * it is possible but not certain that the query is matched.
1577 : *
1578 : * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
1579 : * negate = false (which is the state initialized by the caller); but the
1580 : * "width" output in such cases is undefined.
1581 : */
1582 : static TSTernaryValue
1583 467663 : TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1584 : TSExecuteCallback chkcond,
1585 : ExecPhraseData *data)
1586 : {
1587 : ExecPhraseData Ldata,
1588 : Rdata;
1589 : TSTernaryValue lmatch,
1590 : rmatch;
1591 : int Loffset,
1592 : Roffset,
1593 : maxwidth;
1594 :
1595 : /* since this function recurses, it could be driven to stack overflow */
1596 467663 : check_stack_depth();
1597 :
1598 : /* ... and let's check for query cancel while we're at it */
1599 467663 : CHECK_FOR_INTERRUPTS();
1600 :
1601 467663 : if (curitem->type == QI_VAL)
1602 230075 : return chkcond(arg, (QueryOperand *) curitem, data);
1603 :
1604 237588 : switch (curitem->qoperator.oper)
1605 : {
1606 80485 : case OP_NOT:
1607 :
1608 : /*
1609 : * We need not touch data->width, since a NOT operation does not
1610 : * change the match width.
1611 : */
1612 80485 : if (flags & TS_EXEC_SKIP_NOT)
1613 : {
1614 : /* with SKIP_NOT, report NOT as "match everywhere" */
1615 : Assert(data->npos == 0 && !data->negate);
1616 0 : data->negate = true;
1617 0 : return TS_YES;
1618 : }
1619 80485 : switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1620 : {
1621 70342 : case TS_NO:
1622 : /* change "match nowhere" to "match everywhere" */
1623 : Assert(data->npos == 0 && !data->negate);
1624 70342 : data->negate = true;
1625 70342 : return TS_YES;
1626 273 : case TS_YES:
1627 273 : if (data->npos > 0)
1628 : {
1629 : /* we have some positions, invert negate flag */
1630 268 : data->negate = !data->negate;
1631 268 : return TS_YES;
1632 : }
1633 5 : else if (data->negate)
1634 : {
1635 : /* change "match everywhere" to "match nowhere" */
1636 5 : data->negate = false;
1637 5 : return TS_NO;
1638 : }
1639 : /* Should not get here if result was TS_YES */
1640 : Assert(false);
1641 0 : break;
1642 9870 : case TS_MAYBE:
1643 : /* match positions are, and remain, uncertain */
1644 9870 : return TS_MAYBE;
1645 : }
1646 0 : break;
1647 :
1648 156977 : case OP_PHRASE:
1649 : case OP_AND:
1650 156977 : memset(&Ldata, 0, sizeof(Ldata));
1651 156977 : memset(&Rdata, 0, sizeof(Rdata));
1652 :
1653 156977 : lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1654 : arg, flags, chkcond, &Ldata);
1655 156977 : if (lmatch == TS_NO)
1656 83795 : return TS_NO;
1657 :
1658 73182 : rmatch = TS_phrase_execute(curitem + 1,
1659 : arg, flags, chkcond, &Rdata);
1660 73182 : if (rmatch == TS_NO)
1661 35992 : return TS_NO;
1662 :
1663 : /*
1664 : * If either operand has no position information, then we can't
1665 : * return reliable position data, only a MAYBE result.
1666 : */
1667 37190 : if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1668 17204 : return TS_MAYBE;
1669 :
1670 19986 : if (curitem->qoperator.oper == OP_PHRASE)
1671 : {
1672 : /*
1673 : * Compute Loffset and Roffset suitable for phrase match, and
1674 : * compute overall width of whole phrase match.
1675 : */
1676 19981 : Loffset = curitem->qoperator.distance + Rdata.width;
1677 19981 : Roffset = 0;
1678 19981 : if (data)
1679 155 : data->width = curitem->qoperator.distance +
1680 155 : Ldata.width + Rdata.width;
1681 : }
1682 : else
1683 : {
1684 : /*
1685 : * For OP_AND, set output width and alignment like OP_OR (see
1686 : * comment below)
1687 : */
1688 5 : maxwidth = Max(Ldata.width, Rdata.width);
1689 5 : Loffset = maxwidth - Ldata.width;
1690 5 : Roffset = maxwidth - Rdata.width;
1691 5 : if (data)
1692 5 : data->width = maxwidth;
1693 : }
1694 :
1695 19986 : if (Ldata.negate && Rdata.negate)
1696 : {
1697 : /* !L & !R: treat as !(L | R) */
1698 18958 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1699 : TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1700 : Loffset, Roffset,
1701 18958 : Ldata.npos + Rdata.npos);
1702 18958 : if (data)
1703 0 : data->negate = true;
1704 18958 : return TS_YES;
1705 : }
1706 1028 : else if (Ldata.negate)
1707 : {
1708 : /* !L & R */
1709 309 : return TS_phrase_output(data, &Ldata, &Rdata,
1710 : TSPO_R_ONLY,
1711 : Loffset, Roffset,
1712 : Rdata.npos);
1713 : }
1714 719 : else if (Rdata.negate)
1715 : {
1716 : /* L & !R */
1717 5 : return TS_phrase_output(data, &Ldata, &Rdata,
1718 : TSPO_L_ONLY,
1719 : Loffset, Roffset,
1720 : Ldata.npos);
1721 : }
1722 : else
1723 : {
1724 : /* straight AND */
1725 714 : return TS_phrase_output(data, &Ldata, &Rdata,
1726 : TSPO_BOTH,
1727 : Loffset, Roffset,
1728 714 : Min(Ldata.npos, Rdata.npos));
1729 : }
1730 :
1731 126 : case OP_OR:
1732 126 : memset(&Ldata, 0, sizeof(Ldata));
1733 126 : memset(&Rdata, 0, sizeof(Rdata));
1734 :
1735 126 : lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1736 : arg, flags, chkcond, &Ldata);
1737 126 : rmatch = TS_phrase_execute(curitem + 1,
1738 : arg, flags, chkcond, &Rdata);
1739 :
1740 126 : if (lmatch == TS_NO && rmatch == TS_NO)
1741 10 : return TS_NO;
1742 :
1743 : /*
1744 : * If either operand has no position information, then we can't
1745 : * return reliable position data, only a MAYBE result.
1746 : */
1747 116 : if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1748 0 : return TS_MAYBE;
1749 :
1750 : /*
1751 : * Cope with undefined output width from failed submatch. (This
1752 : * takes less code than trying to ensure that all failure returns
1753 : * set data->width to zero.)
1754 : */
1755 116 : if (lmatch == TS_NO)
1756 15 : Ldata.width = 0;
1757 116 : if (rmatch == TS_NO)
1758 68 : Rdata.width = 0;
1759 :
1760 : /*
1761 : * For OP_AND and OP_OR, report the width of the wider of the two
1762 : * inputs, and align the narrower input's positions to the right
1763 : * end of that width. This rule deals at least somewhat
1764 : * reasonably with cases like "x <-> (y | z <-> q)".
1765 : */
1766 116 : maxwidth = Max(Ldata.width, Rdata.width);
1767 116 : Loffset = maxwidth - Ldata.width;
1768 116 : Roffset = maxwidth - Rdata.width;
1769 116 : data->width = maxwidth;
1770 :
1771 116 : if (Ldata.negate && Rdata.negate)
1772 : {
1773 : /* !L | !R: treat as !(L & R) */
1774 5 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1775 : TSPO_BOTH,
1776 : Loffset, Roffset,
1777 5 : Min(Ldata.npos, Rdata.npos));
1778 5 : data->negate = true;
1779 5 : return TS_YES;
1780 : }
1781 111 : else if (Ldata.negate)
1782 : {
1783 : /* !L | R: treat as !(L & !R) */
1784 25 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1785 : TSPO_L_ONLY,
1786 : Loffset, Roffset,
1787 : Ldata.npos);
1788 25 : data->negate = true;
1789 25 : return TS_YES;
1790 : }
1791 86 : else if (Rdata.negate)
1792 : {
1793 : /* L | !R: treat as !(!L & R) */
1794 5 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1795 : TSPO_R_ONLY,
1796 : Loffset, Roffset,
1797 : Rdata.npos);
1798 5 : data->negate = true;
1799 5 : return TS_YES;
1800 : }
1801 : else
1802 : {
1803 : /* straight OR */
1804 81 : return TS_phrase_output(data, &Ldata, &Rdata,
1805 : TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1806 : Loffset, Roffset,
1807 81 : Ldata.npos + Rdata.npos);
1808 : }
1809 :
1810 0 : default:
1811 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1812 : }
1813 :
1814 : /* not reachable, but keep compiler quiet */
1815 0 : return TS_NO;
1816 : }
1817 :
1818 :
1819 : /*
1820 : * Evaluate tsquery boolean expression.
1821 : *
1822 : * curitem: current tsquery item (initially, the first one)
1823 : * arg: opaque value to pass through to callback function
1824 : * flags: bitmask of flag bits shown in ts_utils.h
1825 : * chkcond: callback function to check whether a primitive value is present
1826 : */
1827 : bool
1828 346294 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1829 : TSExecuteCallback chkcond)
1830 : {
1831 : /*
1832 : * If we get TS_MAYBE from the recursion, return true. We could only see
1833 : * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
1834 : * need to check again.
1835 : */
1836 346294 : return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
1837 : }
1838 :
1839 : /*
1840 : * Evaluate tsquery boolean expression.
1841 : *
1842 : * This is the same as TS_execute except that TS_MAYBE is returned as-is.
1843 : */
1844 : TSTernaryValue
1845 24628 : TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
1846 : TSExecuteCallback chkcond)
1847 : {
1848 24628 : return TS_execute_recurse(curitem, arg, flags, chkcond);
1849 : }
1850 :
1851 : /*
1852 : * TS_execute recursion for operators above any phrase operator. Here we do
1853 : * not need to worry about lexeme positions. As soon as we hit an OP_PHRASE
1854 : * operator, we pass it off to TS_phrase_execute which does worry.
1855 : */
1856 : static TSTernaryValue
1857 702946 : TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
1858 : TSExecuteCallback chkcond)
1859 : {
1860 : TSTernaryValue lmatch;
1861 :
1862 : /* since this function recurses, it could be driven to stack overflow */
1863 702946 : check_stack_depth();
1864 :
1865 : /* ... and let's check for query cancel while we're at it */
1866 702946 : CHECK_FOR_INTERRUPTS();
1867 :
1868 702946 : if (curitem->type == QI_VAL)
1869 282246 : return chkcond(arg, (QueryOperand *) curitem,
1870 : NULL /* don't need position info */ );
1871 :
1872 420700 : switch (curitem->qoperator.oper)
1873 : {
1874 135515 : case OP_NOT:
1875 135515 : if (flags & TS_EXEC_SKIP_NOT)
1876 0 : return TS_YES;
1877 135515 : switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1878 : {
1879 127862 : case TS_NO:
1880 127862 : return TS_YES;
1881 3264 : case TS_YES:
1882 3264 : return TS_NO;
1883 4389 : case TS_MAYBE:
1884 4389 : return TS_MAYBE;
1885 : }
1886 0 : break;
1887 :
1888 55800 : case OP_AND:
1889 55800 : lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1890 : flags, chkcond);
1891 55800 : if (lmatch == TS_NO)
1892 44283 : return TS_NO;
1893 11517 : switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1894 : {
1895 6755 : case TS_NO:
1896 6755 : return TS_NO;
1897 2236 : case TS_YES:
1898 2236 : return lmatch;
1899 2526 : case TS_MAYBE:
1900 2526 : return TS_MAYBE;
1901 : }
1902 0 : break;
1903 :
1904 72668 : case OP_OR:
1905 72668 : lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1906 : flags, chkcond);
1907 72668 : if (lmatch == TS_YES)
1908 16144 : return TS_YES;
1909 56524 : switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1910 : {
1911 38317 : case TS_NO:
1912 38317 : return lmatch;
1913 4962 : case TS_YES:
1914 4962 : return TS_YES;
1915 13245 : case TS_MAYBE:
1916 13245 : return TS_MAYBE;
1917 : }
1918 0 : break;
1919 :
1920 156717 : case OP_PHRASE:
1921 :
1922 : /*
1923 : * If we get a MAYBE result, and the caller doesn't want that,
1924 : * convert it to NO. It would be more consistent, perhaps, to
1925 : * return the result of TS_phrase_execute() verbatim and then
1926 : * convert MAYBE results at the top of the recursion. But
1927 : * converting at the topmost phrase operator gives results that
1928 : * are bug-compatible with the old implementation, so do it like
1929 : * this for now.
1930 : */
1931 156717 : switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
1932 : {
1933 119916 : case TS_NO:
1934 119916 : return TS_NO;
1935 19602 : case TS_YES:
1936 19602 : return TS_YES;
1937 17199 : case TS_MAYBE:
1938 17199 : return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
1939 : }
1940 0 : break;
1941 :
1942 0 : default:
1943 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1944 : }
1945 :
1946 : /* not reachable, but keep compiler quiet */
1947 0 : return TS_NO;
1948 : }
1949 :
1950 : /*
1951 : * Evaluate tsquery and report locations of matching terms.
1952 : *
1953 : * This is like TS_execute except that it returns match locations not just
1954 : * success/failure status. The callback function is required to provide
1955 : * position data (we report failure if it doesn't).
1956 : *
1957 : * On successful match, the result is a List of ExecPhraseData structs, one
1958 : * for each AND'ed term or phrase operator in the query. Each struct includes
1959 : * a sorted array of lexeme positions matching that term. (Recall that for
1960 : * phrase operators, the match includes width+1 lexemes, and the recorded
1961 : * position is that of the rightmost lexeme.)
1962 : *
1963 : * OR subexpressions are handled by union'ing their match locations into a
1964 : * single List element, which is valid since any of those locations contains
1965 : * a match. However, when some of the OR'ed terms are phrase operators, we
1966 : * report the maximum width of any of the OR'ed terms, making such cases
1967 : * slightly imprecise in the conservative direction. (For example, if the
1968 : * tsquery is "(A <-> B) | C", an occurrence of C in the data would be
1969 : * reported as though it includes the lexeme to the left of C.)
1970 : *
1971 : * Locations of NOT subexpressions are not reported. (Obviously, there can
1972 : * be no successful NOT matches at top level, or the match would have failed.
1973 : * So this amounts to ignoring NOTs underneath ORs.)
1974 : *
1975 : * The result is NIL if no match, or if position data was not returned.
1976 : *
1977 : * Arguments are the same as for TS_execute, although flags is currently
1978 : * vestigial since none of the defined bits are sensible here.
1979 : */
1980 : List *
1981 293 : TS_execute_locations(QueryItem *curitem, void *arg,
1982 : uint32 flags,
1983 : TSExecuteCallback chkcond)
1984 : {
1985 : List *result;
1986 :
1987 : /* No flags supported, as yet */
1988 : Assert(flags == TS_EXEC_EMPTY);
1989 293 : if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
1990 118 : return result;
1991 175 : return NIL;
1992 : }
1993 :
1994 : /*
1995 : * TS_execute_locations recursion for operators above any phrase operator.
1996 : * OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
1997 : */
1998 : static bool
1999 839 : TS_execute_locations_recurse(QueryItem *curitem, void *arg,
2000 : TSExecuteCallback chkcond,
2001 : List **locations)
2002 : {
2003 : bool lmatch,
2004 : rmatch;
2005 : List *llocations,
2006 : *rlocations;
2007 : ExecPhraseData *data;
2008 :
2009 : /* since this function recurses, it could be driven to stack overflow */
2010 839 : check_stack_depth();
2011 :
2012 : /* ... and let's check for query cancel while we're at it */
2013 839 : CHECK_FOR_INTERRUPTS();
2014 :
2015 : /* Default locations result is empty */
2016 839 : *locations = NIL;
2017 :
2018 839 : if (curitem->type == QI_VAL)
2019 : {
2020 359 : data = palloc0_object(ExecPhraseData);
2021 359 : if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
2022 : {
2023 184 : *locations = list_make1(data);
2024 184 : return true;
2025 : }
2026 175 : pfree(data);
2027 175 : return false;
2028 : }
2029 :
2030 480 : switch (curitem->qoperator.oper)
2031 : {
2032 10 : case OP_NOT:
2033 10 : if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
2034 : &llocations))
2035 0 : return true; /* we don't pass back any locations */
2036 10 : return false;
2037 :
2038 400 : case OP_AND:
2039 400 : if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
2040 : arg, chkcond,
2041 : &llocations))
2042 304 : return false;
2043 96 : if (!TS_execute_locations_recurse(curitem + 1,
2044 : arg, chkcond,
2045 : &rlocations))
2046 41 : return false;
2047 55 : *locations = list_concat(llocations, rlocations);
2048 55 : return true;
2049 :
2050 20 : case OP_OR:
2051 20 : lmatch = TS_execute_locations_recurse(curitem + curitem->qoperator.left,
2052 : arg, chkcond,
2053 : &llocations);
2054 20 : rmatch = TS_execute_locations_recurse(curitem + 1,
2055 : arg, chkcond,
2056 : &rlocations);
2057 20 : if (lmatch || rmatch)
2058 : {
2059 : /*
2060 : * We generate an AND'able location struct from each
2061 : * combination of sub-matches, following the disjunctive law
2062 : * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
2063 : *
2064 : * However, if either input didn't produce locations (i.e., it
2065 : * failed or was a NOT), we must just return the other list.
2066 : */
2067 20 : if (llocations == NIL)
2068 0 : *locations = rlocations;
2069 20 : else if (rlocations == NIL)
2070 10 : *locations = llocations;
2071 : else
2072 : {
2073 : ListCell *ll;
2074 :
2075 20 : foreach(ll, llocations)
2076 : {
2077 10 : ExecPhraseData *ldata = (ExecPhraseData *) lfirst(ll);
2078 : ListCell *lr;
2079 :
2080 20 : foreach(lr, rlocations)
2081 : {
2082 10 : ExecPhraseData *rdata = (ExecPhraseData *) lfirst(lr);
2083 :
2084 10 : data = palloc0_object(ExecPhraseData);
2085 10 : (void) TS_phrase_output(data, ldata, rdata,
2086 : TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
2087 : 0, 0,
2088 10 : ldata->npos + rdata->npos);
2089 : /* Report the larger width, as explained above. */
2090 10 : data->width = Max(ldata->width, rdata->width);
2091 10 : *locations = lappend(*locations, data);
2092 : }
2093 : }
2094 : }
2095 :
2096 20 : return true;
2097 : }
2098 0 : return false;
2099 :
2100 50 : case OP_PHRASE:
2101 : /* We can hand this off to TS_phrase_execute */
2102 50 : data = palloc0_object(ExecPhraseData);
2103 50 : if (TS_phrase_execute(curitem, arg, TS_EXEC_EMPTY, chkcond,
2104 : data) == TS_YES)
2105 : {
2106 50 : if (!data->negate)
2107 50 : *locations = list_make1(data);
2108 50 : return true;
2109 : }
2110 0 : pfree(data);
2111 0 : return false;
2112 :
2113 0 : default:
2114 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2115 : }
2116 :
2117 : /* not reachable, but keep compiler quiet */
2118 : return false;
2119 : }
2120 :
2121 : /*
2122 : * Detect whether a tsquery boolean expression requires any positive matches
2123 : * to values shown in the tsquery.
2124 : *
2125 : * This is needed to know whether a GIN index search requires full index scan.
2126 : * For example, 'x & !y' requires a match of x, so it's sufficient to scan
2127 : * entries for x; but 'x | !y' could match rows containing neither x nor y.
2128 : */
2129 : bool
2130 631 : tsquery_requires_match(QueryItem *curitem)
2131 : {
2132 : /* since this function recurses, it could be driven to stack overflow */
2133 631 : check_stack_depth();
2134 :
2135 631 : if (curitem->type == QI_VAL)
2136 301 : return true;
2137 :
2138 330 : switch (curitem->qoperator.oper)
2139 : {
2140 127 : case OP_NOT:
2141 :
2142 : /*
2143 : * Assume there are no required matches underneath a NOT. For
2144 : * some cases with nested NOTs, we could prove there's a required
2145 : * match, but it seems unlikely to be worth the trouble.
2146 : */
2147 127 : return false;
2148 :
2149 153 : case OP_PHRASE:
2150 :
2151 : /*
2152 : * Treat OP_PHRASE as OP_AND here
2153 : */
2154 : case OP_AND:
2155 : /* If either side requires a match, we're good */
2156 153 : if (tsquery_requires_match(curitem + curitem->qoperator.left))
2157 117 : return true;
2158 : else
2159 36 : return tsquery_requires_match(curitem + 1);
2160 :
2161 50 : case OP_OR:
2162 : /* Both sides must require a match */
2163 50 : if (tsquery_requires_match(curitem + curitem->qoperator.left))
2164 50 : return tsquery_requires_match(curitem + 1);
2165 : else
2166 0 : return false;
2167 :
2168 0 : default:
2169 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2170 : }
2171 :
2172 : /* not reachable, but keep compiler quiet */
2173 : return false;
2174 : }
2175 :
2176 : /*
2177 : * boolean operations
2178 : */
2179 : Datum
2180 40 : ts_match_qv(PG_FUNCTION_ARGS)
2181 : {
2182 40 : PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
2183 : PG_GETARG_DATUM(1),
2184 : PG_GETARG_DATUM(0)));
2185 : }
2186 :
2187 : Datum
2188 146808 : ts_match_vq(PG_FUNCTION_ARGS)
2189 : {
2190 146808 : TSVector val = PG_GETARG_TSVECTOR(0);
2191 146808 : TSQuery query = PG_GETARG_TSQUERY(1);
2192 : CHKVAL chkval;
2193 : bool result;
2194 :
2195 : /* empty query matches nothing */
2196 146808 : if (!query->size)
2197 : {
2198 0 : PG_FREE_IF_COPY(val, 0);
2199 0 : PG_FREE_IF_COPY(query, 1);
2200 0 : PG_RETURN_BOOL(false);
2201 : }
2202 :
2203 146808 : chkval.arrb = ARRPTR(val);
2204 146808 : chkval.arre = chkval.arrb + val->size;
2205 146808 : chkval.values = STRPTR(val);
2206 146808 : chkval.operand = GETOPERAND(query);
2207 146808 : result = TS_execute(GETQUERY(query),
2208 : &chkval,
2209 : TS_EXEC_EMPTY,
2210 : checkcondition_str);
2211 :
2212 146808 : PG_FREE_IF_COPY(val, 0);
2213 146808 : PG_FREE_IF_COPY(query, 1);
2214 146808 : PG_RETURN_BOOL(result);
2215 : }
2216 :
2217 : Datum
2218 0 : ts_match_tt(PG_FUNCTION_ARGS)
2219 : {
2220 : TSVector vector;
2221 : TSQuery query;
2222 : bool res;
2223 :
2224 0 : vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
2225 : PG_GETARG_DATUM(0)));
2226 0 : query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
2227 : PG_GETARG_DATUM(1)));
2228 :
2229 0 : res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
2230 : TSVectorGetDatum(vector),
2231 : TSQueryGetDatum(query)));
2232 :
2233 0 : pfree(vector);
2234 0 : pfree(query);
2235 :
2236 0 : PG_RETURN_BOOL(res);
2237 : }
2238 :
2239 : Datum
2240 0 : ts_match_tq(PG_FUNCTION_ARGS)
2241 : {
2242 : TSVector vector;
2243 0 : TSQuery query = PG_GETARG_TSQUERY(1);
2244 : bool res;
2245 :
2246 0 : vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
2247 : PG_GETARG_DATUM(0)));
2248 :
2249 0 : res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
2250 : TSVectorGetDatum(vector),
2251 : TSQueryGetDatum(query)));
2252 :
2253 0 : pfree(vector);
2254 0 : PG_FREE_IF_COPY(query, 1);
2255 :
2256 0 : PG_RETURN_BOOL(res);
2257 : }
2258 :
2259 : /*
2260 : * ts_stat statistic function support
2261 : */
2262 :
2263 :
2264 : /*
2265 : * Returns the number of positions in value 'wptr' within tsvector 'txt',
2266 : * that have a weight equal to one of the weights in 'weight' bitmask.
2267 : */
2268 : static int
2269 5452 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
2270 : {
2271 5452 : int len = POSDATALEN(txt, wptr);
2272 5452 : int num = 0;
2273 5452 : WordEntryPos *ptr = POSDATAPTR(txt, wptr);
2274 :
2275 11100 : while (len--)
2276 : {
2277 5648 : if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2278 8 : num++;
2279 5648 : ptr++;
2280 : }
2281 5452 : return num;
2282 : }
2283 :
2284 : #define compareStatWord(a,e,t) \
2285 : tsCompareString((a)->lexeme, (a)->lenlexeme, \
2286 : STRPTR(t) + (e)->pos, (e)->len, \
2287 : false)
2288 :
2289 : static void
2290 230416 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
2291 : {
2292 230416 : WordEntry *we = ARRPTR(txt) + off;
2293 230416 : StatEntry *node = stat->root,
2294 230416 : *pnode = NULL;
2295 : int n,
2296 230416 : res = 0;
2297 230416 : uint32 depth = 1;
2298 :
2299 230416 : if (stat->weight == 0)
2300 115208 : n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2301 : else
2302 115208 : n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2303 :
2304 230416 : if (n == 0)
2305 115204 : return; /* nothing to insert */
2306 :
2307 1163592 : while (node)
2308 : {
2309 1159016 : res = compareStatWord(node, we, txt);
2310 :
2311 1159016 : if (res == 0)
2312 : {
2313 110636 : break;
2314 : }
2315 : else
2316 : {
2317 1048380 : pnode = node;
2318 1048380 : node = (res < 0) ? node->left : node->right;
2319 : }
2320 1048380 : depth++;
2321 : }
2322 :
2323 115212 : if (depth > stat->maxdepth)
2324 84 : stat->maxdepth = depth;
2325 :
2326 115212 : if (node == NULL)
2327 : {
2328 4576 : node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
2329 4576 : node->left = node->right = NULL;
2330 4576 : node->ndoc = 1;
2331 4576 : node->nentry = n;
2332 4576 : node->lenlexeme = we->len;
2333 4576 : memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2334 :
2335 4576 : if (pnode == NULL)
2336 : {
2337 8 : stat->root = node;
2338 : }
2339 : else
2340 : {
2341 4568 : if (res < 0)
2342 2254 : pnode->left = node;
2343 : else
2344 2314 : pnode->right = node;
2345 : }
2346 : }
2347 : else
2348 : {
2349 110636 : node->ndoc++;
2350 110636 : node->nentry += n;
2351 : }
2352 : }
2353 :
2354 : static void
2355 330256 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
2356 : uint32 low, uint32 high, uint32 offset)
2357 : {
2358 : uint32 pos;
2359 330256 : uint32 middle = (low + high) >> 1;
2360 :
2361 330256 : pos = (low + middle) >> 1;
2362 330256 : if (low != middle && pos >= offset && pos - offset < txt->size)
2363 113552 : insertStatEntry(persistentContext, stat, txt, pos - offset);
2364 330256 : pos = (high + middle + 1) >> 1;
2365 330256 : if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2366 112856 : insertStatEntry(persistentContext, stat, txt, pos - offset);
2367 :
2368 330256 : if (low != middle)
2369 165128 : chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
2370 330256 : if (high != middle + 1)
2371 161120 : chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2372 330256 : }
2373 :
2374 : /*
2375 : * This is written like a custom aggregate function, because the
2376 : * original plan was to do just that. Unfortunately, an aggregate function
2377 : * can't return a set, so that plan was abandoned. If that limitation is
2378 : * lifted in the future, ts_stat could be a real aggregate function so that
2379 : * you could use it like this:
2380 : *
2381 : * SELECT ts_stat(vector_column) FROM vector_table;
2382 : *
2383 : * where vector_column is a tsvector-type column in vector_table.
2384 : */
2385 :
2386 : static TSVectorStat *
2387 4072 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
2388 : {
2389 4072 : TSVector txt = DatumGetTSVector(data);
2390 : uint32 i,
2391 4072 : nbit = 0,
2392 : offset;
2393 :
2394 4072 : if (stat == NULL)
2395 : { /* Init in first */
2396 0 : stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2397 0 : stat->maxdepth = 1;
2398 : }
2399 :
2400 : /* simple check of correctness */
2401 4072 : if (txt == NULL || txt->size == 0)
2402 : {
2403 64 : if (txt && txt != (TSVector) DatumGetPointer(data))
2404 64 : pfree(txt);
2405 64 : return stat;
2406 : }
2407 :
2408 4008 : i = txt->size - 1;
2409 28480 : for (; i > 0; i >>= 1)
2410 24472 : nbit++;
2411 :
2412 4008 : nbit = 1 << nbit;
2413 4008 : offset = (nbit - txt->size) / 2;
2414 :
2415 4008 : insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2416 4008 : chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
2417 :
2418 4008 : return stat;
2419 : }
2420 :
2421 : static void
2422 8 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
2423 : TSVectorStat *stat)
2424 : {
2425 : TupleDesc tupdesc;
2426 : MemoryContext oldcontext;
2427 : StatEntry *node;
2428 :
2429 8 : funcctx->user_fctx = stat;
2430 :
2431 8 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2432 :
2433 8 : stat->stack = palloc0_array(StatEntry *, stat->maxdepth + 1);
2434 8 : stat->stackpos = 0;
2435 :
2436 8 : node = stat->root;
2437 : /* find leftmost value */
2438 8 : if (node == NULL)
2439 0 : stat->stack[stat->stackpos] = NULL;
2440 : else
2441 : for (;;)
2442 : {
2443 32 : stat->stack[stat->stackpos] = node;
2444 32 : if (node->left)
2445 : {
2446 24 : stat->stackpos++;
2447 24 : node = node->left;
2448 : }
2449 : else
2450 8 : break;
2451 : }
2452 : Assert(stat->stackpos <= stat->maxdepth);
2453 :
2454 8 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2455 0 : elog(ERROR, "return type must be a row type");
2456 8 : funcctx->tuple_desc = tupdesc;
2457 8 : funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2458 :
2459 8 : MemoryContextSwitchTo(oldcontext);
2460 8 : }
2461 :
2462 : static StatEntry *
2463 9152 : walkStatEntryTree(TSVectorStat *stat)
2464 : {
2465 9152 : StatEntry *node = stat->stack[stat->stackpos];
2466 :
2467 9152 : if (node == NULL)
2468 0 : return NULL;
2469 :
2470 9152 : if (node->ndoc != 0)
2471 : {
2472 : /* return entry itself: we already was at left sublink */
2473 2262 : return node;
2474 : }
2475 6890 : else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2476 : {
2477 : /* go on right sublink */
2478 2314 : stat->stackpos++;
2479 2314 : node = node->right;
2480 :
2481 : /* find most-left value */
2482 : for (;;)
2483 : {
2484 4544 : stat->stack[stat->stackpos] = node;
2485 4544 : if (node->left)
2486 : {
2487 2230 : stat->stackpos++;
2488 2230 : node = node->left;
2489 : }
2490 : else
2491 2314 : break;
2492 : }
2493 2314 : Assert(stat->stackpos <= stat->maxdepth);
2494 : }
2495 : else
2496 : {
2497 : /* we already return all left subtree, itself and right subtree */
2498 4576 : if (stat->stackpos == 0)
2499 8 : return NULL;
2500 :
2501 4568 : stat->stackpos--;
2502 4568 : return walkStatEntryTree(stat);
2503 : }
2504 :
2505 2314 : return node;
2506 : }
2507 :
2508 : static Datum
2509 4584 : ts_process_call(FuncCallContext *funcctx)
2510 : {
2511 : TSVectorStat *st;
2512 : StatEntry *entry;
2513 :
2514 4584 : st = (TSVectorStat *) funcctx->user_fctx;
2515 :
2516 4584 : entry = walkStatEntryTree(st);
2517 :
2518 4584 : if (entry != NULL)
2519 : {
2520 : Datum result;
2521 : char *values[3];
2522 : char ndoc[16];
2523 : char nentry[16];
2524 : HeapTuple tuple;
2525 :
2526 4576 : values[0] = palloc(entry->lenlexeme + 1);
2527 4576 : memcpy(values[0], entry->lexeme, entry->lenlexeme);
2528 4576 : (values[0])[entry->lenlexeme] = '\0';
2529 4576 : sprintf(ndoc, "%d", entry->ndoc);
2530 4576 : values[1] = ndoc;
2531 4576 : sprintf(nentry, "%d", entry->nentry);
2532 4576 : values[2] = nentry;
2533 :
2534 4576 : tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2535 4576 : result = HeapTupleGetDatum(tuple);
2536 :
2537 4576 : pfree(values[0]);
2538 :
2539 : /* mark entry as already visited */
2540 4576 : entry->ndoc = 0;
2541 :
2542 4576 : return result;
2543 : }
2544 :
2545 8 : return (Datum) 0;
2546 : }
2547 :
2548 : static TSVectorStat *
2549 8 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
2550 : {
2551 8 : char *query = text_to_cstring(txt);
2552 : TSVectorStat *stat;
2553 : bool isnull;
2554 : Portal portal;
2555 : SPIPlanPtr plan;
2556 :
2557 8 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2558 : /* internal error */
2559 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2560 :
2561 8 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2562 : /* internal error */
2563 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2564 :
2565 8 : SPI_cursor_fetch(portal, true, 100);
2566 :
2567 8 : if (SPI_tuptable == NULL ||
2568 8 : SPI_tuptable->tupdesc->natts != 1 ||
2569 8 : !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
2570 : TSVECTOROID))
2571 0 : ereport(ERROR,
2572 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2573 : errmsg("ts_stat query must return one tsvector column")));
2574 :
2575 8 : stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2576 8 : stat->maxdepth = 1;
2577 :
2578 8 : if (ws)
2579 : {
2580 : char *buf;
2581 : const char *end;
2582 :
2583 4 : buf = VARDATA_ANY(ws);
2584 4 : end = buf + VARSIZE_ANY_EXHDR(ws);
2585 12 : while (buf < end)
2586 : {
2587 8 : int len = pg_mblen_range(buf, end);
2588 :
2589 8 : if (len == 1)
2590 : {
2591 8 : switch (*buf)
2592 : {
2593 4 : case 'A':
2594 : case 'a':
2595 4 : stat->weight |= 1 << 3;
2596 4 : break;
2597 4 : case 'B':
2598 : case 'b':
2599 4 : stat->weight |= 1 << 2;
2600 4 : break;
2601 0 : case 'C':
2602 : case 'c':
2603 0 : stat->weight |= 1 << 1;
2604 0 : break;
2605 0 : case 'D':
2606 : case 'd':
2607 0 : stat->weight |= 1;
2608 0 : break;
2609 0 : default:
2610 0 : stat->weight |= 0;
2611 : }
2612 : }
2613 8 : buf += len;
2614 : }
2615 : }
2616 :
2617 56 : while (SPI_processed > 0)
2618 : {
2619 : uint64 i;
2620 :
2621 4120 : for (i = 0; i < SPI_processed; i++)
2622 : {
2623 4072 : Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
2624 :
2625 4072 : if (!isnull)
2626 4072 : stat = ts_accum(persistentContext, stat, data);
2627 : }
2628 :
2629 48 : SPI_freetuptable(SPI_tuptable);
2630 48 : SPI_cursor_fetch(portal, true, 100);
2631 : }
2632 :
2633 8 : SPI_freetuptable(SPI_tuptable);
2634 8 : SPI_cursor_close(portal);
2635 8 : SPI_freeplan(plan);
2636 8 : pfree(query);
2637 :
2638 8 : return stat;
2639 : }
2640 :
2641 : Datum
2642 4576 : ts_stat1(PG_FUNCTION_ARGS)
2643 : {
2644 : FuncCallContext *funcctx;
2645 : Datum result;
2646 :
2647 4576 : if (SRF_IS_FIRSTCALL())
2648 : {
2649 : TSVectorStat *stat;
2650 4 : text *txt = PG_GETARG_TEXT_PP(0);
2651 :
2652 4 : funcctx = SRF_FIRSTCALL_INIT();
2653 4 : SPI_connect();
2654 4 : stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2655 4 : PG_FREE_IF_COPY(txt, 0);
2656 4 : ts_setup_firstcall(fcinfo, funcctx, stat);
2657 4 : SPI_finish();
2658 : }
2659 :
2660 4576 : funcctx = SRF_PERCALL_SETUP();
2661 4576 : if ((result = ts_process_call(funcctx)) != (Datum) 0)
2662 4572 : SRF_RETURN_NEXT(funcctx, result);
2663 4 : SRF_RETURN_DONE(funcctx);
2664 : }
2665 :
2666 : Datum
2667 8 : ts_stat2(PG_FUNCTION_ARGS)
2668 : {
2669 : FuncCallContext *funcctx;
2670 : Datum result;
2671 :
2672 8 : if (SRF_IS_FIRSTCALL())
2673 : {
2674 : TSVectorStat *stat;
2675 4 : text *txt = PG_GETARG_TEXT_PP(0);
2676 4 : text *ws = PG_GETARG_TEXT_PP(1);
2677 :
2678 4 : funcctx = SRF_FIRSTCALL_INIT();
2679 4 : SPI_connect();
2680 4 : stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2681 4 : PG_FREE_IF_COPY(txt, 0);
2682 4 : PG_FREE_IF_COPY(ws, 1);
2683 4 : ts_setup_firstcall(fcinfo, funcctx, stat);
2684 4 : SPI_finish();
2685 : }
2686 :
2687 8 : funcctx = SRF_PERCALL_SETUP();
2688 8 : if ((result = ts_process_call(funcctx)) != (Datum) 0)
2689 4 : SRF_RETURN_NEXT(funcctx, result);
2690 4 : SRF_RETURN_DONE(funcctx);
2691 : }
2692 :
2693 :
2694 : /*
2695 : * Triggers for automatic update of a tsvector column from text column(s)
2696 : *
2697 : * Trigger arguments are either
2698 : * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2699 : * name of tsvector col, name of regconfig col, name(s) of text col(s)
2700 : * ie, tsconfig can either be specified by name, or indirectly as the
2701 : * contents of a regconfig field in the row. If the name is used, it must
2702 : * be explicitly schema-qualified.
2703 : */
2704 : Datum
2705 12 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
2706 : {
2707 12 : return tsvector_update_trigger(fcinfo, false);
2708 : }
2709 :
2710 : Datum
2711 0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
2712 : {
2713 0 : return tsvector_update_trigger(fcinfo, true);
2714 : }
2715 :
2716 : static Datum
2717 12 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
2718 : {
2719 : TriggerData *trigdata;
2720 : Trigger *trigger;
2721 : Relation rel;
2722 12 : HeapTuple rettuple = NULL;
2723 : int tsvector_attr_num,
2724 : i;
2725 : ParsedText prs;
2726 : Datum datum;
2727 : bool isnull;
2728 : text *txt;
2729 : Oid cfgId;
2730 : bool update_needed;
2731 :
2732 : /* Check call context */
2733 12 : if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2734 0 : elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2735 :
2736 12 : trigdata = (TriggerData *) fcinfo->context;
2737 12 : if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2738 0 : elog(ERROR, "tsvector_update_trigger: must be fired for row");
2739 12 : if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2740 0 : elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2741 :
2742 12 : if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2743 : {
2744 8 : rettuple = trigdata->tg_trigtuple;
2745 8 : update_needed = true;
2746 : }
2747 4 : else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2748 : {
2749 4 : rettuple = trigdata->tg_newtuple;
2750 4 : update_needed = false; /* computed below */
2751 : }
2752 : else
2753 0 : elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2754 :
2755 12 : trigger = trigdata->tg_trigger;
2756 12 : rel = trigdata->tg_relation;
2757 :
2758 12 : if (trigger->tgnargs < 3)
2759 0 : elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2760 :
2761 : /* Find the target tsvector column */
2762 12 : tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2763 12 : if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
2764 0 : ereport(ERROR,
2765 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2766 : errmsg("tsvector column \"%s\" does not exist",
2767 : trigger->tgargs[0])));
2768 : /* This will effectively reject system columns, so no separate test: */
2769 12 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
2770 : TSVECTOROID))
2771 0 : ereport(ERROR,
2772 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2773 : errmsg("column \"%s\" is not of tsvector type",
2774 : trigger->tgargs[0])));
2775 :
2776 : /* Find the configuration to use */
2777 12 : if (config_column)
2778 : {
2779 : int config_attr_num;
2780 :
2781 0 : config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2782 0 : if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
2783 0 : ereport(ERROR,
2784 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2785 : errmsg("configuration column \"%s\" does not exist",
2786 : trigger->tgargs[1])));
2787 0 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
2788 : REGCONFIGOID))
2789 0 : ereport(ERROR,
2790 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2791 : errmsg("column \"%s\" is not of regconfig type",
2792 : trigger->tgargs[1])));
2793 :
2794 0 : datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2795 0 : if (isnull)
2796 0 : ereport(ERROR,
2797 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2798 : errmsg("configuration column \"%s\" must not be null",
2799 : trigger->tgargs[1])));
2800 0 : cfgId = DatumGetObjectId(datum);
2801 : }
2802 : else
2803 : {
2804 : List *names;
2805 :
2806 12 : names = stringToQualifiedNameList(trigger->tgargs[1], NULL);
2807 : /* require a schema so that results are not search path dependent */
2808 12 : if (list_length(names) < 2)
2809 0 : ereport(ERROR,
2810 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2811 : errmsg("text search configuration name \"%s\" must be schema-qualified",
2812 : trigger->tgargs[1])));
2813 12 : cfgId = get_ts_config_oid(names, false);
2814 : }
2815 :
2816 : /* initialize parse state */
2817 12 : prs.lenwords = 32;
2818 12 : prs.curwords = 0;
2819 12 : prs.pos = 0;
2820 12 : prs.words = palloc_array(ParsedWord, prs.lenwords);
2821 :
2822 : /* find all words in indexable column(s) */
2823 24 : for (i = 2; i < trigger->tgnargs; i++)
2824 : {
2825 : int numattr;
2826 :
2827 12 : numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2828 12 : if (numattr == SPI_ERROR_NOATTRIBUTE)
2829 0 : ereport(ERROR,
2830 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2831 : errmsg("column \"%s\" does not exist",
2832 : trigger->tgargs[i])));
2833 12 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
2834 0 : ereport(ERROR,
2835 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2836 : errmsg("column \"%s\" is not of a character type",
2837 : trigger->tgargs[i])));
2838 :
2839 12 : if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
2840 4 : update_needed = true;
2841 :
2842 12 : datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2843 12 : if (isnull)
2844 4 : continue;
2845 :
2846 8 : txt = DatumGetTextPP(datum);
2847 :
2848 8 : parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
2849 :
2850 8 : if (txt != (text *) DatumGetPointer(datum))
2851 0 : pfree(txt);
2852 : }
2853 :
2854 12 : if (update_needed)
2855 : {
2856 : /* make tsvector value */
2857 12 : datum = TSVectorGetDatum(make_tsvector(&prs));
2858 12 : isnull = false;
2859 :
2860 : /* and insert it into tuple */
2861 12 : rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2862 : 1, &tsvector_attr_num,
2863 : &datum, &isnull);
2864 :
2865 12 : pfree(DatumGetPointer(datum));
2866 : }
2867 :
2868 12 : return PointerGetDatum(rettuple);
2869 : }
|