Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsginidx.c
4 : * GIN support functions for tsvector_ops
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsginidx.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/gin.h"
17 : #include "tsearch/ts_type.h"
18 : #include "tsearch/ts_utils.h"
19 : #include "utils/builtins.h"
20 : #include "varatt.h"
21 :
22 :
23 : Datum
24 1810072 : gin_cmp_tslexeme(PG_FUNCTION_ARGS)
25 : {
26 1810072 : text *a = PG_GETARG_TEXT_PP(0);
27 1810072 : text *b = PG_GETARG_TEXT_PP(1);
28 : int cmp;
29 :
30 3620144 : cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
31 3620144 : VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
32 : false);
33 :
34 1810072 : PG_FREE_IF_COPY(a, 0);
35 1810072 : PG_FREE_IF_COPY(b, 1);
36 1810072 : PG_RETURN_INT32(cmp);
37 : }
38 :
39 : Datum
40 444 : gin_cmp_prefix(PG_FUNCTION_ARGS)
41 : {
42 444 : text *a = PG_GETARG_TEXT_PP(0);
43 444 : text *b = PG_GETARG_TEXT_PP(1);
44 :
45 : #ifdef NOT_USED
46 : StrategyNumber strategy = PG_GETARG_UINT16(2);
47 : Pointer extra_data = PG_GETARG_POINTER(3);
48 : #endif
49 : int cmp;
50 :
51 888 : cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
52 888 : VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
53 : true);
54 :
55 444 : if (cmp < 0)
56 12 : cmp = 1; /* prevent continue scan */
57 :
58 444 : PG_FREE_IF_COPY(a, 0);
59 444 : PG_FREE_IF_COPY(b, 1);
60 444 : PG_RETURN_INT32(cmp);
61 : }
62 :
63 : Datum
64 3096 : gin_extract_tsvector(PG_FUNCTION_ARGS)
65 : {
66 3096 : TSVector vector = PG_GETARG_TSVECTOR(0);
67 3096 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
68 3096 : Datum *entries = NULL;
69 :
70 3096 : *nentries = vector->size;
71 3096 : if (vector->size > 0)
72 : {
73 : int i;
74 3042 : WordEntry *we = ARRPTR(vector);
75 :
76 3042 : entries = (Datum *) palloc(sizeof(Datum) * vector->size);
77 :
78 175974 : for (i = 0; i < vector->size; i++)
79 : {
80 : text *txt;
81 :
82 172932 : txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
83 172932 : entries[i] = PointerGetDatum(txt);
84 :
85 172932 : we++;
86 : }
87 : }
88 :
89 3096 : PG_FREE_IF_COPY(vector, 0);
90 3096 : PG_RETURN_POINTER(entries);
91 : }
92 :
93 : Datum
94 450 : gin_extract_tsquery(PG_FUNCTION_ARGS)
95 : {
96 450 : TSQuery query = PG_GETARG_TSQUERY(0);
97 450 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
98 :
99 : /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
100 450 : bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
101 450 : Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
102 :
103 : /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
104 450 : int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
105 450 : Datum *entries = NULL;
106 :
107 450 : *nentries = 0;
108 :
109 450 : if (query->size > 0)
110 : {
111 450 : QueryItem *item = GETQUERY(query);
112 : int32 i,
113 : j;
114 : bool *partialmatch;
115 : int *map_item_operand;
116 :
117 : /*
118 : * If the query doesn't have any required positive matches (for
119 : * instance, it's something like '! foo'), we have to do a full index
120 : * scan.
121 : */
122 450 : if (tsquery_requires_match(item))
123 330 : *searchMode = GIN_SEARCH_MODE_DEFAULT;
124 : else
125 120 : *searchMode = GIN_SEARCH_MODE_ALL;
126 :
127 : /* count number of VAL items */
128 450 : j = 0;
129 1704 : for (i = 0; i < query->size; i++)
130 : {
131 1254 : if (item[i].type == QI_VAL)
132 768 : j++;
133 : }
134 450 : *nentries = j;
135 :
136 450 : entries = (Datum *) palloc(sizeof(Datum) * j);
137 450 : partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
138 :
139 : /*
140 : * Make map to convert item's number to corresponding operand's (the
141 : * same, entry's) number. Entry's number is used in check array in
142 : * consistent method. We use the same map for each entry.
143 : */
144 450 : *extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
145 450 : map_item_operand = (int *) palloc0(sizeof(int) * query->size);
146 :
147 : /* Now rescan the VAL items and fill in the arrays */
148 450 : j = 0;
149 1704 : for (i = 0; i < query->size; i++)
150 : {
151 1254 : if (item[i].type == QI_VAL)
152 : {
153 768 : QueryOperand *val = &item[i].qoperand;
154 : text *txt;
155 :
156 768 : txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
157 768 : val->length);
158 768 : entries[j] = PointerGetDatum(txt);
159 768 : partialmatch[j] = val->prefix;
160 768 : (*extra_data)[j] = (Pointer) map_item_operand;
161 768 : map_item_operand[i] = j;
162 768 : j++;
163 : }
164 : }
165 : }
166 :
167 450 : PG_FREE_IF_COPY(query, 0);
168 :
169 450 : PG_RETURN_POINTER(entries);
170 : }
171 :
172 : typedef struct
173 : {
174 : QueryItem *first_item;
175 : GinTernaryValue *check;
176 : int *map_item_operand;
177 : } GinChkVal;
178 :
179 : /*
180 : * TS_execute callback for matching a tsquery operand to GIN index data
181 : */
182 : static TSTernaryValue
183 48366 : checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
184 : {
185 48366 : GinChkVal *gcv = (GinChkVal *) checkval;
186 : int j;
187 : GinTernaryValue result;
188 :
189 : /* convert item's number to corresponding entry's (operand's) number */
190 48366 : j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
191 :
192 : /* determine presence of current entry in indexed value */
193 48366 : result = gcv->check[j];
194 :
195 : /*
196 : * If any val requiring a weight is used or caller needs position
197 : * information then we must recheck, so replace TRUE with MAYBE.
198 : */
199 48366 : if (result == GIN_TRUE)
200 : {
201 15654 : if (val->weight != 0 || data != NULL)
202 6498 : result = GIN_MAYBE;
203 : }
204 :
205 : /*
206 : * We rely on GinTernaryValue and TSTernaryValue using equivalent value
207 : * assignments. We could use a switch statement to map the values if that
208 : * ever stops being true, but it seems unlikely to happen.
209 : */
210 48366 : return (TSTernaryValue) result;
211 : }
212 :
213 : Datum
214 24 : gin_tsquery_consistent(PG_FUNCTION_ARGS)
215 : {
216 24 : bool *check = (bool *) PG_GETARG_POINTER(0);
217 :
218 : /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
219 24 : TSQuery query = PG_GETARG_TSQUERY(2);
220 :
221 : /* int32 nkeys = PG_GETARG_INT32(3); */
222 24 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
223 24 : bool *recheck = (bool *) PG_GETARG_POINTER(5);
224 24 : bool res = false;
225 :
226 : /* Initially assume query doesn't require recheck */
227 24 : *recheck = false;
228 :
229 24 : if (query->size > 0)
230 : {
231 : GinChkVal gcv;
232 :
233 : /*
234 : * check-parameter array has one entry for each value (operand) in the
235 : * query.
236 : */
237 24 : gcv.first_item = GETQUERY(query);
238 24 : gcv.check = (GinTernaryValue *) check;
239 24 : gcv.map_item_operand = (int *) (extra_data[0]);
240 :
241 24 : switch (TS_execute_ternary(GETQUERY(query),
242 : &gcv,
243 : TS_EXEC_PHRASE_NO_POS,
244 : checkcondition_gin))
245 : {
246 0 : case TS_NO:
247 0 : res = false;
248 0 : break;
249 24 : case TS_YES:
250 24 : res = true;
251 24 : break;
252 0 : case TS_MAYBE:
253 0 : res = true;
254 0 : *recheck = true;
255 0 : break;
256 : }
257 24 : }
258 :
259 24 : PG_RETURN_BOOL(res);
260 : }
261 :
262 : Datum
263 36918 : gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
264 : {
265 36918 : GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
266 :
267 : /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
268 36918 : TSQuery query = PG_GETARG_TSQUERY(2);
269 :
270 : /* int32 nkeys = PG_GETARG_INT32(3); */
271 36918 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
272 36918 : GinTernaryValue res = GIN_FALSE;
273 :
274 36918 : if (query->size > 0)
275 : {
276 : GinChkVal gcv;
277 :
278 : /*
279 : * check-parameter array has one entry for each value (operand) in the
280 : * query.
281 : */
282 36918 : gcv.first_item = GETQUERY(query);
283 36918 : gcv.check = check;
284 36918 : gcv.map_item_operand = (int *) (extra_data[0]);
285 :
286 36918 : res = TS_execute_ternary(GETQUERY(query),
287 : &gcv,
288 : TS_EXEC_PHRASE_NO_POS,
289 : checkcondition_gin);
290 : }
291 :
292 36918 : PG_RETURN_GIN_TERNARY_VALUE(res);
293 : }
294 :
295 : /*
296 : * Formerly, gin_extract_tsvector had only two arguments. Now it has three,
297 : * but we still need a pg_proc entry with two args to support reloading
298 : * pre-9.1 contrib/tsearch2 opclass declarations. This compatibility
299 : * function should go away eventually. (Note: you might say "hey, but the
300 : * code above is only *using* two args, so let's just declare it that way".
301 : * If you try that you'll find the opr_sanity regression test complains.)
302 : */
303 : Datum
304 0 : gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
305 : {
306 0 : if (PG_NARGS() < 3) /* should not happen */
307 0 : elog(ERROR, "gin_extract_tsvector requires three arguments");
308 0 : return gin_extract_tsvector(fcinfo);
309 : }
310 :
311 : /*
312 : * Likewise, we need a stub version of gin_extract_tsquery declared with
313 : * only five arguments.
314 : */
315 : Datum
316 0 : gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
317 : {
318 0 : if (PG_NARGS() < 7) /* should not happen */
319 0 : elog(ERROR, "gin_extract_tsquery requires seven arguments");
320 0 : return gin_extract_tsquery(fcinfo);
321 : }
322 :
323 : /*
324 : * Likewise, we need a stub version of gin_tsquery_consistent declared with
325 : * only six arguments.
326 : */
327 : Datum
328 0 : gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
329 : {
330 0 : if (PG_NARGS() < 8) /* should not happen */
331 0 : elog(ERROR, "gin_tsquery_consistent requires eight arguments");
332 0 : return gin_tsquery_consistent(fcinfo);
333 : }
334 :
335 : /*
336 : * Likewise, a stub version of gin_extract_tsquery declared with argument
337 : * types that are no longer considered appropriate.
338 : */
339 : Datum
340 0 : gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
341 : {
342 0 : return gin_extract_tsquery(fcinfo);
343 : }
344 :
345 : /*
346 : * Likewise, a stub version of gin_tsquery_consistent declared with argument
347 : * types that are no longer considered appropriate.
348 : */
349 : Datum
350 0 : gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
351 : {
352 0 : return gin_tsquery_consistent(fcinfo);
353 : }
|