Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsginidx.c
4 : * GIN support functions for tsvector_ops
5 : *
6 : * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsginidx.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/gin.h"
17 : #include "access/stratnum.h"
18 : #include "miscadmin.h"
19 : #include "tsearch/ts_type.h"
20 : #include "tsearch/ts_utils.h"
21 : #include "utils/builtins.h"
22 :
23 :
24 : Datum
25 1809920 : gin_cmp_tslexeme(PG_FUNCTION_ARGS)
26 : {
27 1809920 : text *a = PG_GETARG_TEXT_PP(0);
28 1809920 : text *b = PG_GETARG_TEXT_PP(1);
29 : int cmp;
30 :
31 3619840 : cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
32 3619840 : VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
33 : false);
34 :
35 1809920 : PG_FREE_IF_COPY(a, 0);
36 1809920 : PG_FREE_IF_COPY(b, 1);
37 1809920 : PG_RETURN_INT32(cmp);
38 : }
39 :
40 : Datum
41 444 : gin_cmp_prefix(PG_FUNCTION_ARGS)
42 : {
43 444 : text *a = PG_GETARG_TEXT_PP(0);
44 444 : text *b = PG_GETARG_TEXT_PP(1);
45 :
46 : #ifdef NOT_USED
47 : StrategyNumber strategy = PG_GETARG_UINT16(2);
48 : Pointer extra_data = PG_GETARG_POINTER(3);
49 : #endif
50 : int cmp;
51 :
52 888 : cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
53 888 : VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
54 : true);
55 :
56 444 : if (cmp < 0)
57 12 : cmp = 1; /* prevent continue scan */
58 :
59 444 : PG_FREE_IF_COPY(a, 0);
60 444 : PG_FREE_IF_COPY(b, 1);
61 444 : PG_RETURN_INT32(cmp);
62 : }
63 :
64 : Datum
65 3096 : gin_extract_tsvector(PG_FUNCTION_ARGS)
66 : {
67 3096 : TSVector vector = PG_GETARG_TSVECTOR(0);
68 3096 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
69 3096 : Datum *entries = NULL;
70 :
71 3096 : *nentries = vector->size;
72 3096 : if (vector->size > 0)
73 : {
74 : int i;
75 3042 : WordEntry *we = ARRPTR(vector);
76 :
77 3042 : entries = (Datum *) palloc(sizeof(Datum) * vector->size);
78 :
79 175974 : for (i = 0; i < vector->size; i++)
80 : {
81 : text *txt;
82 :
83 172932 : txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
84 172932 : entries[i] = PointerGetDatum(txt);
85 :
86 172932 : we++;
87 : }
88 : }
89 :
90 3096 : PG_FREE_IF_COPY(vector, 0);
91 3096 : PG_RETURN_POINTER(entries);
92 : }
93 :
94 : Datum
95 450 : gin_extract_tsquery(PG_FUNCTION_ARGS)
96 : {
97 450 : TSQuery query = PG_GETARG_TSQUERY(0);
98 450 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
99 :
100 : /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
101 450 : bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
102 450 : Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
103 :
104 : /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
105 450 : int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
106 450 : Datum *entries = NULL;
107 :
108 450 : *nentries = 0;
109 :
110 450 : if (query->size > 0)
111 : {
112 450 : QueryItem *item = GETQUERY(query);
113 : int32 i,
114 : j;
115 : bool *partialmatch;
116 : int *map_item_operand;
117 :
118 : /*
119 : * If the query doesn't have any required positive matches (for
120 : * instance, it's something like '! foo'), we have to do a full index
121 : * scan.
122 : */
123 450 : if (tsquery_requires_match(item))
124 330 : *searchMode = GIN_SEARCH_MODE_DEFAULT;
125 : else
126 120 : *searchMode = GIN_SEARCH_MODE_ALL;
127 :
128 : /* count number of VAL items */
129 450 : j = 0;
130 1704 : for (i = 0; i < query->size; i++)
131 : {
132 1254 : if (item[i].type == QI_VAL)
133 768 : j++;
134 : }
135 450 : *nentries = j;
136 :
137 450 : entries = (Datum *) palloc(sizeof(Datum) * j);
138 450 : partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
139 :
140 : /*
141 : * Make map to convert item's number to corresponding operand's (the
142 : * same, entry's) number. Entry's number is used in check array in
143 : * consistent method. We use the same map for each entry.
144 : */
145 450 : *extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
146 450 : map_item_operand = (int *) palloc0(sizeof(int) * query->size);
147 :
148 : /* Now rescan the VAL items and fill in the arrays */
149 450 : j = 0;
150 1704 : for (i = 0; i < query->size; i++)
151 : {
152 1254 : if (item[i].type == QI_VAL)
153 : {
154 768 : QueryOperand *val = &item[i].qoperand;
155 : text *txt;
156 :
157 768 : txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
158 768 : val->length);
159 768 : entries[j] = PointerGetDatum(txt);
160 768 : partialmatch[j] = val->prefix;
161 768 : (*extra_data)[j] = (Pointer) map_item_operand;
162 768 : map_item_operand[i] = j;
163 768 : j++;
164 : }
165 : }
166 : }
167 :
168 450 : PG_FREE_IF_COPY(query, 0);
169 :
170 450 : PG_RETURN_POINTER(entries);
171 : }
172 :
173 : typedef struct
174 : {
175 : QueryItem *first_item;
176 : GinTernaryValue *check;
177 : int *map_item_operand;
178 : } GinChkVal;
179 :
180 : /*
181 : * TS_execute callback for matching a tsquery operand to GIN index data
182 : */
183 : static TSTernaryValue
184 48366 : checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
185 : {
186 48366 : GinChkVal *gcv = (GinChkVal *) checkval;
187 : int j;
188 : GinTernaryValue result;
189 :
190 : /* convert item's number to corresponding entry's (operand's) number */
191 48366 : j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
192 :
193 : /* determine presence of current entry in indexed value */
194 48366 : result = gcv->check[j];
195 :
196 : /*
197 : * If any val requiring a weight is used or caller needs position
198 : * information then we must recheck, so replace TRUE with MAYBE.
199 : */
200 48366 : if (result == GIN_TRUE)
201 : {
202 15654 : if (val->weight != 0 || data != NULL)
203 6498 : result = GIN_MAYBE;
204 : }
205 :
206 : /*
207 : * We rely on GinTernaryValue and TSTernaryValue using equivalent value
208 : * assignments. We could use a switch statement to map the values if that
209 : * ever stops being true, but it seems unlikely to happen.
210 : */
211 48366 : return (TSTernaryValue) result;
212 : }
213 :
214 : Datum
215 24 : gin_tsquery_consistent(PG_FUNCTION_ARGS)
216 : {
217 24 : bool *check = (bool *) PG_GETARG_POINTER(0);
218 :
219 : /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
220 24 : TSQuery query = PG_GETARG_TSQUERY(2);
221 :
222 : /* int32 nkeys = PG_GETARG_INT32(3); */
223 24 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
224 24 : bool *recheck = (bool *) PG_GETARG_POINTER(5);
225 24 : bool res = false;
226 :
227 : /* Initially assume query doesn't require recheck */
228 24 : *recheck = false;
229 :
230 24 : if (query->size > 0)
231 : {
232 : GinChkVal gcv;
233 :
234 : /*
235 : * check-parameter array has one entry for each value (operand) in the
236 : * query.
237 : */
238 24 : gcv.first_item = GETQUERY(query);
239 : StaticAssertStmt(sizeof(GinTernaryValue) == sizeof(bool),
240 : "sizes of GinTernaryValue and bool are not equal");
241 24 : gcv.check = (GinTernaryValue *) check;
242 24 : gcv.map_item_operand = (int *) (extra_data[0]);
243 :
244 24 : switch (TS_execute_ternary(GETQUERY(query),
245 : &gcv,
246 : TS_EXEC_PHRASE_NO_POS,
247 : checkcondition_gin))
248 : {
249 0 : case TS_NO:
250 0 : res = false;
251 0 : break;
252 24 : case TS_YES:
253 24 : res = true;
254 24 : break;
255 0 : case TS_MAYBE:
256 0 : res = true;
257 0 : *recheck = true;
258 0 : break;
259 : }
260 24 : }
261 :
262 24 : PG_RETURN_BOOL(res);
263 : }
264 :
265 : Datum
266 36918 : gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
267 : {
268 36918 : GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
269 :
270 : /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
271 36918 : TSQuery query = PG_GETARG_TSQUERY(2);
272 :
273 : /* int32 nkeys = PG_GETARG_INT32(3); */
274 36918 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
275 36918 : GinTernaryValue res = GIN_FALSE;
276 :
277 36918 : if (query->size > 0)
278 : {
279 : GinChkVal gcv;
280 :
281 : /*
282 : * check-parameter array has one entry for each value (operand) in the
283 : * query.
284 : */
285 36918 : gcv.first_item = GETQUERY(query);
286 36918 : gcv.check = check;
287 36918 : gcv.map_item_operand = (int *) (extra_data[0]);
288 :
289 36918 : res = TS_execute_ternary(GETQUERY(query),
290 : &gcv,
291 : TS_EXEC_PHRASE_NO_POS,
292 : checkcondition_gin);
293 : }
294 :
295 36918 : PG_RETURN_GIN_TERNARY_VALUE(res);
296 : }
297 :
298 : /*
299 : * Formerly, gin_extract_tsvector had only two arguments. Now it has three,
300 : * but we still need a pg_proc entry with two args to support reloading
301 : * pre-9.1 contrib/tsearch2 opclass declarations. This compatibility
302 : * function should go away eventually. (Note: you might say "hey, but the
303 : * code above is only *using* two args, so let's just declare it that way".
304 : * If you try that you'll find the opr_sanity regression test complains.)
305 : */
306 : Datum
307 0 : gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
308 : {
309 0 : if (PG_NARGS() < 3) /* should not happen */
310 0 : elog(ERROR, "gin_extract_tsvector requires three arguments");
311 0 : return gin_extract_tsvector(fcinfo);
312 : }
313 :
314 : /*
315 : * Likewise, we need a stub version of gin_extract_tsquery declared with
316 : * only five arguments.
317 : */
318 : Datum
319 0 : gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
320 : {
321 0 : if (PG_NARGS() < 7) /* should not happen */
322 0 : elog(ERROR, "gin_extract_tsquery requires seven arguments");
323 0 : return gin_extract_tsquery(fcinfo);
324 : }
325 :
326 : /*
327 : * Likewise, we need a stub version of gin_tsquery_consistent declared with
328 : * only six arguments.
329 : */
330 : Datum
331 0 : gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
332 : {
333 0 : if (PG_NARGS() < 8) /* should not happen */
334 0 : elog(ERROR, "gin_tsquery_consistent requires eight arguments");
335 0 : return gin_tsquery_consistent(fcinfo);
336 : }
337 :
338 : /*
339 : * Likewise, a stub version of gin_extract_tsquery declared with argument
340 : * types that are no longer considered appropriate.
341 : */
342 : Datum
343 0 : gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
344 : {
345 0 : return gin_extract_tsquery(fcinfo);
346 : }
347 :
348 : /*
349 : * Likewise, a stub version of gin_tsquery_consistent declared with argument
350 : * types that are no longer considered appropriate.
351 : */
352 : Datum
353 0 : gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
354 : {
355 0 : return gin_tsquery_consistent(fcinfo);
356 : }
|