Line data Source code
1 : /*
2 : * contrib/pg_trgm/trgm_gin.c
3 : */
4 : #include "postgres.h"
5 :
6 : #include "access/gin.h"
7 : #include "access/stratnum.h"
8 : #include "fmgr.h"
9 : #include "trgm.h"
10 : #include "varatt.h"
11 :
/*
 * Version-1 calling-convention declarations (PG_FUNCTION_INFO_V1) for the
 * five PG_FUNCTION_ARGS entry points defined further down in this file.
 */
12 0 : PG_FUNCTION_INFO_V1(gin_extract_trgm);
13 8 : PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
14 8 : PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
15 8 : PG_FUNCTION_INFO_V1(gin_trgm_consistent);
16 8 : PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
17 :
/*
 * gin_extract_trgm
 *
 * Compatibility entry point that dispatches purely on the number of
 * arguments it was called with:
 *	3 arguments -> gin_extract_value_trgm()
 *	7 arguments -> gin_extract_query_trgm()
 * Any other argument count is reported with elog(ERROR).
 *
 * NOTE(review): every executable line of this function shows 0 in the data
 * column of this listing, so it was presumably not exercised by the
 * measured run.
 */
18 : /*
19 : * This function can only be called if a pre-9.1 version of the GIN operator
20 : * class definition is present in the catalogs (probably as a consequence
21 : * of upgrade-in-place). Cope.
22 : */
23 : Datum
24 0 : gin_extract_trgm(PG_FUNCTION_ARGS)
25 : {
    /* Same fcinfo is handed on unchanged to the real extractor. */
26 0 : if (PG_NARGS() == 3)
27 0 : return gin_extract_value_trgm(fcinfo);
28 0 : if (PG_NARGS() == 7)
29 0 : return gin_extract_query_trgm(fcinfo);
30 0 : elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
    /* Presumably only here to keep the compiler quiet after elog(ERROR). */
31 : PG_RETURN_NULL();
32 : }
33 :
/*
 * gin_extract_value_trgm
 *
 * Produce the array of keys for one text value.
 *
 * Arguments, as fetched below:
 *	0: the text value to decompose
 *	1: pointer to an int32 that receives the number of keys returned
 *
 * The value is split into trigrams by generate_trgm(); each trigram is
 * converted to an int32 with trgm2int() and stored as a Datum.
 *
 * Returns a palloc'd Datum array of *nentries elements, or NULL (with
 * *nentries set to 0) when no trigram was generated.
 */
34 : Datum
35 4808 : gin_extract_value_trgm(PG_FUNCTION_ARGS)
36 : {
37 4808 : text *val = (text *) PG_GETARG_TEXT_PP(0);
38 4808 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
39 4808 : Datum *entries = NULL;
40 : TRGM *trg;
41 : int32 trglen;
42 :
    /* Start from "no keys"; overwritten below if any trigram is found. */
43 4808 : *nentries = 0;
44 :
45 4808 : trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
46 4808 : trglen = ARRNELEM(trg);
47 :
48 4808 : if (trglen > 0)
49 : {
50 : trgm *ptr;
51 : int32 i;
52 :
53 4808 : *nentries = trglen;
54 4808 : entries = palloc_array(Datum, trglen);
55 :
    /* Walk the trigram array, emitting one int32 Datum per trigram. */
56 4808 : ptr = GETARR(trg);
57 71262 : for (i = 0; i < trglen; i++)
58 : {
59 66454 : int32 item = trgm2int(ptr);
60 :
61 66454 : entries[i] = Int32GetDatum(item);
62 66454 : ptr++;
63 : }
64 : }
65 :
66 4808 : PG_RETURN_POINTER(entries);
67 : }
68 :
/*
 * gin_extract_query_trgm
 *
 * Produce the array of keys to search for, given a query string and the
 * operator strategy.
 *
 * Arguments, as fetched below:
 *	0: the query text
 *	1: pointer to an int32 that receives the number of keys returned
 *	2: strategy number selecting how trigrams are extracted
 *	3: pmatch (not used here, compiled out under NOT_USED)
 *	4: pointer through which a per-key extra_data array may be returned
 *	5: nullFlags (not used here, compiled out under NOT_USED)
 *	6: pointer to the search mode, set to GIN_SEARCH_MODE_ALL when no
 *	   trigram could be extracted
 *
 * Trigram extraction per strategy:
 *	similarity / word similarity / strict word similarity / equality:
 *		generate_trgm()
 *	LIKE and ILIKE: generate_wildcard_trgm()
 *	regex and case-insensitive regex: createTrgmNFA(), which also yields
 *		the graph stored in extra_data
 * ILIKE and case-insensitive regex raise an error when IGNORECASE is not
 * defined; an unrecognized strategy raises an error as well.
 *
 * Returns a palloc'd Datum array of *nentries int32 keys, or NULL when
 * *nentries is 0.
 */
69 : Datum
70 376 : gin_extract_query_trgm(PG_FUNCTION_ARGS)
71 : {
72 376 : text *val = (text *) PG_GETARG_TEXT_PP(0);
73 376 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
74 376 : StrategyNumber strategy = PG_GETARG_UINT16(2);
75 : #ifdef NOT_USED
76 : bool **pmatch = (bool **) PG_GETARG_POINTER(3);
77 : #endif
78 376 : Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
79 : #ifdef NOT_USED
80 : bool **nullFlags = (bool **) PG_GETARG_POINTER(5);
81 : #endif
82 376 : int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
83 376 : Datum *entries = NULL;
84 : TRGM *trg;
85 : int32 trglen;
86 : trgm *ptr;
87 : TrgmPackedGraph *graph;
88 : int32 i;
89 :
    /* Pick the trigram extractor that matches the operator strategy. */
90 376 : switch (strategy)
91 : {
92 192 : case SimilarityStrategyNumber:
93 : case WordSimilarityStrategyNumber:
94 : case StrictWordSimilarityStrategyNumber:
95 : case EqualStrategyNumber:
96 192 : trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
97 192 : break;
98 96 : case ILikeStrategyNumber:
99 : #ifndef IGNORECASE
100 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
101 : #endif
102 : /* FALL THRU */
103 : case LikeStrategyNumber:
104 :
105 : /*
106 : * For wildcard search we extract all the trigrams that every
107 : * potentially-matching string must include.
108 : */
109 96 : trg = generate_wildcard_trgm(VARDATA_ANY(val),
110 96 : VARSIZE_ANY_EXHDR(val));
111 96 : break;
112 88 : case RegExpICaseStrategyNumber:
113 : #ifndef IGNORECASE
114 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
115 : #endif
116 : /* FALL THRU */
117 : case RegExpStrategyNumber:
    /* graph is an output of createTrgmNFA(); only read on success below. */
118 88 : trg = createTrgmNFA(val, PG_GET_COLLATION(),
119 : &graph, CurrentMemoryContext);
120 88 : if (trg && ARRNELEM(trg) > 0)
121 : {
122 : /*
123 : * Successful regex processing: store NFA-like graph as
124 : * extra_data. GIN API requires an array of nentries
125 : * Pointers, but we just put the same value in each element.
126 : */
127 68 : trglen = ARRNELEM(trg);
128 68 : *extra_data = palloc_array(Pointer, trglen);
129 1696 : for (i = 0; i < trglen; i++)
130 1628 : (*extra_data)[i] = (Pointer) graph;
131 : }
132 : else
133 : {
134 : /* No result: have to do full index scan. */
    /* Return early: trg may be NULL here, so the common tail must be skipped. */
135 20 : *nentries = 0;
136 20 : *searchMode = GIN_SEARCH_MODE_ALL;
137 20 : PG_RETURN_POINTER(entries);
138 : }
139 68 : break;
140 0 : default:
141 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
142 : trg = NULL; /* keep compiler quiet */
143 : break;
144 : }
145 :
    /* Common tail: convert whatever trigrams were extracted into keys. */
146 356 : trglen = ARRNELEM(trg);
147 356 : *nentries = trglen;
148 :
149 356 : if (trglen > 0)
150 : {
151 292 : entries = palloc_array(Datum, trglen);
152 292 : ptr = GETARR(trg);
153 3496 : for (i = 0; i < trglen; i++)
154 : {
155 3204 : int32 item = trgm2int(ptr);
156 :
157 3204 : entries[i] = Int32GetDatum(item);
158 3204 : ptr++;
159 : }
160 : }
161 :
162 : /*
163 : * If no trigram was extracted then we have to scan all the index.
164 : */
165 356 : if (trglen == 0)
166 64 : *searchMode = GIN_SEARCH_MODE_ALL;
167 :
168 356 : PG_RETURN_POINTER(entries);
169 : }
170 :
/*
 * gin_trgm_consistent
 *
 * Boolean consistency check: decide from the per-key match flags whether an
 * indexed item may satisfy the query.
 *
 * Arguments, as fetched below:
 *	0: check[], one bool per query key
 *	1: strategy number
 *	2: the query text (not used here, compiled out under NOT_USED)
 *	3: nkeys, the number of elements in check[]
 *	4: extra_data[]; element 0 is passed to trigramsMatchGraph() for the
 *	   regex strategies
 *	5: pointer to the recheck flag, always set to true
 *
 * Result per strategy:
 *	similarity family: true when ntrue / nkeys reaches the limit returned
 *		by index_strategy_get_limit(); false when nkeys is 0
 *	LIKE / ILIKE / equality: true only when every check[] entry is true
 *	regex: true when nkeys < 1, otherwise the trigramsMatchGraph() result
 * ILIKE and case-insensitive regex raise an error when IGNORECASE is not
 * defined; an unrecognized strategy raises an error as well.
 */
171 : Datum
172 24 : gin_trgm_consistent(PG_FUNCTION_ARGS)
173 : {
174 24 : bool *check = (bool *) PG_GETARG_POINTER(0);
175 24 : StrategyNumber strategy = PG_GETARG_UINT16(1);
176 : #ifdef NOT_USED
177 : text *query = PG_GETARG_TEXT_PP(2);
178 : #endif
179 24 : int32 nkeys = PG_GETARG_INT32(3);
180 24 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
181 24 : bool *recheck = (bool *) PG_GETARG_POINTER(5);
182 : bool res;
183 : int32 i,
184 : ntrue;
185 : double nlimit;
186 :
187 : /* All cases served by this function are inexact */
188 24 : *recheck = true;
189 :
190 24 : switch (strategy)
191 : {
192 8 : case SimilarityStrategyNumber:
193 : case WordSimilarityStrategyNumber:
194 : case StrictWordSimilarityStrategyNumber:
195 8 : nlimit = index_strategy_get_limit(strategy);
196 :
197 : /* Count the matches */
198 8 : ntrue = 0;
199 36 : for (i = 0; i < nkeys; i++)
200 : {
201 28 : if (check[i])
202 24 : ntrue++;
203 : }
204 :
205 : /*--------------------
206 : * If DIVUNION is defined then similarity formula is:
207 : * c / (len1 + len2 - c)
208 : * where c is number of common trigrams and it stands as ntrue in
209 : * this code. Here we don't know value of len2 but we can assume
210 : * that c (ntrue) is a lower bound of len2, so upper bound of
211 : * similarity is:
212 : * c / (len1 + c - c) => c / len1
213 : * If DIVUNION is not defined then similarity formula is:
214 : * c / max(len1, len2)
215 : * And again, c (ntrue) is a lower bound of len2, but c <= len1
216 : * just by definition and, consequently, upper bound of
217 : * similarity is just c / len1.
218 : * So, independently of DIVUNION the upper bound formula is the same.
219 : */
    /* In the expression below nkeys plays the role of len1; guard nkeys == 0. */
220 12 : res = (nkeys == 0) ? false :
221 4 : (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
222 8 : break;
223 16 : case ILikeStrategyNumber:
224 : #ifndef IGNORECASE
225 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
226 : #endif
227 : /* FALL THRU */
228 : case LikeStrategyNumber:
229 : case EqualStrategyNumber:
230 : /* Check if all extracted trigrams are present. */
231 16 : res = true;
232 32 : for (i = 0; i < nkeys; i++)
233 : {
234 16 : if (!check[i])
235 : {
236 0 : res = false;
237 0 : break;
238 : }
239 : }
240 16 : break;
    /*
     * NOTE(review): the regex branch below shows 0 in the data column of
     * this listing, so it was presumably not exercised by the measured run.
     */
241 0 : case RegExpICaseStrategyNumber:
242 : #ifndef IGNORECASE
243 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
244 : #endif
245 : /* FALL THRU */
246 : case RegExpStrategyNumber:
247 0 : if (nkeys < 1)
248 : {
249 : /* Regex processing gave no result: do full index scan */
250 0 : res = true;
251 : }
252 : else
253 0 : res = trigramsMatchGraph(extra_data[0], check);
254 0 : break;
255 0 : default:
256 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
257 : res = false; /* keep compiler quiet */
258 : break;
259 : }
260 :
261 24 : PG_RETURN_BOOL(res);
262 : }
263 :
/*
 * gin_trgm_triconsistent
 *
 * Ternary consistency check. Takes the same strategies as
 * gin_trgm_consistent(), but the per-key inputs are GinTernaryValue and the
 * result is either GIN_MAYBE or GIN_FALSE, never GIN_TRUE (asserted at the
 * end).
 *
 * Arguments, as fetched below:
 *	0: check[], one GinTernaryValue per query key
 *	1: strategy number
 *	2: the query text (not used here, compiled out under NOT_USED)
 *	3: nkeys, the number of elements in check[]
 *	4: extra_data[]; element 0 is passed to trigramsMatchGraph() for the
 *	   regex strategies
 *
 * Every key that is not GIN_FALSE is counted as a match.
 */
264 : /*
265 : * In all cases, GIN_TRUE is at least as favorable to inclusion as
266 : * GIN_MAYBE. If no better option is available, simply treat
267 : * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
268 : * consistent function.
269 : */
270 : Datum
271 32714 : gin_trgm_triconsistent(PG_FUNCTION_ARGS)
272 : {
273 32714 : GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
274 32714 : StrategyNumber strategy = PG_GETARG_UINT16(1);
275 : #ifdef NOT_USED
276 : text *query = PG_GETARG_TEXT_PP(2);
277 : #endif
278 32714 : int32 nkeys = PG_GETARG_INT32(3);
279 32714 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
280 32714 : GinTernaryValue res = GIN_MAYBE;
281 : int32 i,
282 : ntrue;
283 : bool *boolcheck;
284 : double nlimit;
285 :
286 32714 : switch (strategy)
287 : {
288 20394 : case SimilarityStrategyNumber:
289 : case WordSimilarityStrategyNumber:
290 : case StrictWordSimilarityStrategyNumber:
291 20394 : nlimit = index_strategy_get_limit(strategy);
292 :
293 : /* Count the matches */
294 20394 : ntrue = 0;
295 191914 : for (i = 0; i < nkeys; i++)
296 : {
297 171520 : if (check[i] != GIN_FALSE)
298 78404 : ntrue++;
299 : }
300 :
301 : /*
302 : * See comment in gin_trgm_consistent() about the upper bound
303 : * formula.
304 : */
    /* nkeys == 0 is handled first so the division is never by zero. */
305 38788 : res = (nkeys == 0)
306 18394 : ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
307 : ? GIN_MAYBE : GIN_FALSE);
308 20394 : break;
309 8078 : case ILikeStrategyNumber:
310 : #ifndef IGNORECASE
311 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
312 : #endif
313 : /* FALL THRU */
314 : case LikeStrategyNumber:
315 : case EqualStrategyNumber:
316 : /* Check if all extracted trigrams are present. */
317 8078 : res = GIN_MAYBE;
318 16350 : for (i = 0; i < nkeys; i++)
319 : {
320 8312 : if (check[i] == GIN_FALSE)
321 : {
322 40 : res = GIN_FALSE;
323 40 : break;
324 : }
325 : }
326 8078 : break;
327 4242 : case RegExpICaseStrategyNumber:
328 : #ifndef IGNORECASE
329 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
330 : #endif
331 : /* FALL THRU */
332 : case RegExpStrategyNumber:
333 4242 : if (nkeys < 1)
334 : {
335 : /* Regex processing gave no result: do full index scan */
336 1472 : res = GIN_MAYBE;
337 : }
338 : else
339 : {
340 : /*
341 : * As trigramsMatchGraph implements a monotonic boolean
342 : * function, promoting all GIN_MAYBE keys to GIN_TRUE will
343 : * give a conservative result.
344 : */
    /* Temporary bool copy of check[]; released right after the graph test. */
345 2770 : boolcheck = palloc_array(bool, nkeys);
346 638578 : for (i = 0; i < nkeys; i++)
347 635808 : boolcheck[i] = (check[i] != GIN_FALSE);
348 2770 : if (!trigramsMatchGraph(extra_data[0], boolcheck))
349 12 : res = GIN_FALSE;
350 2770 : pfree(boolcheck);
351 : }
352 4242 : break;
353 0 : default:
354 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
355 : res = GIN_FALSE; /* keep compiler quiet */
356 : break;
357 : }
358 :
359 : /* All cases served by this function are inexact */
360 : Assert(res != GIN_TRUE);
361 32714 : PG_RETURN_GIN_TERNARY_VALUE(res);
362 : }
|