Line data Source code
1 : /*
2 : * contrib/pg_trgm/trgm_gin.c
3 : */
4 : #include "postgres.h"
5 :
6 : #include "access/gin.h"
7 : #include "access/stratnum.h"
8 : #include "fmgr.h"
9 : #include "trgm.h"
10 : #include "varatt.h"
11 :
/* PG_FUNCTION_INFO_V1 is invoked once for each Datum-returning function defined in this file. */
12 0 : PG_FUNCTION_INFO_V1(gin_extract_trgm);
13 8 : PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
14 8 : PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
15 8 : PG_FUNCTION_INFO_V1(gin_trgm_consistent);
16 8 : PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
17 :
18 : /*
19 : * This function can only be called if a pre-9.1 version of the GIN operator
20 : * class definition is present in the catalogs (probably as a consequence
21 : * of upgrade-in-place). Cope.
22 : */
23 : Datum
24 0 : gin_extract_trgm(PG_FUNCTION_ARGS)
25 : {
26 0 : if (PG_NARGS() == 3)
27 0 : return gin_extract_value_trgm(fcinfo);
28 0 : if (PG_NARGS() == 7)
29 0 : return gin_extract_query_trgm(fcinfo);
30 0 : elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
31 : PG_RETURN_NULL();
32 : }
33 :
34 : Datum
35 4808 : gin_extract_value_trgm(PG_FUNCTION_ARGS)
36 : {
37 4808 : text *val = (text *) PG_GETARG_TEXT_PP(0);
38 4808 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
39 4808 : Datum *entries = NULL;
40 : TRGM *trg;
41 : int32 trglen;
42 :
43 4808 : *nentries = 0;
44 :
45 4808 : trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
46 4808 : trglen = ARRNELEM(trg);
47 :
48 4808 : if (trglen > 0)
49 : {
50 : trgm *ptr;
51 : int32 i;
52 :
53 4808 : *nentries = trglen;
54 4808 : entries = palloc_array(Datum, trglen);
55 :
56 4808 : ptr = GETARR(trg);
57 71262 : for (i = 0; i < trglen; i++)
58 : {
59 66454 : int32 item = trgm2int(ptr);
60 :
61 66454 : entries[i] = Int32GetDatum(item);
62 66454 : ptr++;
63 : }
64 : }
65 :
66 4808 : PG_RETURN_POINTER(entries);
67 : }
68 :
69 : Datum
70 376 : gin_extract_query_trgm(PG_FUNCTION_ARGS)
71 : {
72 376 : text *val = (text *) PG_GETARG_TEXT_PP(0);
73 376 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
74 376 : StrategyNumber strategy = PG_GETARG_UINT16(2);
75 :
76 : /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
77 376 : Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
78 :
79 : /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
80 376 : int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
81 376 : Datum *entries = NULL;
82 : TRGM *trg;
83 : int32 trglen;
84 : trgm *ptr;
85 : TrgmPackedGraph *graph;
86 : int32 i;
87 :
88 376 : switch (strategy)
89 : {
90 192 : case SimilarityStrategyNumber:
91 : case WordSimilarityStrategyNumber:
92 : case StrictWordSimilarityStrategyNumber:
93 : case EqualStrategyNumber:
94 192 : trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
95 192 : break;
96 96 : case ILikeStrategyNumber:
97 : #ifndef IGNORECASE
98 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
99 : #endif
100 : /* FALL THRU */
101 : case LikeStrategyNumber:
102 :
103 : /*
104 : * For wildcard search we extract all the trigrams that every
105 : * potentially-matching string must include.
106 : */
107 96 : trg = generate_wildcard_trgm(VARDATA_ANY(val),
108 96 : VARSIZE_ANY_EXHDR(val));
109 96 : break;
110 88 : case RegExpICaseStrategyNumber:
111 : #ifndef IGNORECASE
112 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
113 : #endif
114 : /* FALL THRU */
115 : case RegExpStrategyNumber:
116 88 : trg = createTrgmNFA(val, PG_GET_COLLATION(),
117 : &graph, CurrentMemoryContext);
118 88 : if (trg && ARRNELEM(trg) > 0)
119 : {
120 : /*
121 : * Successful regex processing: store NFA-like graph as
122 : * extra_data. GIN API requires an array of nentries
123 : * Pointers, but we just put the same value in each element.
124 : */
125 68 : trglen = ARRNELEM(trg);
126 68 : *extra_data = palloc_array(Pointer, trglen);
127 1696 : for (i = 0; i < trglen; i++)
128 1628 : (*extra_data)[i] = (Pointer) graph;
129 : }
130 : else
131 : {
132 : /* No result: have to do full index scan. */
133 20 : *nentries = 0;
134 20 : *searchMode = GIN_SEARCH_MODE_ALL;
135 20 : PG_RETURN_POINTER(entries);
136 : }
137 68 : break;
138 0 : default:
139 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
140 : trg = NULL; /* keep compiler quiet */
141 : break;
142 : }
143 :
144 356 : trglen = ARRNELEM(trg);
145 356 : *nentries = trglen;
146 :
147 356 : if (trglen > 0)
148 : {
149 292 : entries = palloc_array(Datum, trglen);
150 292 : ptr = GETARR(trg);
151 3496 : for (i = 0; i < trglen; i++)
152 : {
153 3204 : int32 item = trgm2int(ptr);
154 :
155 3204 : entries[i] = Int32GetDatum(item);
156 3204 : ptr++;
157 : }
158 : }
159 :
160 : /*
161 : * If no trigram was extracted then we have to scan all the index.
162 : */
163 356 : if (trglen == 0)
164 64 : *searchMode = GIN_SEARCH_MODE_ALL;
165 :
166 356 : PG_RETURN_POINTER(entries);
167 : }
168 :
169 : Datum
170 24 : gin_trgm_consistent(PG_FUNCTION_ARGS)
171 : {
172 24 : bool *check = (bool *) PG_GETARG_POINTER(0);
173 24 : StrategyNumber strategy = PG_GETARG_UINT16(1);
174 :
175 : /* text *query = PG_GETARG_TEXT_PP(2); */
176 24 : int32 nkeys = PG_GETARG_INT32(3);
177 24 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
178 24 : bool *recheck = (bool *) PG_GETARG_POINTER(5);
179 : bool res;
180 : int32 i,
181 : ntrue;
182 : double nlimit;
183 :
184 : /* All cases served by this function are inexact */
185 24 : *recheck = true;
186 :
187 24 : switch (strategy)
188 : {
189 8 : case SimilarityStrategyNumber:
190 : case WordSimilarityStrategyNumber:
191 : case StrictWordSimilarityStrategyNumber:
192 8 : nlimit = index_strategy_get_limit(strategy);
193 :
194 : /* Count the matches */
195 8 : ntrue = 0;
196 36 : for (i = 0; i < nkeys; i++)
197 : {
198 28 : if (check[i])
199 24 : ntrue++;
200 : }
201 :
202 : /*--------------------
203 : * If DIVUNION is defined then similarity formula is:
204 : * c / (len1 + len2 - c)
205 : * where c is number of common trigrams and it stands as ntrue in
206 : * this code. Here we don't know value of len2 but we can assume
207 : * that c (ntrue) is a lower bound of len2, so upper bound of
208 : * similarity is:
209 : * c / (len1 + c - c) => c / len1
210 : * If DIVUNION is not defined then similarity formula is:
211 : * c / max(len1, len2)
212 : * And again, c (ntrue) is a lower bound of len2, but c <= len1
213 : * just by definition and, consequently, upper bound of
214 : * similarity is just c / len1.
215 : * So, independently on DIVUNION the upper bound formula is the same.
216 : */
217 12 : res = (nkeys == 0) ? false :
218 4 : (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
219 8 : break;
220 16 : case ILikeStrategyNumber:
221 : #ifndef IGNORECASE
222 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
223 : #endif
224 : /* FALL THRU */
225 : case LikeStrategyNumber:
226 : case EqualStrategyNumber:
227 : /* Check if all extracted trigrams are presented. */
228 16 : res = true;
229 32 : for (i = 0; i < nkeys; i++)
230 : {
231 16 : if (!check[i])
232 : {
233 0 : res = false;
234 0 : break;
235 : }
236 : }
237 16 : break;
238 0 : case RegExpICaseStrategyNumber:
239 : #ifndef IGNORECASE
240 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
241 : #endif
242 : /* FALL THRU */
243 : case RegExpStrategyNumber:
244 0 : if (nkeys < 1)
245 : {
246 : /* Regex processing gave no result: do full index scan */
247 0 : res = true;
248 : }
249 : else
250 0 : res = trigramsMatchGraph(extra_data[0], check);
251 0 : break;
252 0 : default:
253 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
254 : res = false; /* keep compiler quiet */
255 : break;
256 : }
257 :
258 24 : PG_RETURN_BOOL(res);
259 : }
260 :
261 : /*
262 : * In all cases, GIN_TRUE is at least as favorable to inclusion as
263 : * GIN_MAYBE. If no better option is available, simply treat
264 : * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
265 : * consistent function.
266 : */
267 : Datum
268 32714 : gin_trgm_triconsistent(PG_FUNCTION_ARGS)
269 : {
270 32714 : GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
271 32714 : StrategyNumber strategy = PG_GETARG_UINT16(1);
272 :
273 : /* text *query = PG_GETARG_TEXT_PP(2); */
274 32714 : int32 nkeys = PG_GETARG_INT32(3);
275 32714 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
276 32714 : GinTernaryValue res = GIN_MAYBE;
277 : int32 i,
278 : ntrue;
279 : bool *boolcheck;
280 : double nlimit;
281 :
282 32714 : switch (strategy)
283 : {
284 20394 : case SimilarityStrategyNumber:
285 : case WordSimilarityStrategyNumber:
286 : case StrictWordSimilarityStrategyNumber:
287 20394 : nlimit = index_strategy_get_limit(strategy);
288 :
289 : /* Count the matches */
290 20394 : ntrue = 0;
291 191914 : for (i = 0; i < nkeys; i++)
292 : {
293 171520 : if (check[i] != GIN_FALSE)
294 78404 : ntrue++;
295 : }
296 :
297 : /*
298 : * See comment in gin_trgm_consistent() about * upper bound
299 : * formula
300 : */
301 38788 : res = (nkeys == 0)
302 18394 : ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
303 : ? GIN_MAYBE : GIN_FALSE);
304 20394 : break;
305 8078 : case ILikeStrategyNumber:
306 : #ifndef IGNORECASE
307 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
308 : #endif
309 : /* FALL THRU */
310 : case LikeStrategyNumber:
311 : case EqualStrategyNumber:
312 : /* Check if all extracted trigrams are presented. */
313 8078 : res = GIN_MAYBE;
314 16350 : for (i = 0; i < nkeys; i++)
315 : {
316 8312 : if (check[i] == GIN_FALSE)
317 : {
318 40 : res = GIN_FALSE;
319 40 : break;
320 : }
321 : }
322 8078 : break;
323 4242 : case RegExpICaseStrategyNumber:
324 : #ifndef IGNORECASE
325 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
326 : #endif
327 : /* FALL THRU */
328 : case RegExpStrategyNumber:
329 4242 : if (nkeys < 1)
330 : {
331 : /* Regex processing gave no result: do full index scan */
332 1472 : res = GIN_MAYBE;
333 : }
334 : else
335 : {
336 : /*
337 : * As trigramsMatchGraph implements a monotonic boolean
338 : * function, promoting all GIN_MAYBE keys to GIN_TRUE will
339 : * give a conservative result.
340 : */
341 2770 : boolcheck = palloc_array(bool, nkeys);
342 638578 : for (i = 0; i < nkeys; i++)
343 635808 : boolcheck[i] = (check[i] != GIN_FALSE);
344 2770 : if (!trigramsMatchGraph(extra_data[0], boolcheck))
345 12 : res = GIN_FALSE;
346 2770 : pfree(boolcheck);
347 : }
348 4242 : break;
349 0 : default:
350 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
351 : res = GIN_FALSE; /* keep compiler quiet */
352 : break;
353 : }
354 :
355 : /* All cases served by this function are inexact */
356 : Assert(res != GIN_TRUE);
357 32714 : PG_RETURN_GIN_TERNARY_VALUE(res);
358 : }
|