Line data Source code
1 : /* 2 : * contrib/pg_trgm/trgm_gin.c 3 : */ 4 : #include "postgres.h" 5 : 6 : #include "access/gin.h" 7 : #include "access/stratnum.h" 8 : #include "fmgr.h" 9 : #include "trgm.h" 10 : #include "varatt.h" 11 : 12 0 : PG_FUNCTION_INFO_V1(gin_extract_trgm); 13 8 : PG_FUNCTION_INFO_V1(gin_extract_value_trgm); 14 8 : PG_FUNCTION_INFO_V1(gin_extract_query_trgm); 15 8 : PG_FUNCTION_INFO_V1(gin_trgm_consistent); 16 8 : PG_FUNCTION_INFO_V1(gin_trgm_triconsistent); 17 : 18 : /* 19 : * This function can only be called if a pre-9.1 version of the GIN operator 20 : * class definition is present in the catalogs (probably as a consequence 21 : * of upgrade-in-place). Cope. 22 : */ 23 : Datum 24 0 : gin_extract_trgm(PG_FUNCTION_ARGS) 25 : { 26 0 : if (PG_NARGS() == 3) 27 0 : return gin_extract_value_trgm(fcinfo); 28 0 : if (PG_NARGS() == 7) 29 0 : return gin_extract_query_trgm(fcinfo); 30 0 : elog(ERROR, "unexpected number of arguments to gin_extract_trgm"); 31 : PG_RETURN_NULL(); 32 : } 33 : 34 : Datum 35 4808 : gin_extract_value_trgm(PG_FUNCTION_ARGS) 36 : { 37 4808 : text *val = (text *) PG_GETARG_TEXT_PP(0); 38 4808 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1); 39 4808 : Datum *entries = NULL; 40 : TRGM *trg; 41 : int32 trglen; 42 : 43 4808 : *nentries = 0; 44 : 45 4808 : trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val)); 46 4808 : trglen = ARRNELEM(trg); 47 : 48 4808 : if (trglen > 0) 49 : { 50 : trgm *ptr; 51 : int32 i; 52 : 53 4808 : *nentries = trglen; 54 4808 : entries = (Datum *) palloc(sizeof(Datum) * trglen); 55 : 56 4808 : ptr = GETARR(trg); 57 71262 : for (i = 0; i < trglen; i++) 58 : { 59 66454 : int32 item = trgm2int(ptr); 60 : 61 66454 : entries[i] = Int32GetDatum(item); 62 66454 : ptr++; 63 : } 64 : } 65 : 66 4808 : PG_RETURN_POINTER(entries); 67 : } 68 : 69 : Datum 70 344 : gin_extract_query_trgm(PG_FUNCTION_ARGS) 71 : { 72 344 : text *val = (text *) PG_GETARG_TEXT_PP(0); 73 344 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1); 74 344 : StrategyNumber strategy = PG_GETARG_UINT16(2); 75 : 76 : /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */ 77 344 : Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); 78 : 79 : /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */ 80 344 : int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); 81 344 : Datum *entries = NULL; 82 : TRGM *trg; 83 : int32 trglen; 84 : trgm *ptr; 85 : TrgmPackedGraph *graph; 86 : int32 i; 87 : 88 344 : switch (strategy) 89 : { 90 160 : case SimilarityStrategyNumber: 91 : case WordSimilarityStrategyNumber: 92 : case StrictWordSimilarityStrategyNumber: 93 : case EqualStrategyNumber: 94 160 : trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val)); 95 160 : break; 96 96 : case ILikeStrategyNumber: 97 : #ifndef IGNORECASE 98 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); 99 : #endif 100 : /* FALL THRU */ 101 : case LikeStrategyNumber: 102 : 103 : /* 104 : * For wildcard search we extract all the trigrams that every 105 : * potentially-matching string must include. 106 : */ 107 96 : trg = generate_wildcard_trgm(VARDATA_ANY(val), 108 96 : VARSIZE_ANY_EXHDR(val)); 109 96 : break; 110 88 : case RegExpICaseStrategyNumber: 111 : #ifndef IGNORECASE 112 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); 113 : #endif 114 : /* FALL THRU */ 115 : case RegExpStrategyNumber: 116 88 : trg = createTrgmNFA(val, PG_GET_COLLATION(), 117 : &graph, CurrentMemoryContext); 118 88 : if (trg && ARRNELEM(trg) > 0) 119 : { 120 : /* 121 : * Successful regex processing: store NFA-like graph as 122 : * extra_data. GIN API requires an array of nentries 123 : * Pointers, but we just put the same value in each element. 124 : */ 125 68 : trglen = ARRNELEM(trg); 126 68 : *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen); 127 1696 : for (i = 0; i < trglen; i++) 128 1628 : (*extra_data)[i] = (Pointer) graph; 129 : } 130 : else 131 : { 132 : /* No result: have to do full index scan. */ 133 20 : *nentries = 0; 134 20 : *searchMode = GIN_SEARCH_MODE_ALL; 135 20 : PG_RETURN_POINTER(entries); 136 : } 137 68 : break; 138 0 : default: 139 0 : elog(ERROR, "unrecognized strategy number: %d", strategy); 140 : trg = NULL; /* keep compiler quiet */ 141 : break; 142 : } 143 : 144 324 : trglen = ARRNELEM(trg); 145 324 : *nentries = trglen; 146 : 147 324 : if (trglen > 0) 148 : { 149 276 : entries = (Datum *) palloc(sizeof(Datum) * trglen); 150 276 : ptr = GETARR(trg); 151 3368 : for (i = 0; i < trglen; i++) 152 : { 153 3092 : int32 item = trgm2int(ptr); 154 : 155 3092 : entries[i] = Int32GetDatum(item); 156 3092 : ptr++; 157 : } 158 : } 159 : 160 : /* 161 : * If no trigram was extracted then we have to scan all the index. 162 : */ 163 324 : if (trglen == 0) 164 48 : *searchMode = GIN_SEARCH_MODE_ALL; 165 : 166 324 : PG_RETURN_POINTER(entries); 167 : } 168 : 169 : Datum 170 16 : gin_trgm_consistent(PG_FUNCTION_ARGS) 171 : { 172 16 : bool *check = (bool *) PG_GETARG_POINTER(0); 173 16 : StrategyNumber strategy = PG_GETARG_UINT16(1); 174 : 175 : /* text *query = PG_GETARG_TEXT_PP(2); */ 176 16 : int32 nkeys = PG_GETARG_INT32(3); 177 16 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); 178 16 : bool *recheck = (bool *) PG_GETARG_POINTER(5); 179 : bool res; 180 : int32 i, 181 : ntrue; 182 : double nlimit; 183 : 184 : /* All cases served by this function are inexact */ 185 16 : *recheck = true; 186 : 187 16 : switch (strategy) 188 : { 189 0 : case SimilarityStrategyNumber: 190 : case WordSimilarityStrategyNumber: 191 : case StrictWordSimilarityStrategyNumber: 192 0 : nlimit = index_strategy_get_limit(strategy); 193 : 194 : /* Count the matches */ 195 0 : ntrue = 0; 196 0 : for (i = 0; i < nkeys; i++) 197 : { 198 0 : if (check[i]) 199 0 : ntrue++; 200 : } 201 : 202 : /*-------------------- 203 : * If DIVUNION is defined then similarity formula is: 204 : * c / (len1 + len2 - c) 205 : * where c is number of common trigrams and it stands as ntrue in 206 : * this code. Here we don't know value of len2 but we can assume 207 : * that c (ntrue) is a lower bound of len2, so upper bound of 208 : * similarity is: 209 : * c / (len1 + c - c) => c / len1 210 : * If DIVUNION is not defined then similarity formula is: 211 : * c / max(len1, len2) 212 : * And again, c (ntrue) is a lower bound of len2, but c <= len1 213 : * just by definition and, consequently, upper bound of 214 : * similarity is just c / len1. 215 : * So, independently on DIVUNION the upper bound formula is the same. 216 : */ 217 0 : res = (nkeys == 0) ? false : 218 0 : (((((float4) ntrue) / ((float4) nkeys))) >= nlimit); 219 0 : break; 220 16 : case ILikeStrategyNumber: 221 : #ifndef IGNORECASE 222 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); 223 : #endif 224 : /* FALL THRU */ 225 : case LikeStrategyNumber: 226 : case EqualStrategyNumber: 227 : /* Check if all extracted trigrams are presented. */ 228 16 : res = true; 229 32 : for (i = 0; i < nkeys; i++) 230 : { 231 16 : if (!check[i]) 232 : { 233 0 : res = false; 234 0 : break; 235 : } 236 : } 237 16 : break; 238 0 : case RegExpICaseStrategyNumber: 239 : #ifndef IGNORECASE 240 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); 241 : #endif 242 : /* FALL THRU */ 243 : case RegExpStrategyNumber: 244 0 : if (nkeys < 1) 245 : { 246 : /* Regex processing gave no result: do full index scan */ 247 0 : res = true; 248 : } 249 : else 250 0 : res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0], 251 : check); 252 0 : break; 253 0 : default: 254 0 : elog(ERROR, "unrecognized strategy number: %d", strategy); 255 : res = false; /* keep compiler quiet */ 256 : break; 257 : } 258 : 259 16 : PG_RETURN_BOOL(res); 260 : } 261 : 262 : /* 263 : * In all cases, GIN_TRUE is at least as favorable to inclusion as 264 : * GIN_MAYBE. If no better option is available, simply treat 265 : * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary 266 : * consistent function. 267 : */ 268 : Datum 269 28716 : gin_trgm_triconsistent(PG_FUNCTION_ARGS) 270 : { 271 28716 : GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); 272 28716 : StrategyNumber strategy = PG_GETARG_UINT16(1); 273 : 274 : /* text *query = PG_GETARG_TEXT_PP(2); */ 275 28716 : int32 nkeys = PG_GETARG_INT32(3); 276 28716 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); 277 28716 : GinTernaryValue res = GIN_MAYBE; 278 : int32 i, 279 : ntrue; 280 : bool *boolcheck; 281 : double nlimit; 282 : 283 28716 : switch (strategy) 284 : { 285 16382 : case SimilarityStrategyNumber: 286 : case WordSimilarityStrategyNumber: 287 : case StrictWordSimilarityStrategyNumber: 288 16382 : nlimit = index_strategy_get_limit(strategy); 289 : 290 : /* Count the matches */ 291 16382 : ntrue = 0; 292 173818 : for (i = 0; i < nkeys; i++) 293 : { 294 157436 : if (check[i] != GIN_FALSE) 295 66344 : ntrue++; 296 : } 297 : 298 : /* 299 : * See comment in gin_trgm_consistent() about * upper bound 300 : * formula 301 : */ 302 32764 : res = (nkeys == 0) 303 16382 : ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit) 304 : ? GIN_MAYBE : GIN_FALSE); 305 16382 : break; 306 8092 : case ILikeStrategyNumber: 307 : #ifndef IGNORECASE 308 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); 309 : #endif 310 : /* FALL THRU */ 311 : case LikeStrategyNumber: 312 : case EqualStrategyNumber: 313 : /* Check if all extracted trigrams are presented. */ 314 8092 : res = GIN_MAYBE; 315 16364 : for (i = 0; i < nkeys; i++) 316 : { 317 8312 : if (check[i] == GIN_FALSE) 318 : { 319 40 : res = GIN_FALSE; 320 40 : break; 321 : } 322 : } 323 8092 : break; 324 4242 : case RegExpICaseStrategyNumber: 325 : #ifndef IGNORECASE 326 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); 327 : #endif 328 : /* FALL THRU */ 329 : case RegExpStrategyNumber: 330 4242 : if (nkeys < 1) 331 : { 332 : /* Regex processing gave no result: do full index scan */ 333 1472 : res = GIN_MAYBE; 334 : } 335 : else 336 : { 337 : /* 338 : * As trigramsMatchGraph implements a monotonic boolean 339 : * function, promoting all GIN_MAYBE keys to GIN_TRUE will 340 : * give a conservative result. 341 : */ 342 2770 : boolcheck = (bool *) palloc(sizeof(bool) * nkeys); 343 638578 : for (i = 0; i < nkeys; i++) 344 635808 : boolcheck[i] = (check[i] != GIN_FALSE); 345 2770 : if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0], 346 : boolcheck)) 347 12 : res = GIN_FALSE; 348 2770 : pfree(boolcheck); 349 : } 350 4242 : break; 351 0 : default: 352 0 : elog(ERROR, "unrecognized strategy number: %d", strategy); 353 : res = GIN_FALSE; /* keep compiler quiet */ 354 : break; 355 : } 356 : 357 : /* All cases served by this function are inexact */ 358 : Assert(res != GIN_TRUE); 359 28716 : PG_RETURN_GIN_TERNARY_VALUE(res); 360 : }