LCOV - code coverage report
Current view: top level - contrib/pg_trgm - trgm_op.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 453 499 90.8 %
Date: 2025-02-22 07:14:56 Functions: 52 55 94.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * contrib/pg_trgm/trgm_op.c
       3             :  */
       4             : #include "postgres.h"
       5             : 
       6             : #include <ctype.h>
       7             : 
       8             : #include "catalog/pg_collation_d.h"
       9             : #include "catalog/pg_type.h"
      10             : #include "common/int.h"
      11             : #include "lib/qunique.h"
      12             : #include "miscadmin.h"
      13             : #include "trgm.h"
      14             : #include "tsearch/ts_locale.h"
      15             : #include "utils/formatting.h"
      16             : #include "utils/guc.h"
      17             : #include "utils/lsyscache.h"
      18             : #include "utils/memutils.h"
      19             : #include "utils/pg_crc.h"
      20             : 
      21           6 : PG_MODULE_MAGIC;
      22             : 
      23             : /* GUC variables */
      24             : double      similarity_threshold = 0.3f;
      25             : double      word_similarity_threshold = 0.6f;
      26             : double      strict_word_similarity_threshold = 0.5f;
      27             : 
      28           4 : PG_FUNCTION_INFO_V1(set_limit);
      29           4 : PG_FUNCTION_INFO_V1(show_limit);
      30           4 : PG_FUNCTION_INFO_V1(show_trgm);
      31           4 : PG_FUNCTION_INFO_V1(similarity);
      32           4 : PG_FUNCTION_INFO_V1(word_similarity);
      33           4 : PG_FUNCTION_INFO_V1(strict_word_similarity);
      34           4 : PG_FUNCTION_INFO_V1(similarity_dist);
      35           4 : PG_FUNCTION_INFO_V1(similarity_op);
      36           4 : PG_FUNCTION_INFO_V1(word_similarity_op);
      37           4 : PG_FUNCTION_INFO_V1(word_similarity_commutator_op);
      38           2 : PG_FUNCTION_INFO_V1(word_similarity_dist_op);
      39           4 : PG_FUNCTION_INFO_V1(word_similarity_dist_commutator_op);
      40           4 : PG_FUNCTION_INFO_V1(strict_word_similarity_op);
      41           4 : PG_FUNCTION_INFO_V1(strict_word_similarity_commutator_op);
      42           2 : PG_FUNCTION_INFO_V1(strict_word_similarity_dist_op);
      43           4 : PG_FUNCTION_INFO_V1(strict_word_similarity_dist_commutator_op);
      44             : 
      45             : static int  CMPTRGM_CHOOSE(const void *a, const void *b);
      46             : int         (*CMPTRGM) (const void *a, const void *b) = CMPTRGM_CHOOSE;
      47             : 
      48             : /* Trigram with position */
      49             : typedef struct
      50             : {
      51             :     trgm        trg;
      52             :     int         index;
      53             : } pos_trgm;
      54             : 
      55             : /* Trigram bound type */
      56             : typedef uint8 TrgmBound;
      57             : #define TRGM_BOUND_LEFT             0x01    /* trigram is left bound of word */
      58             : #define TRGM_BOUND_RIGHT            0x02    /* trigram is right bound of word */
      59             : 
      60             : /* Word similarity flags */
      61             : #define WORD_SIMILARITY_CHECK_ONLY  0x01    /* only check existence of similar
      62             :                                              * search pattern in text */
      63             : #define WORD_SIMILARITY_STRICT      0x02    /* force bounds of extent to match
      64             :                                              * word bounds */
      65             : 
      66             : /*
      67             :  * Module load callback
      68             :  */
      69             : void
      70           6 : _PG_init(void)
      71             : {
      72             :     /* Define custom GUC variables. */
      73           6 :     DefineCustomRealVariable("pg_trgm.similarity_threshold",
      74             :                              "Sets the threshold used by the % operator.",
      75             :                              "Valid range is 0.0 .. 1.0.",
      76             :                              &similarity_threshold,
      77             :                              0.3f,
      78             :                              0.0,
      79             :                              1.0,
      80             :                              PGC_USERSET,
      81             :                              0,
      82             :                              NULL,
      83             :                              NULL,
      84             :                              NULL);
      85           6 :     DefineCustomRealVariable("pg_trgm.word_similarity_threshold",
      86             :                              "Sets the threshold used by the <% operator.",
      87             :                              "Valid range is 0.0 .. 1.0.",
      88             :                              &word_similarity_threshold,
      89             :                              0.6f,
      90             :                              0.0,
      91             :                              1.0,
      92             :                              PGC_USERSET,
      93             :                              0,
      94             :                              NULL,
      95             :                              NULL,
      96             :                              NULL);
      97           6 :     DefineCustomRealVariable("pg_trgm.strict_word_similarity_threshold",
      98             :                              "Sets the threshold used by the <<% operator.",
      99             :                              "Valid range is 0.0 .. 1.0.",
     100             :                              &strict_word_similarity_threshold,
     101             :                              0.5f,
     102             :                              0.0,
     103             :                              1.0,
     104             :                              PGC_USERSET,
     105             :                              0,
     106             :                              NULL,
     107             :                              NULL,
     108             :                              NULL);
     109             : 
     110           6 :     MarkGUCPrefixReserved("pg_trgm");
     111           6 : }
     112             : 
     113             : #define CMPCHAR(a,b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )
     114             : 
     115             : /*
     116             :  * Functions for comparing two trgms while treating each char as "signed char" or
     117             :  * "unsigned char".
     118             :  */
     119             : static inline int
     120    14921412 : CMPTRGM_SIGNED(const void *a, const void *b)
     121             : {
     122             : #define CMPPCHAR_S(a,b,i)  CMPCHAR( *(((const signed char*)(a))+i), *(((const signed char*)(b))+i) )
     123             : 
     124    10743246 :     return CMPPCHAR_S(a, b, 0) ? CMPPCHAR_S(a, b, 0)
     125    32522646 :         : (CMPPCHAR_S(a, b, 1) ? CMPPCHAR_S(a, b, 1)
     126     6857988 :            : CMPPCHAR_S(a, b, 2));
     127             : }
     128             : 
     129             : static inline int
     130           0 : CMPTRGM_UNSIGNED(const void *a, const void *b)
     131             : {
     132             : #define CMPPCHAR_UNS(a,b,i)  CMPCHAR( *(((const unsigned char*)(a))+i), *(((const unsigned char*)(b))+i) )
     133             : 
     134           0 :     return CMPPCHAR_UNS(a, b, 0) ? CMPPCHAR_UNS(a, b, 0)
     135           0 :         : (CMPPCHAR_UNS(a, b, 1) ? CMPPCHAR_UNS(a, b, 1)
     136           0 :            : CMPPCHAR_UNS(a, b, 2));
     137             : }
     138             : 
     139             : /*
     140             :  * This gets called on the first call. It replaces the function pointer so
     141             :  * that subsequent calls are routed directly to the chosen implementation.
     142             :  */
     143             : static int
     144           6 : CMPTRGM_CHOOSE(const void *a, const void *b)
     145             : {
     146           6 :     if (GetDefaultCharSignedness())
     147           6 :         CMPTRGM = CMPTRGM_SIGNED;
     148             :     else
     149           0 :         CMPTRGM = CMPTRGM_UNSIGNED;
     150             : 
     151           6 :     return CMPTRGM(a, b);
     152             : }
     153             : 
     154             : /*
     155             :  * Deprecated function.
     156             :  * Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
     157             :  */
     158             : Datum
     159           4 : set_limit(PG_FUNCTION_ARGS)
     160             : {
     161           4 :     float4      nlimit = PG_GETARG_FLOAT4(0);
     162             :     char       *nlimit_str;
     163             :     Oid         func_out_oid;
     164             :     bool        is_varlena;
     165             : 
     166           4 :     getTypeOutputInfo(FLOAT4OID, &func_out_oid, &is_varlena);
     167             : 
     168           4 :     nlimit_str = OidOutputFunctionCall(func_out_oid, Float4GetDatum(nlimit));
     169             : 
     170           4 :     SetConfigOption("pg_trgm.similarity_threshold", nlimit_str,
     171             :                     PGC_USERSET, PGC_S_SESSION);
     172             : 
     173           4 :     PG_RETURN_FLOAT4(similarity_threshold);
     174             : }
     175             : 
     176             : 
     177             : /*
     178             :  * Get similarity threshold for given index scan strategy number.
     179             :  */
     180             : double
     181       86806 : index_strategy_get_limit(StrategyNumber strategy)
     182             : {
     183       86806 :     switch (strategy)
     184             :     {
     185       65198 :         case SimilarityStrategyNumber:
     186       65198 :             return similarity_threshold;
     187        9644 :         case WordSimilarityStrategyNumber:
     188        9644 :             return word_similarity_threshold;
     189       11964 :         case StrictWordSimilarityStrategyNumber:
     190       11964 :             return strict_word_similarity_threshold;
     191           0 :         default:
     192           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     193             :             break;
     194             :     }
     195             : 
     196             :     return 0.0;                 /* keep compiler quiet */
     197             : }
     198             : 
     199             : /*
     200             :  * Deprecated function.
     201             :  * Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
     202             :  */
     203             : Datum
     204       40000 : show_limit(PG_FUNCTION_ARGS)
     205             : {
     206       40000 :     PG_RETURN_FLOAT4(similarity_threshold);
     207             : }
     208             : 
     209             : static int
     210     6374440 : comp_trgm(const void *a, const void *b)
     211             : {
     212     6374440 :     return CMPTRGM(a, b);
     213             : }
     214             : 
     215             : /*
     216             :  * Finds first word in string, returns pointer to the word,
     217             :  * endword points to the character after word
     218             :  */
     219             : static char *
     220      482822 : find_word(char *str, int lenstr, char **endword, int *charlen)
     221             : {
     222      482822 :     char       *beginword = str;
     223             : 
     224      510150 :     while (beginword - str < lenstr && !ISWORDCHR(beginword))
     225       27328 :         beginword += pg_mblen(beginword);
     226             : 
     227      482822 :     if (beginword - str >= lenstr)
     228      228160 :         return NULL;
     229             : 
     230      254662 :     *endword = beginword;
     231      254662 :     *charlen = 0;
     232     2199348 :     while (*endword - str < lenstr && ISWORDCHR(*endword))
     233             :     {
     234     1944686 :         *endword += pg_mblen(*endword);
     235     1944686 :         (*charlen)++;
     236             :     }
     237             : 
     238      254662 :     return beginword;
     239             : }
     240             : 
     241             : /*
     242             :  * Reduce a trigram (three possibly multi-byte characters) to a trgm,
     243             :  * which is always exactly three bytes.  If we have three single-byte
     244             :  * characters, we just use them as-is; otherwise we form a hash value.
     245             :  */
     246             : void
     247        2918 : compact_trigram(trgm *tptr, char *str, int bytelen)
     248             : {
     249        2918 :     if (bytelen == 3)
     250             :     {
     251        2918 :         CPTRGM(tptr, str);
     252             :     }
     253             :     else
     254             :     {
     255             :         pg_crc32    crc;
     256             : 
     257           0 :         INIT_LEGACY_CRC32(crc);
     258           0 :         COMP_LEGACY_CRC32(crc, str, bytelen);
     259           0 :         FIN_LEGACY_CRC32(crc);
     260             : 
     261             :         /*
     262             :          * use only 3 upper bytes from crc; hopefully that is good enough hashing
     263             :          */
     264           0 :         CPTRGM(tptr, &crc);
     265             :     }
     266        2918 : }
     267             : 
     268             : /*
     269             :  * Adds trigrams from words (already padded).
     270             :  */
     271             : static trgm *
     272      254790 : make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
     273             : {
     274      254790 :     char       *ptr = str;
     275             : 
     276      254790 :     if (charlen < 3)
     277          54 :         return tptr;
     278             : 
     279      254736 :     if (bytelen > charlen)
     280             :     {
     281             :         /* Find multibyte character boundaries and apply compact_trigram */
     282           0 :         int         lenfirst = pg_mblen(str),
     283           0 :                     lenmiddle = pg_mblen(str + lenfirst),
     284           0 :                     lenlast = pg_mblen(str + lenfirst + lenmiddle);
     285             : 
     286           0 :         while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
     287             :         {
     288           0 :             compact_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast);
     289             : 
     290           0 :             ptr += lenfirst;
     291           0 :             tptr++;
     292             : 
     293           0 :             lenfirst = lenmiddle;
     294           0 :             lenmiddle = lenlast;
     295           0 :             lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
     296             :         }
     297             :     }
     298             :     else
     299             :     {
     300             :         /* Fast path when there are no multibyte characters */
     301             :         Assert(bytelen == charlen);
     302             : 
     303     2454266 :         while (ptr - str < bytelen - 2 /* number of trigrams = strlen - 2 */ )
     304             :         {
     305     2199530 :             CPTRGM(tptr, ptr);
     306     2199530 :             ptr++;
     307     2199530 :             tptr++;
     308             :         }
     309             :     }
     310             : 
     311      254736 :     return tptr;
     312             : }
     313             : 
     314             : /*
     315             :  * Make array of trigrams without sorting and removing duplicate items.
     316             :  *
     317             :  * trg: where to return the array of trigrams.
     318             :  * str: source string, of length slen bytes.
     319             :  * bounds: where to return bounds of trigrams (if needed).
     320             :  *
     321             :  * Returns length of the generated array.
     322             :  */
     323             : static int
     324      228162 : generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
     325             : {
     326             :     trgm       *tptr;
     327             :     char       *buf;
     328             :     int         charlen,
     329             :                 bytelen;
     330             :     char       *bword,
     331             :                *eword;
     332             : 
     333      228162 :     if (slen + LPADDING + RPADDING < 3 || slen == 0)
     334           2 :         return 0;
     335             : 
     336      228160 :     tptr = trg;
     337             : 
     338             :     /* Allocate a buffer for case-folded, blank-padded words */
     339      228160 :     buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4);
     340             : 
     341             :     if (LPADDING > 0)
     342             :     {
     343      228160 :         *buf = ' ';
     344             :         if (LPADDING > 1)
     345      228160 :             *(buf + 1) = ' ';
     346             :     }
     347             : 
     348      228160 :     eword = str;
     349      482822 :     while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL)
     350             :     {
     351             : #ifdef IGNORECASE
     352      254662 :         bword = str_tolower(bword, eword - bword, DEFAULT_COLLATION_OID);
     353      254662 :         bytelen = strlen(bword);
     354             : #else
     355             :         bytelen = eword - bword;
     356             : #endif
     357             : 
     358      254662 :         memcpy(buf + LPADDING, bword, bytelen);
     359             : 
     360             : #ifdef IGNORECASE
     361      254662 :         pfree(bword);
     362             : #endif
     363             : 
     364      254662 :         buf[LPADDING + bytelen] = ' ';
     365      254662 :         buf[LPADDING + bytelen + 1] = ' ';
     366             : 
     367             :         /* Calculate trigrams marking their bounds if needed */
     368      254662 :         if (bounds)
     369       24796 :             bounds[tptr - trg] |= TRGM_BOUND_LEFT;
     370      254662 :         tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING,
     371             :                              charlen + LPADDING + RPADDING);
     372      254662 :         if (bounds)
     373       24796 :             bounds[tptr - trg - 1] |= TRGM_BOUND_RIGHT;
     374             :     }
     375             : 
     376      228160 :     pfree(buf);
     377             : 
     378      228160 :     return tptr - trg;
     379             : }
     380             : 
     381             : /*
     382             :  * Guard against possible overflow in the palloc requests below.  (We
     383             :  * don't worry about the additive constants, since palloc can detect
     384             :  * requests that are a little above MaxAllocSize --- we just need to
     385             :  * prevent integer overflow in the multiplications.)
     386             :  */
     387             : static void
     388      204020 : protect_out_of_mem(int slen)
     389             : {
     390      204020 :     if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) ||
     391      204020 :         (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length()))
     392           0 :         ereport(ERROR,
     393             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     394             :                  errmsg("out of memory")));
     395      204020 : }
     396             : 
     397             : /*
     398             :  * Make array of trigrams with sorting and removing duplicate items.
     399             :  *
     400             :  * str: source string, of length slen bytes.
     401             :  *
     402             :  * Returns the sorted array of unique trigrams.
     403             :  */
     404             : TRGM *
     405      179658 : generate_trgm(char *str, int slen)
     406             : {
     407             :     TRGM       *trg;
     408             :     int         len;
     409             : 
     410      179658 :     protect_out_of_mem(slen);
     411             : 
     412      179658 :     trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
     413      179658 :     trg->flag = ARRKEY;
     414             : 
     415      179658 :     len = generate_trgm_only(GETARR(trg), str, slen, NULL);
     416      179658 :     SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
     417             : 
     418      179658 :     if (len == 0)
     419           8 :         return trg;
     420             : 
     421             :     /*
     422             :      * Make trigrams unique.
     423             :      */
     424      179650 :     if (len > 1)
     425             :     {
     426      179650 :         qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
     427      179650 :         len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
     428             :     }
     429             : 
     430      179650 :     SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
     431             : 
     432      179650 :     return trg;
     433             : }
     434             : 
     435             : /*
     436             :  * Make array of positional trigrams from two trigram arrays trg1 and trg2.
     437             :  *
     438             :  * trg1: trigram array of search pattern, of length len1. trg1 is the required
     439             :  *       word, whose positions don't matter and are replaced with -1.
     440             :  * trg2: trigram array of text, of length len2. trg2 is haystack where we
     441             :  *       search and have to store its positions.
     442             :  *
     443             :  * Returns concatenated trigram array.
     444             :  */
     445             : static pos_trgm *
     446       24252 : make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2)
     447             : {
     448             :     pos_trgm   *result;
     449             :     int         i,
     450       24252 :                 len = len1 + len2;
     451             : 
     452       24252 :     result = (pos_trgm *) palloc(sizeof(pos_trgm) * len);
     453             : 
     454      241728 :     for (i = 0; i < len1; i++)
     455             :     {
     456      217476 :         memcpy(&result[i].trg, &trg1[i], sizeof(trgm));
     457      217476 :         result[i].index = -1;
     458             :     }
     459             : 
     460      384424 :     for (i = 0; i < len2; i++)
     461             :     {
     462      360172 :         memcpy(&result[i + len1].trg, &trg2[i], sizeof(trgm));
     463      360172 :         result[i + len1].index = i;
     464             :     }
     465             : 
     466       24252 :     return result;
     467             : }
     468             : 
     469             : /*
     470             :  * Compare position trigrams: compare trigrams first and position second.
     471             :  */
     472             : static int
     473     2615444 : comp_ptrgm(const void *v1, const void *v2)
     474             : {
     475     2615444 :     const pos_trgm *p1 = (const pos_trgm *) v1;
     476     2615444 :     const pos_trgm *p2 = (const pos_trgm *) v2;
     477             :     int         cmp;
     478             : 
     479     2615444 :     cmp = CMPTRGM(p1->trg, p2->trg);
     480     2615444 :     if (cmp != 0)
     481     2536034 :         return cmp;
     482             : 
     483       79410 :     return pg_cmp_s32(p1->index, p2->index);
     484             : }
     485             : 
     486             : /*
     487             :  * Iterative search function which calculates maximum similarity with word in
     488             :  * the string. Maximum similarity is calculated only if the flag
     489             :  * WORD_SIMILARITY_CHECK_ONLY isn't set.
     490             :  *
     491             :  * trg2indexes: array which stores indexes of the array "found".
     492             :  * found: array which stores true or false values.
     493             :  * ulen1: count of unique trigrams of array "trg1".
     494             :  * len2: length of array "trg2" and array "trg2indexes".
     495             :  * len: length of the array "found".
     496             :  * flags: set of boolean flags parameterizing similarity calculation.
     497             :  * bounds: whether each trigram is left/right bound of word.
     498             :  *
     499             :  * Returns word similarity.
     500             :  */
     501             : static float4
     502       24252 : iterate_word_similarity(int *trg2indexes,
     503             :                         bool *found,
     504             :                         int ulen1,
     505             :                         int len2,
     506             :                         int len,
     507             :                         uint8 flags,
     508             :                         TrgmBound *bounds)
     509             : {
     510             :     int        *lastpos,
     511             :                 i,
     512       24252 :                 ulen2 = 0,
     513       24252 :                 count = 0,
     514       24252 :                 upper = -1,
     515             :                 lower;
     516             :     float4      smlr_cur,
     517       24252 :                 smlr_max = 0.0f;
     518             :     double      threshold;
     519             : 
     520             :     Assert(bounds || !(flags & WORD_SIMILARITY_STRICT));
     521             : 
     522             :     /* Select appropriate threshold */
     523       48504 :     threshold = (flags & WORD_SIMILARITY_STRICT) ?
     524       24252 :         strict_word_similarity_threshold :
     525             :         word_similarity_threshold;
     526             : 
     527             :     /*
     528             :      * Consider first trigram as initial lower bound for strict word
     529             :      * similarity, or initialize it later with first trigram present for plain
     530             :      * word similarity.
     531             :      */
     532       24252 :     lower = (flags & WORD_SIMILARITY_STRICT) ? 0 : -1;
     533             : 
     534             :     /* Memorise last position of each trigram */
     535       24252 :     lastpos = (int *) palloc(sizeof(int) * len);
     536       24252 :     memset(lastpos, -1, sizeof(int) * len);
     537             : 
     538      367284 :     for (i = 0; i < len2; i++)
     539             :     {
     540             :         int         trgindex;
     541             : 
     542      346600 :         CHECK_FOR_INTERRUPTS();
     543             : 
     544             :         /* Get index of next trigram */
     545      346600 :         trgindex = trg2indexes[i];
     546             : 
     547             :         /* Update last position of this trigram */
     548      346600 :         if (lower >= 0 || found[trgindex])
     549             :         {
     550      271584 :             if (lastpos[trgindex] < 0)
     551             :             {
     552      267876 :                 ulen2++;
     553      267876 :                 if (found[trgindex])
     554       61512 :                     count++;
     555             :             }
     556      271584 :             lastpos[trgindex] = i;
     557             :         }
     558             : 
     559             :         /*
     560             :          * Adjust upper bound if trigram is upper bound of word for strict
     561             :          * word similarity, or if trigram is present in required substring for
     562             :          * plain word similarity
     563             :          */
     564      500684 :         if ((flags & WORD_SIMILARITY_STRICT) ? (bounds[i] & TRGM_BOUND_RIGHT)
     565      154084 :             : found[trgindex])
     566             :         {
     567             :             int         prev_lower,
     568             :                         tmp_ulen2,
     569             :                         tmp_lower,
     570             :                         tmp_count;
     571             : 
     572       51272 :             upper = i;
     573       51272 :             if (lower == -1)
     574             :             {
     575        9390 :                 lower = i;
     576        9390 :                 ulen2 = 1;
     577             :             }
     578             : 
     579       51272 :             smlr_cur = CALCSML(count, ulen1, ulen2);
     580             : 
     581             :             /* Also try to adjust lower bound for greater similarity */
     582       51272 :             tmp_count = count;
     583       51272 :             tmp_ulen2 = ulen2;
     584       51272 :             prev_lower = lower;
     585      417180 :             for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++)
     586             :             {
     587             :                 float       smlr_tmp;
     588             :                 int         tmp_trgindex;
     589             : 
     590             :                 /*
     591             :                  * Adjust lower bound only if trigram is lower bound of word
     592             :                  * for strict word similarity, or consider every trigram as
     593             :                  * lower bound for plain word similarity.
     594             :                  */
     595      369476 :                 if (!(flags & WORD_SIMILARITY_STRICT)
     596      290346 :                     || (bounds[tmp_lower] & TRGM_BOUND_LEFT))
     597             :                 {
     598      119394 :                     smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2);
     599      119394 :                     if (smlr_tmp > smlr_cur)
     600             :                     {
     601        7024 :                         smlr_cur = smlr_tmp;
     602        7024 :                         ulen2 = tmp_ulen2;
     603        7024 :                         lower = tmp_lower;
     604        7024 :                         count = tmp_count;
     605             :                     }
     606             : 
     607             :                     /*
     608             :                      * If we only check that word similarity is greater than
     609             :                      * threshold we do not need to calculate a maximum
     610             :                      * similarity.
     611             :                      */
     612      119394 :                     if ((flags & WORD_SIMILARITY_CHECK_ONLY)
     613       74228 :                         && smlr_cur >= threshold)
     614        3568 :                         break;
     615             :                 }
     616             : 
     617      365908 :                 tmp_trgindex = trg2indexes[tmp_lower];
     618      365908 :                 if (lastpos[tmp_trgindex] == tmp_lower)
     619             :                 {
     620      361384 :                     tmp_ulen2--;
     621      361384 :                     if (found[tmp_trgindex])
     622       93158 :                         tmp_count--;
     623             :                 }
     624             :             }
     625             : 
     626       51272 :             smlr_max = Max(smlr_max, smlr_cur);
     627             : 
     628             :             /*
     629             :              * if we only check that word similarity is greater than threshold
     630             :              * we do not need to calculate a maximum similarity.
     631             :              */
     632       51272 :             if ((flags & WORD_SIMILARITY_CHECK_ONLY) && smlr_max >= threshold)
     633        3568 :                 break;
     634             : 
     635       81216 :             for (tmp_lower = prev_lower; tmp_lower < lower; tmp_lower++)
     636             :             {
     637             :                 int         tmp_trgindex;
     638             : 
     639       33512 :                 tmp_trgindex = trg2indexes[tmp_lower];
     640       33512 :                 if (lastpos[tmp_trgindex] == tmp_lower)
     641       32014 :                     lastpos[tmp_trgindex] = -1;
     642             :             }
     643             :         }
     644             :     }
     645             : 
     646       24252 :     pfree(lastpos);
     647             : 
     648       24252 :     return smlr_max;
     649             : }
     650             : 
     651             : /*
     652             :  * Calculate word similarity.
     653             :  * This function prepares two arrays: "trg2indexes" and "found". These arrays
     654             :  * are then used to calculate word similarity by iterate_word_similarity().
     655             :  *
     656             :  * "trg2indexes" is an array which stores indexes into the array "found".
     657             :  * In other words:
     658             :  * trg2indexes[j] = i;
     659             :  * found[i] = true (or false);
     660             :  * If found[i] == true then trigram trg2[j] is present in array "trg1".
     661             :  * If found[i] == false then trigram trg2[j] is not present in array "trg1".
     662             :  *
     663             :  * str1: search pattern string, of length slen1 bytes.
     664             :  * str2: text in which we are looking for a word, of length slen2 bytes.
     665             :  * flags: bitmask of WORD_SIMILARITY_* flags (0 for none).
     666             :  *
     667             :  * Returns word similarity.
     668             :  */
     669             : static float4
     670       24252 : calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
     671             :                      uint8 flags)
     672             : {
     673             :     bool       *found;          /* per run of equal trigrams: seen in trg1? */
     674             :     pos_trgm   *ptrg;           /* len1 + len2 entries, sorted below */
     675             :     trgm       *trg1;
     676             :     trgm       *trg2;
     677             :     int         len1,
     678             :                 len2,
     679             :                 len,
     680             :                 i,
     681             :                 j,
     682             :                 ulen1;          /* number of runs that have found[] set */
     683             :     int        *trg2indexes;    /* trg2 position -> index into found[] */
     684             :     float4      result;
     685             :     TrgmBound  *bounds;         /* allocated in strict mode only, else NULL */
     686             : 
     687       24252 :     protect_out_of_mem(slen1 + slen2);
     688             : 
     689             :     /* Make positional trigrams */
     690       24252 :     trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3);
     691       24252 :     trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3);
     692       24252 :     if (flags & WORD_SIMILARITY_STRICT)
     693       13324 :         bounds = (TrgmBound *) palloc0(sizeof(TrgmBound) * (slen2 / 2 + 1) * 3);
     694             :     else
     695       10928 :         bounds = NULL;
     696             :     /* Bounds are requested for str2 only; the str1 call passes NULL */
     697       24252 :     len1 = generate_trgm_only(trg1, str1, slen1, NULL);
     698       24252 :     len2 = generate_trgm_only(trg2, str2, slen2, bounds);
     699             :     /* ptrg is treated as len1 + len2 entries and sorted with comp_ptrgm */
     700       24252 :     ptrg = make_positional_trgm(trg1, len1, trg2, len2);
     701       24252 :     len = len1 + len2;
     702       24252 :     qsort(ptrg, len, sizeof(pos_trgm), comp_ptrgm);
     703             :     /* The separate trigram arrays are not used past this point */
     704       24252 :     pfree(trg1);
     705       24252 :     pfree(trg2);
     706             : 
     707             :     /*
     708             :      * Merge positional trigrams array: enumerate each trigram and find its
     709             :      * presence in required word.
     710             :      */
     711       24252 :     trg2indexes = (int *) palloc(sizeof(int) * len2);
     712       24252 :     found = (bool *) palloc0(sizeof(bool) * len);
     713             :     /* j numbers the runs of equal trigrams in the sorted ptrg array */
     714       24252 :     ulen1 = 0;
     715       24252 :     j = 0;
     716      601900 :     for (i = 0; i < len; i++)
     717             :     {
     718      577648 :         if (i > 0)
     719             :         {
     720      553396 :             int         cmp = CMPTRGM(ptrg[i - 1].trg, ptrg[i].trg);
     721             :             /* trigram changed: finish run j and move on to the next one */
     722      553396 :             if (cmp != 0)
     723             :             {
     724      484992 :                 if (found[j])
     725      202274 :                     ulen1++;
     726      484992 :                 j++;
     727             :             }
     728             :         }
     729             :         /* index >= 0 is used as a trg2 position; otherwise mark run j found */
     730      577648 :         if (ptrg[i].index >= 0)
     731             :         {
     732      360172 :             trg2indexes[ptrg[i].index] = j;
     733             :         }
     734             :         else
     735             :         {
     736      217476 :             found[j] = true;
     737             :         }
     738             :     }
     739       24252 :     if (found[j])
     740       15202 :         ulen1++;                /* the last run is not closed inside the loop */
     741             : 
     742             :     /* Run iterative procedure to find maximum similarity with word */
     743       24252 :     result = iterate_word_similarity(trg2indexes, found, ulen1, len2, len,
     744             :                                      flags, bounds);
     745             :     /* NOTE(review): bounds is not pfree'd in this function */
     746       24252 :     pfree(trg2indexes);
     747       24252 :     pfree(found);
     748       24252 :     pfree(ptrg);
     749             : 
     750       24252 :     return result;
     751             : }
     752             : 
     753             : 
     754             : /*
     755             :  * Extract the next non-wildcard part of a search string, i.e. a word bounded
     756             :  * by '_' or '%' meta-characters, non-word characters or string end.
     757             :  *
     758             :  * str: source string, of length lenstr bytes (need not be null-terminated)
     759             :  * buf: where to return the substring (must be long enough; not null-terminated)
     760             :  * *bytelen: receives byte length of the found substring, padding included
     761             :  * *charlen: receives character length of the found substring, padding included
     762             :  *
     763             :  * Returns pointer to end+1 of the found substring in the source string.
     764             :  * Returns NULL if no word found (in which case buf, bytelen, charlen not set)
     765             :  *
     766             :  * If the found word is bounded by non-word characters or string boundaries
     767             :  * then this function will include corresponding padding spaces in buf.
     768             :  */
     769             : static const char *
     770         238 : get_wildcard_part(const char *str, int lenstr,
     771             :                   char *buf, int *bytelen, int *charlen)
     772             : {
     773         238 :     const char *beginword = str;
     774             :     const char *endword;        /* copy-loop position; this is the return value */
     775         238 :     char       *s = buf;        /* next write position in buf */
     776         238 :     bool        in_leading_wildcard_meta = false;
     777         238 :     bool        in_trailing_wildcard_meta = false;
     778         238 :     bool        in_escape = false;
     779             :     int         clen;           /* pg_mblen() of the character at endword */
     780             : 
     781             :     /*
     782             :      * Find the first word character, remembering whether preceding character
     783             :      * was wildcard meta-character.  Note that the in_escape state persists
     784             :      * from this loop to the next one, since we may exit at a word character
     785             :      * that is in_escape.
     786             :      */
     787         482 :     while (beginword - str < lenstr)
     788             :     {
     789         372 :         if (in_escape)
     790             :         {
     791           6 :             if (ISWORDCHR(beginword))
     792           6 :                 break;
     793           0 :             in_escape = false;
     794           0 :             in_leading_wildcard_meta = false;
     795             :         }
     796             :         else
     797             :         {
     798         366 :             if (ISESCAPECHAR(beginword))
     799           6 :                 in_escape = true;
     800         360 :             else if (ISWILDCARDCHAR(beginword))
     801         208 :                 in_leading_wildcard_meta = true;
     802         152 :             else if (ISWORDCHR(beginword))
     803         122 :                 break;
     804             :             else
     805          30 :                 in_leading_wildcard_meta = false;
     806             :         }
     807         244 :         beginword += pg_mblen(beginword);   /* skip this character */
     808             :     }
     809             : 
     810             :     /*
     811             :      * Handle string end.
     812             :      */
     813         238 :     if (beginword - str >= lenstr)
     814         110 :         return NULL;
     815             : 
     816             :     /*
     817             :      * Add left padding spaces if preceding character wasn't wildcard
     818             :      * meta-character.
     819             :      */
     820         128 :     *charlen = 0;
     821         128 :     if (!in_leading_wildcard_meta)
     822             :     {
     823             :         if (LPADDING > 0)       /* at most two pad spaces are written here */
     824             :         {
     825          30 :             *s++ = ' ';
     826          30 :             (*charlen)++;
     827             :             if (LPADDING > 1)
     828             :             {
     829          30 :                 *s++ = ' ';
     830          30 :                 (*charlen)++;
     831             :             }
     832             :         }
     833             :     }
     834             : 
     835             :     /*
     836             :      * Copy data into buf until wildcard meta-character, non-word character or
     837             :      * string boundary.  Strip escapes during copy.
     838             :      */
     839         128 :     endword = beginword;
     840         488 :     while (endword - str < lenstr)
     841             :     {
     842         488 :         clen = pg_mblen(endword);
     843         488 :         if (in_escape)
     844             :         {
     845           6 :             if (ISWORDCHR(endword))
     846             :             {
     847           6 :                 memcpy(s, endword, clen);
     848           6 :                 (*charlen)++;
     849           6 :                 s += clen;
     850             :             }
     851             :             else
     852             :             {
     853             :                 /*
     854             :                  * Back up endword to the escape character when stopping at an
     855             :                  * escaped char, so that subsequent get_wildcard_part will
     856             :                  * restart from the escape character.  We assume here that
     857             :                  * escape chars are single-byte.
     858             :                  */
     859           0 :                 endword--;
     860           0 :                 break;
     861             :             }
     862           6 :             in_escape = false;
     863             :         }
     864             :         else
     865             :         {
     866         482 :             if (ISESCAPECHAR(endword))
     867           0 :                 in_escape = true;   /* the escape itself is not copied */
     868         482 :             else if (ISWILDCARDCHAR(endword))
     869             :             {
     870         110 :                 in_trailing_wildcard_meta = true;
     871         110 :                 break;
     872             :             }
     873         372 :             else if (ISWORDCHR(endword))
     874             :             {
     875         354 :                 memcpy(s, endword, clen);
     876         354 :                 (*charlen)++;
     877         354 :                 s += clen;
     878             :             }
     879             :             else
     880          18 :                 break;          /* unescaped non-word character ends the word */
     881             :         }
     882         360 :         endword += clen;
     883             :     }
     884             : 
     885             :     /*
     886             :      * Add right padding spaces if next character isn't wildcard
     887             :      * meta-character.
     888             :      */
     889         128 :     if (!in_trailing_wildcard_meta)
     890             :     {
     891             :         if (RPADDING > 0)       /* at most two pad spaces are written here */
     892             :         {
     893          18 :             *s++ = ' ';
     894          18 :             (*charlen)++;
     895             :             if (RPADDING > 1)
     896             :             {
     897             :                 *s++ = ' ';
     898             :                 (*charlen)++;
     899             :             }
     900             :         }
     901             :     }
     902             :     /* No terminator is written to buf; its length is reported via *bytelen */
     903         128 :     *bytelen = s - buf;
     904         128 :     return endword;
     905             : }
     906             : 
     907             : /*
     908             :  * Generates trigrams for wildcard search string "str" of length slen bytes.
     909             :  *
     910             :  * Returns a palloc'd array of trigrams that must occur in any string matching
     911             :  * the wildcard string.  For example, given pattern "a%bcd%" the trigrams
     912             :  * "  a", "bcd" would be extracted.
     913             :  */
     914             : TRGM *
     915         110 : generate_wildcard_trgm(const char *str, int slen)
     916             : {
     917             :     TRGM       *trg;
     918             :     char       *buf,
     919             :                *buf2;
     920             :     trgm       *tptr;           /* next free slot in trg's trigram array */
     921             :     int         len,
     922             :                 charlen,
     923             :                 bytelen;
     924             :     const char *eword;          /* where the next get_wildcard_part() call starts */
     925             : 
     926         110 :     protect_out_of_mem(slen);
     927             :     /* Start as an empty ARRKEY array with room for (slen / 2 + 1) * 3 trigrams */
     928         110 :     trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
     929         110 :     trg->flag = ARRKEY;
     930         110 :     SET_VARSIZE(trg, TRGMHDRSIZE);
     931             :     /* Input too short for any trigram: return the empty array */
     932         110 :     if (slen + LPADDING + RPADDING < 3 || slen == 0)
     933           0 :         return trg;
     934             : 
     935         110 :     tptr = GETARR(trg);
     936             : 
     937             :     /* Allocate a buffer for blank-padded, but not yet case-folded, words */
     938         110 :     buf = palloc(sizeof(char) * (slen + 4));    /* + 4: up to 2 pad spaces per side */
     939             : 
     940             :     /*
     941             :      * Extract trigrams from each substring extracted by get_wildcard_part.
     942             :      */
     943         110 :     eword = str;
     944         238 :     while ((eword = get_wildcard_part(eword, slen - (eword - str),
     945             :                                       buf, &bytelen, &charlen)) != NULL)
     946             :     {
     947             : #ifdef IGNORECASE
     948         128 :         buf2 = str_tolower(buf, bytelen, DEFAULT_COLLATION_OID);
     949         128 :         bytelen = strlen(buf2); /* byte length of the case-folded copy */
     950             : #else
     951             :         buf2 = buf;
     952             : #endif
     953             : 
     954             :         /*
     955             :          * append this word's trigrams; tptr advances past them
     956             :          */
     957         128 :         tptr = make_trigrams(tptr, buf2, bytelen, charlen);
     958             : 
     959             : #ifdef IGNORECASE
     960         128 :         pfree(buf2);
     961             : #endif
     962             :     }
     963             : 
     964         110 :     pfree(buf);
     965             :     /* No trigram was produced: trg still carries the empty size set earlier */
     966         110 :     if ((len = tptr - GETARR(trg)) == 0)
     967          48 :         return trg;
     968             : 
     969             :     /*
     970             :      * Make trigrams unique.
     971             :      */
     972          62 :     if (len > 1)
     973             :     {
     974          34 :         qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
     975          34 :         len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
     976             :     }
     977             :     /* Store the size that corresponds to the final trigram count */
     978          62 :     SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
     979             : 
     980          62 :     return trg;
     981             : }
     982             : /* Pack the 3 bytes at ptr into a uint32, first byte in the highest position. */
     983             : uint32
     984       69546 : trgm2int(trgm *ptr)
     985             : {
     986       69546 :     uint32      val = 0;
     987             :     /* bytes are read as unsigned char, so no sign extension can occur */
     988       69546 :     val |= *(((unsigned char *) ptr));
     989       69546 :     val <<= 8;
     990       69546 :     val |= *(((unsigned char *) ptr) + 1);
     991       69546 :     val <<= 8;
     992       69546 :     val |= *(((unsigned char *) ptr) + 2);
     993             :     /* val == (byte0 << 16) | (byte1 << 8) | byte2 */
     994       69546 :     return val;
     995             : }
     996             : /* fmgr function: return the trigrams of a text value as an array of text. */
     997             : Datum
     998          14 : show_trgm(PG_FUNCTION_ARGS)
     999             : {
    1000          14 :     text       *in = PG_GETARG_TEXT_PP(0);
    1001             :     TRGM       *trg;
    1002             :     Datum      *d;              /* one text datum per trigram */
    1003             :     ArrayType  *a;
    1004             :     trgm       *ptr;
    1005             :     int         i;
    1006             :     /* d[] is allocated with one slot more than the number of trigrams */
    1007          14 :     trg = generate_trgm(VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
    1008          14 :     d = (Datum *) palloc(sizeof(Datum) * (1 + ARRNELEM(trg)));
    1009             : 
    1010          88 :     for (i = 0, ptr = GETARR(trg); i < ARRNELEM(trg); i++, ptr++)
    1011             :     {
    1012          74 :         text       *item = (text *) palloc(VARHDRSZ + Max(12, pg_database_encoding_max_length() * 3));
    1013             :         /* Multibyte encoding and not printable: show as hex of trgm2int() */
    1014          74 :         if (pg_database_encoding_max_length() > 1 && !ISPRINTABLETRGM(ptr))
    1015             :         {
    1016           0 :             snprintf(VARDATA(item), 12, "0x%06x", trgm2int(ptr));
    1017           0 :             SET_VARSIZE(item, VARHDRSZ + strlen(VARDATA(item)));
    1018             :         }
    1019             :         else
    1020             :         {
    1021          74 :             SET_VARSIZE(item, VARHDRSZ + 3);    /* otherwise the 3 raw bytes */
    1022          74 :             CPTRGM(VARDATA(item), ptr);
    1023             :         }
    1024          74 :         d[i] = PointerGetDatum(item);
    1025             :     }
    1026             : 
    1027          14 :     a = construct_array_builtin(d, ARRNELEM(trg), TEXTOID);
    1028             :     /* The items are freed once the array has been built from them */
    1029          88 :     for (i = 0; i < ARRNELEM(trg); i++)
    1030          74 :         pfree(DatumGetPointer(d[i]));
    1031             : 
    1032          14 :     pfree(d);
    1033          14 :     pfree(trg);
    1034          14 :     PG_FREE_IF_COPY(in, 0);
    1035             : 
    1036          14 :     PG_RETURN_POINTER(a);
    1037             : }
    1038             : /* Similarity of two trigram arrays, from the count of trigrams they share. */
    1039             : float4
    1040      138214 : cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
    1041             : {
    1042             :     trgm       *ptr1,
    1043             :                *ptr2;
    1044      138214 :     int         count = 0;      /* trigrams matched in both arrays */
    1045             :     int         len1,
    1046             :                 len2;
    1047             : 
    1048      138214 :     ptr1 = GETARR(trg1);
    1049      138214 :     ptr2 = GETARR(trg2);
    1050             : 
    1051      138214 :     len1 = ARRNELEM(trg1);
    1052      138214 :     len2 = ARRNELEM(trg2);
    1053             : 
    1054             :     /* explicit test is needed to avoid 0/0 division when both lengths are 0 */
    1055      138214 :     if (len1 <= 0 || len2 <= 0)
    1056           2 :         return (float4) 0.0;
    1057             :     /* Merge-style walk; it pairs up all equal trigrams only if both arrays are sorted */
    1058     1762582 :     while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
    1059             :     {
    1060     1624370 :         int         res = CMPTRGM(ptr1, ptr2);
    1061             :         /* advance the side holding the smaller trigram; on equality count a match */
    1062     1624370 :         if (res < 0)
    1063      369688 :             ptr1++;
    1064     1254682 :         else if (res > 0)
    1065      430438 :             ptr2++;
    1066             :         else
    1067             :         {
    1068      824244 :             ptr1++;
    1069      824244 :             ptr2++;
    1070      824244 :             count++;
    1071             :         }
    1072             :     }
    1073             : 
    1074             :     /*
    1075             :      * If inexact then len2 is equal to count, because we don't know actual
    1076             :      * length of second string in inexact search and we can assume that count
    1077             :      * is a lower bound of len2.
    1078             :      */
    1079      138212 :     return CALCSML(count, len1, inexact ? count : len2);
    1080             : }
    1081             : 
    1082             : 
    1083             : /*
    1084             :  * Returns whether trg2 contains all trigrams in trg1 (true if trg1 is empty).
    1085             :  * This relies on both trigram arrays being sorted.
    1086             :  */
    1087             : bool
    1088         380 : trgm_contained_by(TRGM *trg1, TRGM *trg2)
    1089             : {
    1090             :     trgm       *ptr1,
    1091             :                *ptr2;
    1092             :     int         len1,
    1093             :                 len2;
    1094             : 
    1095         380 :     ptr1 = GETARR(trg1);
    1096         380 :     ptr2 = GETARR(trg2);
    1097             : 
    1098         380 :     len1 = ARRNELEM(trg1);
    1099         380 :     len2 = ARRNELEM(trg2);
    1100             :     /* Scan trg2 forward, matching the trg1 trigrams in order */
    1101        1244 :     while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
    1102             :     {
    1103        1198 :         int         res = CMPTRGM(ptr1, ptr2);
    1104             :         /* res < 0: *ptr1 sorts before everything left in trg2, so it is missing */
    1105        1198 :         if (res < 0)
    1106         334 :             return false;
    1107         864 :         else if (res > 0)
    1108         640 :             ptr2++;             /* skip a trg2 trigram that sorts before *ptr1 */
    1109             :         else
    1110             :         {
    1111         224 :             ptr1++;
    1112         224 :             ptr2++;
    1113             :         }
    1114             :     }
    1115          46 :     if (ptr1 - GETARR(trg1) < len1) /* trg2 ran out with trg1 trigrams left */
    1116           8 :         return false;
    1117             :     else
    1118          38 :         return true;
    1119             : }
    1120             : 
    1121             : /*
    1122             :  * Return a palloc'd boolean array with one entry per trigram in "query" (in
    1123             :  * query order), true where that trigram is present in the trigram array "key".
    1124             :  * This relies on the "key" array being sorted, but "query" need not be.
    1125             :  */
    1126             : bool *
    1127        4300 : trgm_presence_map(TRGM *query, TRGM *key)
    1128             : {
    1129             :     bool       *result;
    1130        4300 :     trgm       *ptrq = GETARR(query),
    1131        4300 :                *ptrk = GETARR(key);
    1132        4300 :     int         lenq = ARRNELEM(query),
    1133        4300 :                 lenk = ARRNELEM(key),
    1134             :                 i;
    1135             :     /* palloc0 zero-fills, so entries stay false unless a match is found */
    1136        4300 :     result = (bool *) palloc0(lenq * sizeof(bool));
    1137             : 
    1138             :     /* for each query trigram, do a binary search in the key array */
    1139     1015120 :     for (i = 0; i < lenq; i++)
    1140             :     {
    1141     1010820 :         int         lo = 0;
    1142     1010820 :         int         hi = lenk;
    1143             :         /* positions lo .. hi - 1 of the key array are still to be searched */
    1144     4747306 :         while (lo < hi)
    1145             :         {
    1146     3752564 :             int         mid = (lo + hi) / 2;
    1147     3752564 :             int         res = CMPTRGM(ptrq, ptrk + mid);
    1148             : 
    1149     3752564 :             if (res < 0)
    1150     1568164 :                 hi = mid;
    1151     2184400 :             else if (res > 0)
    1152     2168322 :                 lo = mid + 1;
    1153             :             else
    1154             :             {
    1155       16078 :                 result[i] = true;
    1156       16078 :                 break;
    1157             :             }
    1158             :         }
    1159     1010820 :         ptrq++;                 /* next query trigram */
    1160             :     }
    1161             : 
    1162        4300 :     return result;
    1163             : }
    1164             : /* fmgr function: similarity of two text values, computed by cnt_sml(). */
    1165             : Datum
    1166       62904 : similarity(PG_FUNCTION_ARGS)
    1167             : {
    1168       62904 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1169       62904 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1170             :     TRGM       *trg1,
    1171             :                *trg2;
    1172             :     float4      res;
    1173             :     /* Build the trigram array of each argument */
    1174       62904 :     trg1 = generate_trgm(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1));
    1175       62904 :     trg2 = generate_trgm(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2));
    1176             :     /* inexact = false: cnt_sml() uses the real trigram count of trg2 */
    1177       62904 :     res = cnt_sml(trg1, trg2, false);
    1178             : 
    1179       62904 :     pfree(trg1);
    1180       62904 :     pfree(trg2);
    1181       62904 :     PG_FREE_IF_COPY(in1, 0);
    1182       62904 :     PG_FREE_IF_COPY(in2, 1);
    1183             : 
    1184       62904 :     PG_RETURN_FLOAT4(res);
    1185             : }
    1186             : /* fmgr function: word similarity of argument 0 within argument 1, no flags. */
    1187             : Datum
    1188        1804 : word_similarity(PG_FUNCTION_ARGS)
    1189             : {
    1190        1804 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1191        1804 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1192             :     float4      res;
    1193             :     /* Argument 0 is passed as str1 and argument 1 as str2, with flags = 0 */
    1194        3608 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1195        3608 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1196             :                                0);
    1197             : 
    1198        1804 :     PG_FREE_IF_COPY(in1, 0);
    1199        1804 :     PG_FREE_IF_COPY(in2, 1);
    1200        1804 :     PG_RETURN_FLOAT4(res);
    1201             : }
    1202             : /* fmgr function: like word_similarity(), but with WORD_SIMILARITY_STRICT set. */
    1203             : Datum
    1204        1764 : strict_word_similarity(PG_FUNCTION_ARGS)
    1205             : {
    1206        1764 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1207        1764 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1208             :     float4      res;
    1209             :     /* With STRICT, calc_word_similarity() also allocates and fills bounds for str2 */
    1210        3528 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1211        3528 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1212             :                                WORD_SIMILARITY_STRICT);
    1213             : 
    1214        1764 :     PG_FREE_IF_COPY(in1, 0);
    1215        1764 :     PG_FREE_IF_COPY(in2, 1);
    1216        1764 :     PG_RETURN_FLOAT4(res);
    1217             : }
    1218             : /* fmgr function: distance between two text values, 1.0 - similarity(). */
    1219             : Datum
    1220        2008 : similarity_dist(PG_FUNCTION_ARGS)
    1221             : {
    1222        2008 :     float4      res = DatumGetFloat4(DirectFunctionCall2(similarity,
    1223             :                                                          PG_GETARG_DATUM(0),
    1224             :                                                          PG_GETARG_DATUM(1)));
    1225             :     /* Higher similarity gives a smaller distance */
    1226        2008 :     PG_RETURN_FLOAT4(1.0 - res);
    1227             : }
    1228             : /* fmgr function: does similarity() of the arguments reach similarity_threshold? */
    1229             : Datum
    1230       12000 : similarity_op(PG_FUNCTION_ARGS)
    1231             : {
    1232       12000 :     float4      res = DatumGetFloat4(DirectFunctionCall2(similarity,
    1233             :                                                          PG_GETARG_DATUM(0),
    1234             :                                                          PG_GETARG_DATUM(1)));
    1235             :     /* similarity_threshold is the file-level GUC variable (a double) */
    1236       12000 :     PG_RETURN_BOOL(res >= similarity_threshold);
    1237             : }
    1238             : /* fmgr function: does word similarity of arg 0 in arg 1 reach word_similarity_threshold? */
    1239             : Datum
    1240        3848 : word_similarity_op(PG_FUNCTION_ARGS)
    1241             : {
    1242        3848 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1243        3848 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1244             :     float4      res;
    1245             :     /* CHECK_ONLY allows an early exit, so res is only good for the threshold test */
    1246        7696 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1247        7696 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1248             :                                WORD_SIMILARITY_CHECK_ONLY);
    1249             : 
    1250        3848 :     PG_FREE_IF_COPY(in1, 0);
    1251        3848 :     PG_FREE_IF_COPY(in2, 1);
    1252        3848 :     PG_RETURN_BOOL(res >= word_similarity_threshold);
    1253             : }
    1254             : /* fmgr function: word_similarity_op() with the roles of the two arguments swapped. */
    1255             : Datum
    1256        3848 : word_similarity_commutator_op(PG_FUNCTION_ARGS)
    1257             : {
    1258        3848 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1259        3848 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1260             :     float4      res;
    1261             :     /* Note the swap: argument 1 is passed as str1 and argument 0 as str2 */
    1262        7696 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1263        7696 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1264             :                                WORD_SIMILARITY_CHECK_ONLY);
    1265             : 
    1266        3848 :     PG_FREE_IF_COPY(in1, 0);
    1267        3848 :     PG_FREE_IF_COPY(in2, 1);
    1268        3848 :     PG_RETURN_BOOL(res >= word_similarity_threshold);
    1269             : }
    1270             : /* fmgr function: 1.0 - word similarity of argument 0 within argument 1. */
    1271             : Datum
    1272           0 : word_similarity_dist_op(PG_FUNCTION_ARGS)
    1273             : {
    1274           0 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1275           0 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1276             :     float4      res;
    1277             :     /* Same call as in word_similarity(); the result becomes a distance below */
    1278           0 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1279           0 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1280             :                                0);
    1281             : 
    1282           0 :     PG_FREE_IF_COPY(in1, 0);
    1283           0 :     PG_FREE_IF_COPY(in2, 1);
    1284           0 :     PG_RETURN_FLOAT4(1.0 - res);
    1285             : }
    1286             : 
    1287             : Datum
    1288        1428 : word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
    1289             : {
                           /*
                            * SQL-callable distance operator with the operands swapped:
                            * argument 1 is handed to calc_word_similarity() first and
                            * argument 0 second.  Returns one minus the result as a float4.
                            */
    1290        1428 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1291        1428 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1292             :     float4      res;
    1293             : 
                           /* Note the order: in2 comes before in1; no flags are set. */
    1294        2856 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1295        2856 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1296             :                                0);
    1297             : 
                           /* Release any copies made while fetching the arguments. */
    1298        1428 :     PG_FREE_IF_COPY(in1, 0);
    1299        1428 :     PG_FREE_IF_COPY(in2, 1);
    1300        1428 :     PG_RETURN_FLOAT4(1.0 - res);
    1301             : }
    1302             : 
    1303             : Datum
    1304        5060 : strict_word_similarity_op(PG_FUNCTION_ARGS)
    1305             : {
                           /*
                            * SQL-callable boolean operator.  Passes argument 0 and then
                            * argument 1 to calc_word_similarity() with the STRICT flag set
                            * and reports whether the result reaches the
                            * strict_word_similarity_threshold GUC.
                            */
    1306        5060 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1307        5060 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1308             :     float4      res;
    1309             : 
                           /*
                            * NOTE(review): CHECK_ONLY presumably permits a shortcut because
                            * only the threshold comparison is needed, and STRICT selects the
                            * strict matching variant -- confirm both against the helper.
                            */
    1310       10120 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1311       10120 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1312             :                                WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT);
    1313             : 
                           /* Release any copies made while fetching the arguments. */
    1314        5060 :     PG_FREE_IF_COPY(in1, 0);
    1315        5060 :     PG_FREE_IF_COPY(in2, 1);
    1316        5060 :     PG_RETURN_BOOL(res >= strict_word_similarity_threshold);
    1317             : }
    1318             : 
    1319             : Datum
    1320        5060 : strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
    1321             : {
                           /*
                            * SQL-callable boolean operator with the operands swapped:
                            * argument 1 is handed to calc_word_similarity() first and
                            * argument 0 second, with the STRICT flag set.  Returns whether
                            * the result reaches the strict_word_similarity_threshold GUC.
                            */
    1322        5060 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1323        5060 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1324             :     float4      res;
    1325             : 
                           /* Note the order: in2 comes before in1 in this call. */
    1326       10120 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1327       10120 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1328             :                                WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT);
    1329             : 
                           /* Release any copies made while fetching the arguments. */
    1330        5060 :     PG_FREE_IF_COPY(in1, 0);
    1331        5060 :     PG_FREE_IF_COPY(in2, 1);
    1332        5060 :     PG_RETURN_BOOL(res >= strict_word_similarity_threshold);
    1333             : }
    1334             : 
    1335             : Datum
    1336           0 : strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
    1337             : {
                           /*
                            * SQL-callable distance operator.  Passes argument 0 and then
                            * argument 1 to calc_word_similarity() with the STRICT flag set
                            * and returns one minus the result as a float4.
                            */
    1338           0 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1339           0 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1340             :     float4      res;
    1341             : 
                           /*
                            * Only WORD_SIMILARITY_STRICT is passed; CHECK_ONLY is left out
                            * because the numeric value itself is returned.
                            */
    1342           0 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1343           0 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1344             :                                WORD_SIMILARITY_STRICT);
    1345             : 
                           /* Release any copies made while fetching the arguments. */
    1346           0 :     PG_FREE_IF_COPY(in1, 0);
    1347           0 :     PG_FREE_IF_COPY(in2, 1);
    1348           0 :     PG_RETURN_FLOAT4(1.0 - res);
    1349             : }
    1350             : 
    1351             : Datum
    1352        1440 : strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
    1353             : {
                           /*
                            * SQL-callable distance operator with the operands swapped:
                            * argument 1 is handed to calc_word_similarity() first and
                            * argument 0 second, with the STRICT flag set.  Returns one minus
                            * the result as a float4.
                            */
    1354        1440 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1355        1440 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1356             :     float4      res;
    1357             : 
                           /* Note the order: in2 comes before in1; only STRICT is set. */
    1358        2880 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1359        2880 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1360             :                                WORD_SIMILARITY_STRICT);
    1361             : 
                           /* Release any copies made while fetching the arguments. */
    1362        1440 :     PG_FREE_IF_COPY(in1, 0);
    1363        1440 :     PG_FREE_IF_COPY(in2, 1);
    1364        1440 :     PG_RETURN_FLOAT4(1.0 - res);
    1365             : }

Generated by: LCOV version 1.14