Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsquery_gist.c
4 : * GiST index support for tsquery
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsquery_gist.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gist.h"
18 : #include "access/stratnum.h"
19 : #include "common/int.h"
20 : #include "tsearch/ts_utils.h"
21 : #include "utils/fmgrprotos.h"
22 :
23 : #define GETENTRY(vec,pos) DatumGetTSQuerySign((vec)->vector[pos].key)
24 :
25 :
26 : Datum
27 36 : gtsquery_compress(PG_FUNCTION_ARGS)
28 : {
29 36 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
30 36 : GISTENTRY *retval = entry;
31 :
32 36 : if (entry->leafkey)
33 : {
34 : TSQuerySign sign;
35 :
36 36 : retval = palloc_object(GISTENTRY);
37 36 : sign = makeTSQuerySign(DatumGetTSQuery(entry->key));
38 :
39 36 : gistentryinit(*retval, TSQuerySignGetDatum(sign),
40 : entry->rel, entry->page,
41 : entry->offset, false);
42 : }
43 :
44 36 : PG_RETURN_POINTER(retval);
45 : }
46 :
47 : /*
48 : * We do not need a decompress function, because the other gtsquery
49 : * support functions work with the compressed representation.
50 : */
51 :
52 : Datum
53 48 : gtsquery_consistent(PG_FUNCTION_ARGS)
54 : {
55 48 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
56 48 : TSQuery query = PG_GETARG_TSQUERY(1);
57 48 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
58 : #ifdef NOT_USED
59 : Oid subtype = PG_GETARG_OID(3);
60 : #endif
61 48 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
62 48 : TSQuerySign key = DatumGetTSQuerySign(entry->key);
63 48 : TSQuerySign sq = makeTSQuerySign(query);
64 : bool retval;
65 :
66 : /* All cases served by this function are inexact */
67 48 : *recheck = true;
68 :
69 48 : switch (strategy)
70 : {
71 24 : case RTContainsStrategyNumber:
72 24 : if (GIST_LEAF(entry))
73 24 : retval = (key & sq) == sq;
74 : else
75 0 : retval = (key & sq) != 0;
76 24 : break;
77 24 : case RTContainedByStrategyNumber:
78 24 : if (GIST_LEAF(entry))
79 24 : retval = (key & sq) == key;
80 : else
81 0 : retval = (key & sq) != 0;
82 24 : break;
83 0 : default:
84 0 : retval = false;
85 : }
86 48 : PG_RETURN_BOOL(retval);
87 : }
88 :
89 : Datum
90 0 : gtsquery_union(PG_FUNCTION_ARGS)
91 : {
92 0 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
93 0 : int *size = (int *) PG_GETARG_POINTER(1);
94 : TSQuerySign sign;
95 : int i;
96 :
97 0 : sign = 0;
98 :
99 0 : for (i = 0; i < entryvec->n; i++)
100 0 : sign |= GETENTRY(entryvec, i);
101 :
102 0 : *size = sizeof(TSQuerySign);
103 :
104 0 : PG_RETURN_TSQUERYSIGN(sign);
105 : }
106 :
107 : Datum
108 0 : gtsquery_same(PG_FUNCTION_ARGS)
109 : {
110 0 : TSQuerySign a = PG_GETARG_TSQUERYSIGN(0);
111 0 : TSQuerySign b = PG_GETARG_TSQUERYSIGN(1);
112 0 : bool *result = (bool *) PG_GETARG_POINTER(2);
113 :
114 0 : *result = (a == b);
115 :
116 0 : PG_RETURN_POINTER(result);
117 : }
118 :
119 : static int
120 0 : sizebitvec(TSQuerySign sign)
121 : {
122 0 : int size = 0,
123 : i;
124 :
125 0 : for (i = 0; i < TSQS_SIGLEN; i++)
126 0 : size += 0x01 & (sign >> i);
127 :
128 0 : return size;
129 : }
130 :
131 : static int
132 0 : hemdist(TSQuerySign a, TSQuerySign b)
133 : {
134 0 : TSQuerySign res = a ^ b;
135 :
136 0 : return sizebitvec(res);
137 : }
138 :
139 : Datum
140 0 : gtsquery_penalty(PG_FUNCTION_ARGS)
141 : {
142 0 : TSQuerySign origval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
143 0 : TSQuerySign newval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
144 0 : float *penalty = (float *) PG_GETARG_POINTER(2);
145 :
146 0 : *penalty = hemdist(origval, newval);
147 :
148 0 : PG_RETURN_POINTER(penalty);
149 : }
150 :
151 :
152 : typedef struct
153 : {
154 : OffsetNumber pos;
155 : int32 cost;
156 : } SPLITCOST;
157 :
158 : static int
159 0 : comparecost(const void *a, const void *b)
160 : {
161 0 : return pg_cmp_s32(((const SPLITCOST *) a)->cost,
162 0 : ((const SPLITCOST *) b)->cost);
163 : }
164 :
165 : #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
166 :
167 : Datum
168 0 : gtsquery_picksplit(PG_FUNCTION_ARGS)
169 : {
170 0 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
171 0 : GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
172 0 : OffsetNumber maxoff = entryvec->n - 2;
173 : OffsetNumber k,
174 : j;
175 : TSQuerySign datum_l,
176 : datum_r;
177 : int32 size_alpha,
178 : size_beta;
179 : int32 size_waste,
180 0 : waste = -1;
181 : int32 nbytes;
182 0 : OffsetNumber seed_1 = 0,
183 0 : seed_2 = 0;
184 : OffsetNumber *left,
185 : *right;
186 :
187 : SPLITCOST *costvector;
188 :
189 0 : nbytes = (maxoff + 2) * sizeof(OffsetNumber);
190 0 : left = v->spl_left = (OffsetNumber *) palloc(nbytes);
191 0 : right = v->spl_right = (OffsetNumber *) palloc(nbytes);
192 0 : v->spl_nleft = v->spl_nright = 0;
193 :
194 0 : for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
195 0 : for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
196 : {
197 0 : size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k));
198 0 : if (size_waste > waste)
199 : {
200 0 : waste = size_waste;
201 0 : seed_1 = k;
202 0 : seed_2 = j;
203 : }
204 : }
205 :
206 :
207 0 : if (seed_1 == 0 || seed_2 == 0)
208 : {
209 0 : seed_1 = 1;
210 0 : seed_2 = 2;
211 : }
212 :
213 0 : datum_l = GETENTRY(entryvec, seed_1);
214 0 : datum_r = GETENTRY(entryvec, seed_2);
215 :
216 0 : maxoff = OffsetNumberNext(maxoff);
217 0 : costvector = palloc_array(SPLITCOST, maxoff);
218 0 : for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
219 : {
220 0 : costvector[j - 1].pos = j;
221 0 : size_alpha = hemdist(GETENTRY(entryvec, seed_1), GETENTRY(entryvec, j));
222 0 : size_beta = hemdist(GETENTRY(entryvec, seed_2), GETENTRY(entryvec, j));
223 0 : costvector[j - 1].cost = abs(size_alpha - size_beta);
224 : }
225 0 : qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
226 :
227 0 : for (k = 0; k < maxoff; k++)
228 : {
229 0 : j = costvector[k].pos;
230 0 : if (j == seed_1)
231 : {
232 0 : *left++ = j;
233 0 : v->spl_nleft++;
234 0 : continue;
235 : }
236 0 : else if (j == seed_2)
237 : {
238 0 : *right++ = j;
239 0 : v->spl_nright++;
240 0 : continue;
241 : }
242 0 : size_alpha = hemdist(datum_l, GETENTRY(entryvec, j));
243 0 : size_beta = hemdist(datum_r, GETENTRY(entryvec, j));
244 :
245 0 : if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
246 : {
247 0 : datum_l |= GETENTRY(entryvec, j);
248 0 : *left++ = j;
249 0 : v->spl_nleft++;
250 : }
251 : else
252 : {
253 0 : datum_r |= GETENTRY(entryvec, j);
254 0 : *right++ = j;
255 0 : v->spl_nright++;
256 : }
257 : }
258 :
259 0 : *right = *left = FirstOffsetNumber;
260 0 : v->spl_ldatum = TSQuerySignGetDatum(datum_l);
261 0 : v->spl_rdatum = TSQuerySignGetDatum(datum_r);
262 :
263 0 : PG_RETURN_POINTER(v);
264 : }
265 :
266 : /*
267 : * Formerly, gtsquery_consistent was declared in pg_proc.h with arguments
268 : * that did not match the documented conventions for GiST support functions.
269 : * We fixed that, but we still need a pg_proc entry with the old signature
270 : * to support reloading pre-9.6 contrib/tsearch2 opclass declarations.
271 : * This compatibility function should go away eventually.
272 : */
273 : Datum
274 0 : gtsquery_consistent_oldsig(PG_FUNCTION_ARGS)
275 : {
276 0 : return gtsquery_consistent(fcinfo);
277 : }
|