Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsquery_gist.c
4 : * GiST index support for tsquery
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsquery_gist.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gist.h"
18 : #include "access/stratnum.h"
19 : #include "common/int.h"
20 : #include "tsearch/ts_utils.h"
21 : #include "utils/fmgrprotos.h"
22 :
23 : #define GETENTRY(vec,pos) DatumGetTSQuerySign((vec)->vector[pos].key)
24 :
25 :
26 : Datum
27 36 : gtsquery_compress(PG_FUNCTION_ARGS)
28 : {
29 36 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
30 36 : GISTENTRY *retval = entry;
31 :
32 36 : if (entry->leafkey)
33 : {
34 : TSQuerySign sign;
35 :
36 36 : retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
37 36 : sign = makeTSQuerySign(DatumGetTSQuery(entry->key));
38 :
39 36 : gistentryinit(*retval, TSQuerySignGetDatum(sign),
40 : entry->rel, entry->page,
41 : entry->offset, false);
42 : }
43 :
44 36 : PG_RETURN_POINTER(retval);
45 : }
46 :
47 : /*
48 : * We do not need a decompress function, because the other gtsquery
49 : * support functions work with the compressed representation.
50 : */
51 :
52 : Datum
53 48 : gtsquery_consistent(PG_FUNCTION_ARGS)
54 : {
55 48 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
56 48 : TSQuery query = PG_GETARG_TSQUERY(1);
57 48 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
58 :
59 : /* Oid subtype = PG_GETARG_OID(3); */
60 48 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
61 48 : TSQuerySign key = DatumGetTSQuerySign(entry->key);
62 48 : TSQuerySign sq = makeTSQuerySign(query);
63 : bool retval;
64 :
65 : /* All cases served by this function are inexact */
66 48 : *recheck = true;
67 :
68 48 : switch (strategy)
69 : {
70 24 : case RTContainsStrategyNumber:
71 24 : if (GIST_LEAF(entry))
72 24 : retval = (key & sq) == sq;
73 : else
74 0 : retval = (key & sq) != 0;
75 24 : break;
76 24 : case RTContainedByStrategyNumber:
77 24 : if (GIST_LEAF(entry))
78 24 : retval = (key & sq) == key;
79 : else
80 0 : retval = (key & sq) != 0;
81 24 : break;
82 0 : default:
83 0 : retval = false;
84 : }
85 48 : PG_RETURN_BOOL(retval);
86 : }
87 :
88 : Datum
89 0 : gtsquery_union(PG_FUNCTION_ARGS)
90 : {
91 0 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
92 0 : int *size = (int *) PG_GETARG_POINTER(1);
93 : TSQuerySign sign;
94 : int i;
95 :
96 0 : sign = 0;
97 :
98 0 : for (i = 0; i < entryvec->n; i++)
99 0 : sign |= GETENTRY(entryvec, i);
100 :
101 0 : *size = sizeof(TSQuerySign);
102 :
103 0 : PG_RETURN_TSQUERYSIGN(sign);
104 : }
105 :
106 : Datum
107 0 : gtsquery_same(PG_FUNCTION_ARGS)
108 : {
109 0 : TSQuerySign a = PG_GETARG_TSQUERYSIGN(0);
110 0 : TSQuerySign b = PG_GETARG_TSQUERYSIGN(1);
111 0 : bool *result = (bool *) PG_GETARG_POINTER(2);
112 :
113 0 : *result = (a == b);
114 :
115 0 : PG_RETURN_POINTER(result);
116 : }
117 :
118 : static int
119 0 : sizebitvec(TSQuerySign sign)
120 : {
121 0 : int size = 0,
122 : i;
123 :
124 0 : for (i = 0; i < TSQS_SIGLEN; i++)
125 0 : size += 0x01 & (sign >> i);
126 :
127 0 : return size;
128 : }
129 :
130 : static int
131 0 : hemdist(TSQuerySign a, TSQuerySign b)
132 : {
133 0 : TSQuerySign res = a ^ b;
134 :
135 0 : return sizebitvec(res);
136 : }
137 :
138 : Datum
139 0 : gtsquery_penalty(PG_FUNCTION_ARGS)
140 : {
141 0 : TSQuerySign origval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
142 0 : TSQuerySign newval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
143 0 : float *penalty = (float *) PG_GETARG_POINTER(2);
144 :
145 0 : *penalty = hemdist(origval, newval);
146 :
147 0 : PG_RETURN_POINTER(penalty);
148 : }
149 :
150 :
151 : typedef struct
152 : {
153 : OffsetNumber pos;
154 : int32 cost;
155 : } SPLITCOST;
156 :
157 : static int
158 0 : comparecost(const void *a, const void *b)
159 : {
160 0 : return pg_cmp_s32(((const SPLITCOST *) a)->cost,
161 : ((const SPLITCOST *) b)->cost);
162 : }
163 :
164 : #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
165 :
166 : Datum
167 0 : gtsquery_picksplit(PG_FUNCTION_ARGS)
168 : {
169 0 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
170 0 : GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
171 0 : OffsetNumber maxoff = entryvec->n - 2;
172 : OffsetNumber k,
173 : j;
174 : TSQuerySign datum_l,
175 : datum_r;
176 : int32 size_alpha,
177 : size_beta;
178 : int32 size_waste,
179 0 : waste = -1;
180 : int32 nbytes;
181 0 : OffsetNumber seed_1 = 0,
182 0 : seed_2 = 0;
183 : OffsetNumber *left,
184 : *right;
185 :
186 : SPLITCOST *costvector;
187 :
188 0 : nbytes = (maxoff + 2) * sizeof(OffsetNumber);
189 0 : left = v->spl_left = (OffsetNumber *) palloc(nbytes);
190 0 : right = v->spl_right = (OffsetNumber *) palloc(nbytes);
191 0 : v->spl_nleft = v->spl_nright = 0;
192 :
193 0 : for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
194 0 : for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
195 : {
196 0 : size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k));
197 0 : if (size_waste > waste)
198 : {
199 0 : waste = size_waste;
200 0 : seed_1 = k;
201 0 : seed_2 = j;
202 : }
203 : }
204 :
205 :
206 0 : if (seed_1 == 0 || seed_2 == 0)
207 : {
208 0 : seed_1 = 1;
209 0 : seed_2 = 2;
210 : }
211 :
212 0 : datum_l = GETENTRY(entryvec, seed_1);
213 0 : datum_r = GETENTRY(entryvec, seed_2);
214 :
215 0 : maxoff = OffsetNumberNext(maxoff);
216 0 : costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
217 0 : for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
218 : {
219 0 : costvector[j - 1].pos = j;
220 0 : size_alpha = hemdist(GETENTRY(entryvec, seed_1), GETENTRY(entryvec, j));
221 0 : size_beta = hemdist(GETENTRY(entryvec, seed_2), GETENTRY(entryvec, j));
222 0 : costvector[j - 1].cost = abs(size_alpha - size_beta);
223 : }
224 0 : qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
225 :
226 0 : for (k = 0; k < maxoff; k++)
227 : {
228 0 : j = costvector[k].pos;
229 0 : if (j == seed_1)
230 : {
231 0 : *left++ = j;
232 0 : v->spl_nleft++;
233 0 : continue;
234 : }
235 0 : else if (j == seed_2)
236 : {
237 0 : *right++ = j;
238 0 : v->spl_nright++;
239 0 : continue;
240 : }
241 0 : size_alpha = hemdist(datum_l, GETENTRY(entryvec, j));
242 0 : size_beta = hemdist(datum_r, GETENTRY(entryvec, j));
243 :
244 0 : if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
245 : {
246 0 : datum_l |= GETENTRY(entryvec, j);
247 0 : *left++ = j;
248 0 : v->spl_nleft++;
249 : }
250 : else
251 : {
252 0 : datum_r |= GETENTRY(entryvec, j);
253 0 : *right++ = j;
254 0 : v->spl_nright++;
255 : }
256 : }
257 :
258 0 : *right = *left = FirstOffsetNumber;
259 0 : v->spl_ldatum = TSQuerySignGetDatum(datum_l);
260 0 : v->spl_rdatum = TSQuerySignGetDatum(datum_r);
261 :
262 0 : PG_RETURN_POINTER(v);
263 : }
264 :
265 : /*
266 : * Formerly, gtsquery_consistent was declared in pg_proc.h with arguments
267 : * that did not match the documented conventions for GiST support functions.
268 : * We fixed that, but we still need a pg_proc entry with the old signature
269 : * to support reloading pre-9.6 contrib/tsearch2 opclass declarations.
270 : * This compatibility function should go away eventually.
271 : */
272 : Datum
273 0 : gtsquery_consistent_oldsig(PG_FUNCTION_ARGS)
274 : {
275 0 : return gtsquery_consistent(fcinfo);
276 : }
|