Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gistutil.c
4 : * utilities routines for the postgres GiST index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gist/gistutil.c
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include <math.h>
17 :
18 : #include "access/gist_private.h"
19 : #include "access/htup_details.h"
20 : #include "access/reloptions.h"
21 : #include "common/pg_prng.h"
22 : #include "storage/indexfsm.h"
23 : #include "utils/float.h"
24 : #include "utils/fmgrprotos.h"
25 : #include "utils/lsyscache.h"
26 : #include "utils/rel.h"
27 : #include "utils/snapmgr.h"
28 : #include "utils/syscache.h"
29 :
30 : /*
31 : * Write itup vector to page, has no control of free space.
32 : */
33 : void
34 1164150 : gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
35 : {
36 : int i;
37 :
38 1164150 : if (off == InvalidOffsetNumber)
39 2321254 : off = (PageIsEmpty(page)) ? FirstOffsetNumber :
40 1158906 : OffsetNumberNext(PageGetMaxOffsetNumber(page));
41 :
42 2500952 : for (i = 0; i < len; i++)
43 : {
44 1336802 : Size sz = IndexTupleSize(itup[i]);
45 : OffsetNumber l;
46 :
47 1336802 : l = PageAddItem(page, (Item) itup[i], sz, off, false, false);
48 1336802 : if (l == InvalidOffsetNumber)
49 0 : elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes",
50 : i, len, (int) sz);
51 1336802 : off++;
52 : }
53 1164150 : }
54 :
55 : /*
56 : * Check space for itup vector on page
57 : */
58 : bool
59 1724744 : gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
60 : {
61 1724744 : unsigned int size = freespace,
62 1724744 : deleted = 0;
63 : int i;
64 :
65 3474600 : for (i = 0; i < len; i++)
66 1749856 : size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
67 :
68 1724744 : if (todelete != InvalidOffsetNumber)
69 : {
70 759692 : IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
71 :
72 759692 : deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
73 : }
74 :
75 1724744 : return (PageGetFreeSpace(page) + deleted < size);
76 : }
77 :
78 : bool
79 53456 : gistfitpage(IndexTuple *itvec, int len)
80 : {
81 : int i;
82 53456 : Size size = 0;
83 :
84 2331404 : for (i = 0; i < len; i++)
85 2277948 : size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
86 :
87 : /* TODO: Consider fillfactor */
88 53456 : return (size <= GiSTPageSize);
89 : }
90 :
91 : /*
92 : * Read buffer into itup vector
93 : */
94 : IndexTuple *
95 26612 : gistextractpage(Page page, int *len /* out */ )
96 : {
97 : OffsetNumber i,
98 : maxoff;
99 : IndexTuple *itvec;
100 :
101 26612 : maxoff = PageGetMaxOffsetNumber(page);
102 26612 : *len = maxoff;
103 26612 : itvec = palloc(sizeof(IndexTuple) * maxoff);
104 2026508 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
105 1999896 : itvec[i - FirstOffsetNumber] = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
106 :
107 26612 : return itvec;
108 : }
109 :
110 : /*
111 : * join two vectors into one
112 : */
113 : IndexTuple *
114 26328 : gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
115 : {
116 26328 : itvec = (IndexTuple *) repalloc(itvec, sizeof(IndexTuple) * ((*len) + addlen));
117 26328 : memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen);
118 26328 : *len += addlen;
119 26328 : return itvec;
120 : }
121 :
122 : /*
123 : * make plain IndexTuple vector
124 : */
125 :
126 : IndexTupleData *
127 52956 : gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
128 : {
129 : char *ptr,
130 : *ret;
131 : int i;
132 :
133 52956 : *memlen = 0;
134 :
135 2079064 : for (i = 0; i < veclen; i++)
136 2026108 : *memlen += IndexTupleSize(vec[i]);
137 :
138 52956 : ptr = ret = palloc(*memlen);
139 :
140 2079064 : for (i = 0; i < veclen; i++)
141 : {
142 2026108 : memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
143 2026108 : ptr += IndexTupleSize(vec[i]);
144 : }
145 :
146 52956 : return (IndexTupleData *) ret;
147 : }
148 :
149 : /*
150 : * Make unions of keys in IndexTuple vector (one union datum per index column).
151 : * Union Datums are returned into the attr/isnull arrays.
152 : * Resulting Datums aren't compressed.
153 : */
154 : void
155 5542 : gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
156 : Datum *attr, bool *isnull)
157 : {
158 : int i;
159 : GistEntryVector *evec;
160 : int attrsize;
161 :
162 5542 : evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
163 :
164 16220 : for (i = 0; i < giststate->nonLeafTupdesc->natts; i++)
165 : {
166 : int j;
167 :
168 : /* Collect non-null datums for this column */
169 10678 : evec->n = 0;
170 538424 : for (j = 0; j < len; j++)
171 : {
172 : Datum datum;
173 : bool IsNull;
174 :
175 527746 : datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc,
176 : &IsNull);
177 527746 : if (IsNull)
178 1744 : continue;
179 :
180 526002 : gistdentryinit(giststate, i,
181 526002 : evec->vector + evec->n,
182 : datum,
183 : NULL, NULL, (OffsetNumber) 0,
184 : false, IsNull);
185 526002 : evec->n++;
186 : }
187 :
188 : /* If this column was all NULLs, the union is NULL */
189 10678 : if (evec->n == 0)
190 : {
191 204 : attr[i] = (Datum) 0;
192 204 : isnull[i] = true;
193 : }
194 : else
195 : {
196 10474 : if (evec->n == 1)
197 : {
198 : /* unionFn may expect at least two inputs */
199 8 : evec->n = 2;
200 8 : evec->vector[1] = evec->vector[0];
201 : }
202 :
203 : /* Make union and store in attr array */
204 10474 : attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
205 : giststate->supportCollation[i],
206 : PointerGetDatum(evec),
207 : PointerGetDatum(&attrsize));
208 :
209 10474 : isnull[i] = false;
210 : }
211 : }
212 5542 : }
213 :
214 : /*
215 : * Return an IndexTuple containing the result of applying the "union"
216 : * method to the specified IndexTuple vector.
217 : */
218 : IndexTuple
219 6 : gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
220 : {
221 : Datum attr[INDEX_MAX_KEYS];
222 : bool isnull[INDEX_MAX_KEYS];
223 :
224 6 : gistMakeUnionItVec(giststate, itvec, len, attr, isnull);
225 :
226 6 : return gistFormTuple(giststate, r, attr, isnull, false);
227 : }
228 :
229 : /*
230 : * makes union of two key
231 : */
232 : void
233 1465032 : gistMakeUnionKey(GISTSTATE *giststate, int attno,
234 : GISTENTRY *entry1, bool isnull1,
235 : GISTENTRY *entry2, bool isnull2,
236 : Datum *dst, bool *dstisnull)
237 : {
238 : /* we need a GistEntryVector with room for exactly 2 elements */
239 : union
240 : {
241 : GistEntryVector gev;
242 : char padding[2 * sizeof(GISTENTRY) + GEVHDRSZ];
243 : } storage;
244 1465032 : GistEntryVector *evec = &storage.gev;
245 : int dstsize;
246 :
247 1465032 : evec->n = 2;
248 :
249 1465032 : if (isnull1 && isnull2)
250 : {
251 18442 : *dstisnull = true;
252 18442 : *dst = (Datum) 0;
253 : }
254 : else
255 : {
256 1446590 : if (isnull1 == false && isnull2 == false)
257 : {
258 1445966 : evec->vector[0] = *entry1;
259 1445966 : evec->vector[1] = *entry2;
260 : }
261 624 : else if (isnull1 == false)
262 : {
263 624 : evec->vector[0] = *entry1;
264 624 : evec->vector[1] = *entry1;
265 : }
266 : else
267 : {
268 0 : evec->vector[0] = *entry2;
269 0 : evec->vector[1] = *entry2;
270 : }
271 :
272 1446590 : *dstisnull = false;
273 1446590 : *dst = FunctionCall2Coll(&giststate->unionFn[attno],
274 : giststate->supportCollation[attno],
275 : PointerGetDatum(evec),
276 : PointerGetDatum(&dstsize));
277 : }
278 1465032 : }
279 :
280 : bool
281 1265538 : gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
282 : {
283 1265538 : bool result = false; /* silence compiler warning */
284 :
285 1265538 : FunctionCall3Coll(&giststate->equalFn[attno],
286 : giststate->supportCollation[attno],
287 : a, b,
288 : PointerGetDatum(&result));
289 1265538 : return result;
290 : }
291 :
292 : /*
293 : * Decompress all keys in tuple
294 : */
295 : void
296 3862254 : gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
297 : OffsetNumber o, GISTENTRY *attdata, bool *isnull)
298 : {
299 : int i;
300 :
301 8287098 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
302 : {
303 : Datum datum;
304 :
305 4424844 : datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
306 4424844 : gistdentryinit(giststate, i, &attdata[i],
307 : datum, r, p, o,
308 4424844 : false, isnull[i]);
309 : }
310 3862254 : }
311 :
312 : /*
313 : * Forms union of oldtup and addtup, if union == oldtup then return NULL
314 : */
315 : IndexTuple
316 1277498 : gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
317 : {
318 1277498 : bool neednew = false;
319 : GISTENTRY oldentries[INDEX_MAX_KEYS],
320 : addentries[INDEX_MAX_KEYS];
321 : bool oldisnull[INDEX_MAX_KEYS],
322 : addisnull[INDEX_MAX_KEYS];
323 : Datum attr[INDEX_MAX_KEYS];
324 : bool isnull[INDEX_MAX_KEYS];
325 1277498 : IndexTuple newtup = NULL;
326 : int i;
327 :
328 1277498 : gistDeCompressAtt(giststate, r, oldtup, NULL,
329 : (OffsetNumber) 0, oldentries, oldisnull);
330 :
331 1277498 : gistDeCompressAtt(giststate, r, addtup, NULL,
332 : (OffsetNumber) 0, addentries, addisnull);
333 :
334 2742526 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
335 : {
336 1465028 : gistMakeUnionKey(giststate, i,
337 1465028 : oldentries + i, oldisnull[i],
338 1465028 : addentries + i, addisnull[i],
339 1465028 : attr + i, isnull + i);
340 :
341 1465028 : if (neednew)
342 : /* we already need new key, so we can skip check */
343 182988 : continue;
344 :
345 1282040 : if (isnull[i])
346 : /* union of key may be NULL if and only if both keys are NULL */
347 18442 : continue;
348 :
349 1263598 : if (!addisnull[i])
350 : {
351 1262974 : if (oldisnull[i] ||
352 1262974 : !gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i]))
353 764316 : neednew = true;
354 : }
355 : }
356 :
357 1277498 : if (neednew)
358 : {
359 : /* need to update key */
360 764316 : newtup = gistFormTuple(giststate, r, attr, isnull, false);
361 764316 : newtup->t_tid = oldtup->t_tid;
362 : }
363 :
364 1277498 : return newtup;
365 : }
366 :
367 : /*
368 : * Search an upper index page for the entry with lowest penalty for insertion
369 : * of the new index key contained in "it".
370 : *
371 : * Returns the index of the page entry to insert into.
372 : */
373 : OffsetNumber
374 1247756 : gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
375 : GISTSTATE *giststate)
376 : {
377 : OffsetNumber result;
378 : OffsetNumber maxoff;
379 : OffsetNumber i;
380 : float best_penalty[INDEX_MAX_KEYS];
381 : GISTENTRY entry,
382 : identry[INDEX_MAX_KEYS];
383 : bool isnull[INDEX_MAX_KEYS];
384 : int keep_current_best;
385 :
386 : Assert(!GistPageIsLeaf(p));
387 :
388 1247756 : gistDeCompressAtt(giststate, r,
389 : it, NULL, (OffsetNumber) 0,
390 : identry, isnull);
391 :
392 : /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
393 1247756 : result = FirstOffsetNumber;
394 :
395 : /*
396 : * The index may have multiple columns, and there's a penalty value for
397 : * each column. The penalty associated with a column that appears earlier
398 : * in the index definition is strictly more important than the penalty of
399 : * a column that appears later in the index definition.
400 : *
401 : * best_penalty[j] is the best penalty we have seen so far for column j,
402 : * or -1 when we haven't yet examined column j. Array entries to the
403 : * right of the first -1 are undefined.
404 : */
405 1247756 : best_penalty[0] = -1;
406 :
407 : /*
408 : * If we find a tuple that's exactly as good as the currently best one, we
409 : * could use either one. When inserting a lot of tuples with the same or
410 : * similar keys, it's preferable to descend down the same path when
411 : * possible, as that's more cache-friendly. On the other hand, if all
412 : * inserts land on the same leaf page after a split, we're never going to
413 : * insert anything to the other half of the split, and will end up using
414 : * only 50% of the available space. Distributing the inserts evenly would
415 : * lead to better space usage, but that hurts cache-locality during
416 : * insertion. To get the best of both worlds, when we find a tuple that's
417 : * exactly as good as the previous best, choose randomly whether to stick
418 : * to the old best, or use the new one. Once we decide to stick to the
419 : * old best, we keep sticking to it for any subsequent equally good tuples
420 : * we might find. This favors tuples with low offsets, but still allows
421 : * some inserts to go to other equally-good subtrees.
422 : *
423 : * keep_current_best is -1 if we haven't yet had to make a random choice
424 : * whether to keep the current best tuple. If we have done so, and
425 : * decided to keep it, keep_current_best is 1; if we've decided to
426 : * replace, keep_current_best is 0. (This state will be reset to -1 as
427 : * soon as we've made the replacement, but sometimes we make the choice in
428 : * advance of actually finding a replacement best tuple.)
429 : */
430 1247756 : keep_current_best = -1;
431 :
432 : /*
433 : * Loop over tuples on page.
434 : */
435 1247756 : maxoff = PageGetMaxOffsetNumber(p);
436 : Assert(maxoff >= FirstOffsetNumber);
437 :
438 39211138 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
439 : {
440 38226700 : IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
441 : bool zero_penalty;
442 : int j;
443 :
444 38226700 : zero_penalty = true;
445 :
446 : /* Loop over index attributes. */
447 77417168 : for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++)
448 : {
449 : Datum datum;
450 : float usize;
451 : bool IsNull;
452 :
453 : /* Compute penalty for this column. */
454 45595830 : datum = index_getattr(itup, j + 1, giststate->leafTupdesc,
455 : &IsNull);
456 45595830 : gistdentryinit(giststate, j, &entry, datum, r, p, i,
457 : false, IsNull);
458 45595830 : usize = gistpenalty(giststate, j, &entry, IsNull,
459 45595830 : &identry[j], isnull[j]);
460 45595830 : if (usize > 0)
461 45060876 : zero_penalty = false;
462 :
463 45595830 : if (best_penalty[j] < 0 || usize < best_penalty[j])
464 : {
465 : /*
466 : * New best penalty for column. Tentatively select this tuple
467 : * as the target, and record the best penalty. Then reset the
468 : * next column's penalty to "unknown" (and indirectly, the
469 : * same for all the ones to its right). This will force us to
470 : * adopt this tuple's penalty values as the best for all the
471 : * remaining columns during subsequent loop iterations.
472 : */
473 36491452 : result = i;
474 36491452 : best_penalty[j] = usize;
475 :
476 36491452 : if (j < IndexRelationGetNumberOfKeyAttributes(r) - 1)
477 7365912 : best_penalty[j + 1] = -1;
478 :
479 : /* we have new best, so reset keep-it decision */
480 36491452 : keep_current_best = -1;
481 : }
482 9104378 : else if (best_penalty[j] == usize)
483 : {
484 : /*
485 : * The current tuple is exactly as good for this column as the
486 : * best tuple seen so far. The next iteration of this loop
487 : * will compare the next column.
488 : */
489 : }
490 : else
491 : {
492 : /*
493 : * The current tuple is worse for this column than the best
494 : * tuple seen so far. Skip the remaining columns and move on
495 : * to the next tuple, if any.
496 : */
497 6405362 : zero_penalty = false; /* so outer loop won't exit */
498 6405362 : break;
499 : }
500 : }
501 :
502 : /*
503 : * If we looped past the last column, and did not update "result",
504 : * then this tuple is exactly as good as the prior best tuple.
505 : */
506 38226700 : if (j == IndexRelationGetNumberOfKeyAttributes(r) && result != i)
507 : {
508 2695798 : if (keep_current_best == -1)
509 : {
510 : /* we didn't make the random choice yet for this old best */
511 298352 : keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
512 : }
513 2695798 : if (keep_current_best == 0)
514 : {
515 : /* we choose to use the new tuple */
516 253326 : result = i;
517 : /* choose again if there are even more exactly-as-good ones */
518 253326 : keep_current_best = -1;
519 : }
520 : }
521 :
522 : /*
523 : * If we find a tuple with zero penalty for all columns, and we've
524 : * decided we don't want to search for another tuple with equal
525 : * penalty, there's no need to examine remaining tuples; just break
526 : * out of the loop and return it.
527 : */
528 38226700 : if (zero_penalty)
529 : {
530 527210 : if (keep_current_best == -1)
531 : {
532 : /* we didn't make the random choice yet for this old best */
533 527210 : keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
534 : }
535 527210 : if (keep_current_best == 1)
536 263318 : break;
537 : }
538 : }
539 :
540 1247756 : return result;
541 : }
542 :
543 : /*
544 : * initialize a GiST entry with a decompressed version of key
545 : */
546 : void
547 54867144 : gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
548 : Datum k, Relation r, Page pg, OffsetNumber o,
549 : bool l, bool isNull)
550 : {
551 54867144 : if (!isNull)
552 : {
553 : GISTENTRY *dep;
554 :
555 54597578 : gistentryinit(*e, k, r, pg, o, l);
556 :
557 : /* there may not be a decompress function in opclass */
558 54597578 : if (!OidIsValid(giststate->decompressFn[nkey].fn_oid))
559 48266064 : return;
560 :
561 : dep = (GISTENTRY *)
562 6331514 : DatumGetPointer(FunctionCall1Coll(&giststate->decompressFn[nkey],
563 : giststate->supportCollation[nkey],
564 : PointerGetDatum(e)));
565 : /* decompressFn may just return the given pointer */
566 6331514 : if (dep != e)
567 766516 : gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset,
568 : dep->leafkey);
569 : }
570 : else
571 269566 : gistentryinit(*e, (Datum) 0, r, pg, o, l);
572 : }
573 :
574 : IndexTuple
575 1781858 : gistFormTuple(GISTSTATE *giststate, Relation r,
576 : const Datum *attdata, const bool *isnull, bool isleaf)
577 : {
578 : Datum compatt[INDEX_MAX_KEYS];
579 : IndexTuple res;
580 :
581 1781858 : gistCompressValues(giststate, r, attdata, isnull, isleaf, compatt);
582 :
583 1781856 : res = index_form_tuple(isleaf ? giststate->leafTupdesc :
584 : giststate->nonLeafTupdesc,
585 : compatt, isnull);
586 :
587 : /*
588 : * The offset number on tuples on internal pages is unused. For historical
589 : * reasons, it is set to 0xffff.
590 : */
591 1781856 : ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
592 1781856 : return res;
593 : }
594 :
595 : void
596 1978002 : gistCompressValues(GISTSTATE *giststate, Relation r,
597 : const Datum *attdata, const bool *isnull, bool isleaf, Datum *compatt)
598 : {
599 : int i;
600 :
601 : /*
602 : * Call the compress method on each attribute.
603 : */
604 4271964 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
605 : {
606 2293964 : if (isnull[i])
607 15590 : compatt[i] = (Datum) 0;
608 : else
609 : {
610 : GISTENTRY centry;
611 : GISTENTRY *cep;
612 :
613 2278374 : gistentryinit(centry, attdata[i], r, NULL, (OffsetNumber) 0,
614 : isleaf);
615 : /* there may not be a compress function in opclass */
616 2278374 : if (OidIsValid(giststate->compressFn[i].fn_oid))
617 : cep = (GISTENTRY *)
618 1785480 : DatumGetPointer(FunctionCall1Coll(&giststate->compressFn[i],
619 : giststate->supportCollation[i],
620 : PointerGetDatum(¢ry)));
621 : else
622 492894 : cep = ¢ry;
623 2278372 : compatt[i] = cep->key;
624 : }
625 : }
626 :
627 1978000 : if (isleaf)
628 : {
629 : /*
630 : * Emplace each included attribute if any.
631 : */
632 1454284 : for (; i < r->rd_att->natts; i++)
633 : {
634 293140 : if (isnull[i])
635 2000 : compatt[i] = (Datum) 0;
636 : else
637 291140 : compatt[i] = attdata[i];
638 : }
639 : }
640 1978000 : }
641 :
642 : /*
643 : * initialize a GiST entry with fetched value in key field
644 : */
645 : static Datum
646 105436 : gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r)
647 : {
648 : GISTENTRY fentry;
649 : GISTENTRY *fep;
650 :
651 105436 : gistentryinit(fentry, k, r, NULL, (OffsetNumber) 0, false);
652 :
653 : fep = (GISTENTRY *)
654 105436 : DatumGetPointer(FunctionCall1Coll(&giststate->fetchFn[nkey],
655 : giststate->supportCollation[nkey],
656 : PointerGetDatum(&fentry)));
657 :
658 : /* fetchFn set 'key', return it to the caller */
659 105436 : return fep->key;
660 : }
661 :
662 : /*
663 : * Fetch all keys in tuple.
664 : * Returns a new HeapTuple containing the originally-indexed data.
665 : */
666 : HeapTuple
667 530236 : gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple)
668 : {
669 530236 : MemoryContext oldcxt = MemoryContextSwitchTo(giststate->tempCxt);
670 : Datum fetchatt[INDEX_MAX_KEYS];
671 : bool isnull[INDEX_MAX_KEYS];
672 : int i;
673 :
674 1120822 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
675 : {
676 : Datum datum;
677 :
678 590586 : datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
679 :
680 590586 : if (giststate->fetchFn[i].fn_oid != InvalidOid)
681 : {
682 105448 : if (!isnull[i])
683 105436 : fetchatt[i] = gistFetchAtt(giststate, i, datum, r);
684 : else
685 12 : fetchatt[i] = (Datum) 0;
686 : }
687 485138 : else if (giststate->compressFn[i].fn_oid == InvalidOid)
688 : {
689 : /*
690 : * If opclass does not provide compress method that could change
691 : * original value, att is necessarily stored in original form.
692 : */
693 424788 : if (!isnull[i])
694 421452 : fetchatt[i] = datum;
695 : else
696 3336 : fetchatt[i] = (Datum) 0;
697 : }
698 : else
699 : {
700 : /*
701 : * Index-only scans not supported for this column. Since the
702 : * planner chose an index-only scan anyway, it is not interested
703 : * in this column, and we can replace it with a NULL.
704 : */
705 60350 : isnull[i] = true;
706 60350 : fetchatt[i] = (Datum) 0;
707 : }
708 : }
709 :
710 : /*
711 : * Get each included attribute.
712 : */
713 530236 : for (; i < r->rd_att->natts; i++)
714 : {
715 0 : fetchatt[i] = index_getattr(tuple, i + 1, giststate->leafTupdesc,
716 : &isnull[i]);
717 : }
718 530236 : MemoryContextSwitchTo(oldcxt);
719 :
720 530236 : return heap_form_tuple(giststate->fetchTupdesc, fetchatt, isnull);
721 : }
722 :
723 : float
724 45902096 : gistpenalty(GISTSTATE *giststate, int attno,
725 : GISTENTRY *orig, bool isNullOrig,
726 : GISTENTRY *add, bool isNullAdd)
727 : {
728 45902096 : float penalty = 0.0;
729 :
730 45902096 : if (giststate->penaltyFn[attno].fn_strict == false ||
731 45902096 : (isNullOrig == false && isNullAdd == false))
732 : {
733 45610260 : FunctionCall3Coll(&giststate->penaltyFn[attno],
734 : giststate->supportCollation[attno],
735 : PointerGetDatum(orig),
736 : PointerGetDatum(add),
737 : PointerGetDatum(&penalty));
738 : /* disallow negative or NaN penalty */
739 45610260 : if (isnan(penalty) || penalty < 0.0)
740 4724 : penalty = 0.0;
741 : }
742 291836 : else if (isNullOrig && isNullAdd)
743 18696 : penalty = 0.0;
744 : else
745 : {
746 : /* try to prevent mixing null and non-null values */
747 273140 : penalty = get_float4_infinity();
748 : }
749 :
750 45902096 : return penalty;
751 : }
752 :
753 : /*
754 : * Initialize a new index page
755 : */
756 : void
757 31910 : gistinitpage(Page page, uint32 f)
758 : {
759 : GISTPageOpaque opaque;
760 :
761 31910 : PageInit(page, BLCKSZ, sizeof(GISTPageOpaqueData));
762 :
763 31910 : opaque = GistPageGetOpaque(page);
764 31910 : opaque->rightlink = InvalidBlockNumber;
765 31910 : opaque->flags = f;
766 31910 : opaque->gist_page_id = GIST_PAGE_ID;
767 31910 : }
768 :
769 : /*
770 : * Initialize a new index buffer
771 : */
772 : void
773 29262 : GISTInitBuffer(Buffer b, uint32 f)
774 : {
775 : Page page;
776 :
777 29262 : page = BufferGetPage(b);
778 29262 : gistinitpage(page, f);
779 29262 : }
780 :
781 : /*
782 : * Verify that a freshly-read page looks sane.
783 : */
784 : void
785 2212462 : gistcheckpage(Relation rel, Buffer buf)
786 : {
787 2212462 : Page page = BufferGetPage(buf);
788 :
789 : /*
790 : * ReadBuffer verifies that every newly-read page passes
791 : * PageHeaderIsValid, which means it either contains a reasonably sane
792 : * page header or is all-zero. We have to defend against the all-zero
793 : * case, however.
794 : */
795 2212462 : if (PageIsNew(page))
796 0 : ereport(ERROR,
797 : (errcode(ERRCODE_INDEX_CORRUPTED),
798 : errmsg("index \"%s\" contains unexpected zero page at block %u",
799 : RelationGetRelationName(rel),
800 : BufferGetBlockNumber(buf)),
801 : errhint("Please REINDEX it.")));
802 :
803 : /*
804 : * Additionally check that the special area looks sane.
805 : */
806 2212462 : if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
807 0 : ereport(ERROR,
808 : (errcode(ERRCODE_INDEX_CORRUPTED),
809 : errmsg("index \"%s\" contains corrupted page at block %u",
810 : RelationGetRelationName(rel),
811 : BufferGetBlockNumber(buf)),
812 : errhint("Please REINDEX it.")));
813 2212462 : }
814 :
815 :
816 : /*
817 : * Allocate a new page (either by recycling, or by extending the index file)
818 : *
819 : * The returned buffer is already pinned and exclusive-locked
820 : *
821 : * Caller is responsible for initializing the page by calling GISTInitBuffer
822 : */
823 : Buffer
824 27450 : gistNewBuffer(Relation r, Relation heaprel)
825 : {
826 : Buffer buffer;
827 :
828 : /* First, try to get a page from FSM */
829 : for (;;)
830 0 : {
831 27450 : BlockNumber blkno = GetFreeIndexPage(r);
832 :
833 27450 : if (blkno == InvalidBlockNumber)
834 27450 : break; /* nothing left in FSM */
835 :
836 0 : buffer = ReadBuffer(r, blkno);
837 :
838 : /*
839 : * We have to guard against the possibility that someone else already
840 : * recycled this page; the buffer may be locked if so.
841 : */
842 0 : if (ConditionalLockBuffer(buffer))
843 : {
844 0 : Page page = BufferGetPage(buffer);
845 :
846 : /*
847 : * If the page was never initialized, it's OK to use.
848 : */
849 0 : if (PageIsNew(page))
850 0 : return buffer;
851 :
852 0 : gistcheckpage(r, buffer);
853 :
854 : /*
855 : * Otherwise, recycle it if deleted, and too old to have any
856 : * processes interested in it.
857 : */
858 0 : if (gistPageRecyclable(page))
859 : {
860 : /*
861 : * If we are generating WAL for Hot Standby then create a WAL
862 : * record that will allow us to conflict with queries running
863 : * on standby, in case they have snapshots older than the
864 : * page's deleteXid.
865 : */
866 0 : if (XLogStandbyInfoActive() && RelationNeedsWAL(r))
867 0 : gistXLogPageReuse(r, heaprel, blkno, GistPageGetDeleteXid(page));
868 :
869 0 : return buffer;
870 : }
871 :
872 0 : LockBuffer(buffer, GIST_UNLOCK);
873 : }
874 :
875 : /* Can't use it, so release buffer and try again */
876 0 : ReleaseBuffer(buffer);
877 : }
878 :
879 : /* Must extend the file */
880 27450 : buffer = ExtendBufferedRel(BMR_REL(r), MAIN_FORKNUM, NULL,
881 : EB_LOCK_FIRST);
882 :
883 27450 : return buffer;
884 : }
885 :
886 : /* Can this page be recycled yet? */
887 : bool
888 2678 : gistPageRecyclable(Page page)
889 : {
890 2678 : if (PageIsNew(page))
891 0 : return true;
892 2678 : if (GistPageIsDeleted(page))
893 : {
894 : /*
895 : * The page was deleted, but when? If it was just deleted, a scan
896 : * might have seen the downlink to it, and will read the page later.
897 : * As long as that can happen, we must keep the deleted page around as
898 : * a tombstone.
899 : *
900 : * For that check if the deletion XID could still be visible to
901 : * anyone. If not, then no scan that's still in progress could have
902 : * seen its downlink, and we can recycle it.
903 : */
904 0 : FullTransactionId deletexid_full = GistPageGetDeleteXid(page);
905 :
906 0 : return GlobalVisCheckRemovableFullXid(NULL, deletexid_full);
907 : }
908 2678 : return false;
909 : }
910 :
911 : bytea *
912 192 : gistoptions(Datum reloptions, bool validate)
913 : {
914 : static const relopt_parse_elt tab[] = {
915 : {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
916 : {"buffering", RELOPT_TYPE_ENUM, offsetof(GiSTOptions, buffering_mode)}
917 : };
918 :
919 192 : return (bytea *) build_reloptions(reloptions, validate,
920 : RELOPT_KIND_GIST,
921 : sizeof(GiSTOptions),
922 : tab, lengthof(tab));
923 : }
924 :
925 : /*
926 : * gistproperty() -- Check boolean properties of indexes.
927 : *
928 : * This is optional for most AMs, but is required for GiST because the core
929 : * property code doesn't support AMPROP_DISTANCE_ORDERABLE. We also handle
930 : * AMPROP_RETURNABLE here to save opening the rel to call gistcanreturn.
931 : */
932 : bool
933 468 : gistproperty(Oid index_oid, int attno,
934 : IndexAMProperty prop, const char *propname,
935 : bool *res, bool *isnull)
936 : {
937 : Oid opclass,
938 : opfamily,
939 : opcintype;
940 : int16 procno;
941 :
942 : /* Only answer column-level inquiries */
943 468 : if (attno == 0)
944 294 : return false;
945 :
946 : /*
947 : * Currently, GiST distance-ordered scans require that there be a distance
948 : * function in the opclass with the default types (i.e. the one loaded
949 : * into the relcache entry, see initGISTstate). So we assume that if such
950 : * a function exists, then there's a reason for it (rather than grubbing
951 : * through all the opfamily's operators to find an ordered one).
952 : *
953 : * Essentially the same code can test whether we support returning the
954 : * column data, since that's true if the opclass provides a fetch proc.
955 : */
956 :
957 174 : switch (prop)
958 : {
959 12 : case AMPROP_DISTANCE_ORDERABLE:
960 12 : procno = GIST_DISTANCE_PROC;
961 12 : break;
962 12 : case AMPROP_RETURNABLE:
963 12 : procno = GIST_FETCH_PROC;
964 12 : break;
965 150 : default:
966 150 : return false;
967 : }
968 :
969 : /* First we need to know the column's opclass. */
970 24 : opclass = get_index_column_opclass(index_oid, attno);
971 24 : if (!OidIsValid(opclass))
972 : {
973 0 : *isnull = true;
974 0 : return true;
975 : }
976 :
977 : /* Now look up the opclass family and input datatype. */
978 24 : if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
979 : {
980 0 : *isnull = true;
981 0 : return true;
982 : }
983 :
984 : /* And now we can check whether the function is provided. */
985 :
986 24 : *res = SearchSysCacheExists4(AMPROCNUM,
987 : ObjectIdGetDatum(opfamily),
988 : ObjectIdGetDatum(opcintype),
989 : ObjectIdGetDatum(opcintype),
990 : Int16GetDatum(procno));
991 :
992 : /*
993 : * Special case: even without a fetch function, AMPROP_RETURNABLE is true
994 : * if the opclass has no compress function.
995 : */
996 24 : if (prop == AMPROP_RETURNABLE && !*res)
997 : {
998 12 : *res = !SearchSysCacheExists4(AMPROCNUM,
999 : ObjectIdGetDatum(opfamily),
1000 : ObjectIdGetDatum(opcintype),
1001 : ObjectIdGetDatum(opcintype),
1002 : Int16GetDatum(GIST_COMPRESS_PROC));
1003 : }
1004 :
1005 24 : *isnull = false;
1006 :
1007 24 : return true;
1008 : }
1009 :
1010 : /*
1011 : * Some indexes are not WAL-logged, but we need LSNs to detect concurrent page
1012 : * splits anyway. This function provides a fake sequence of LSNs for that
1013 : * purpose.
1014 : */
1015 : XLogRecPtr
1016 84 : gistGetFakeLSN(Relation rel)
1017 : {
1018 84 : if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
1019 : {
1020 : /*
1021 : * Temporary relations are only accessible in our session, so a simple
1022 : * backend-local counter will do.
1023 : */
1024 : static XLogRecPtr counter = FirstNormalUnloggedLSN;
1025 :
1026 18 : return counter++;
1027 : }
1028 66 : else if (RelationIsPermanent(rel))
1029 : {
1030 : /*
1031 : * WAL-logging on this relation will start after commit, so its LSNs
1032 : * must be distinct numbers smaller than the LSN at the next commit.
1033 : * Emit a dummy WAL record if insert-LSN hasn't advanced after the
1034 : * last call.
1035 : */
1036 : static XLogRecPtr lastlsn = InvalidXLogRecPtr;
1037 0 : XLogRecPtr currlsn = GetXLogInsertRecPtr();
1038 :
1039 : /* Shouldn't be called for WAL-logging relations */
1040 : Assert(!RelationNeedsWAL(rel));
1041 :
1042 : /* No need for an actual record if we already have a distinct LSN */
1043 0 : if (!XLogRecPtrIsInvalid(lastlsn) && lastlsn == currlsn)
1044 0 : currlsn = gistXLogAssignLSN();
1045 :
1046 0 : lastlsn = currlsn;
1047 0 : return currlsn;
1048 : }
1049 : else
1050 : {
1051 : /*
1052 : * Unlogged relations are accessible from other backends, and survive
1053 : * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
1054 : */
1055 : Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
1056 66 : return GetFakeLSNForUnloggedRel();
1057 : }
1058 : }
1059 :
1060 : /*
1061 : * This is a stratnum support function for GiST opclasses that use the
1062 : * RT*StrategyNumber constants.
1063 : */
1064 : Datum
1065 2612 : gist_stratnum_common(PG_FUNCTION_ARGS)
1066 : {
1067 2612 : CompareType cmptype = PG_GETARG_INT32(0);
1068 :
1069 2612 : switch (cmptype)
1070 : {
1071 914 : case COMPARE_EQ:
1072 914 : PG_RETURN_UINT16(RTEqualStrategyNumber);
1073 0 : case COMPARE_LT:
1074 0 : PG_RETURN_UINT16(RTLessStrategyNumber);
1075 0 : case COMPARE_LE:
1076 0 : PG_RETURN_UINT16(RTLessEqualStrategyNumber);
1077 0 : case COMPARE_GT:
1078 0 : PG_RETURN_UINT16(RTGreaterStrategyNumber);
1079 0 : case COMPARE_GE:
1080 0 : PG_RETURN_UINT16(RTGreaterEqualStrategyNumber);
1081 810 : case COMPARE_OVERLAP:
1082 810 : PG_RETURN_UINT16(RTOverlapStrategyNumber);
1083 888 : case COMPARE_CONTAINED_BY:
1084 888 : PG_RETURN_UINT16(RTContainedByStrategyNumber);
1085 0 : default:
1086 0 : PG_RETURN_UINT16(InvalidStrategy);
1087 : }
1088 : }
1089 :
1090 : /*
1091 : * Returns the opclass's private stratnum used for the given compare type.
1092 : *
1093 : * Calls the opclass's GIST_STRATNUM_PROC support function, if any,
1094 : * and returns the result.
1095 : * Returns InvalidStrategy if the function is not defined.
1096 : */
1097 : StrategyNumber
1098 2606 : GistTranslateStratnum(Oid opclass, CompareType cmptype)
1099 : {
1100 : Oid opfamily;
1101 : Oid opcintype;
1102 : Oid funcid;
1103 : Datum result;
1104 :
1105 : /* Look up the opclass family and input datatype. */
1106 2606 : if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
1107 0 : return InvalidStrategy;
1108 :
1109 : /* Check whether the function is provided. */
1110 2606 : funcid = get_opfamily_proc(opfamily, opcintype, opcintype, GIST_STRATNUM_PROC);
1111 2606 : if (!OidIsValid(funcid))
1112 0 : return InvalidStrategy;
1113 :
1114 : /* Ask the translation function */
1115 2606 : result = OidFunctionCall1Coll(funcid, InvalidOid, Int32GetDatum(cmptype));
1116 2606 : return DatumGetUInt16(result);
1117 : }
|