Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gistutil.c
4 : * utilities routines for the postgres GiST index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gist/gistutil.c
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include <math.h>
17 :
18 : #include "access/gist_private.h"
19 : #include "access/htup_details.h"
20 : #include "access/reloptions.h"
21 : #include "common/pg_prng.h"
22 : #include "storage/indexfsm.h"
23 : #include "utils/float.h"
24 : #include "utils/fmgrprotos.h"
25 : #include "utils/lsyscache.h"
26 : #include "utils/rel.h"
27 : #include "utils/snapmgr.h"
28 : #include "utils/syscache.h"
29 :
30 : /*
31 : * Write itup vector to page, has no control of free space.
32 : */
33 : void
34 1163920 : gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
35 : {
36 : int i;
37 :
38 1163920 : if (off == InvalidOffsetNumber)
39 2320868 : off = (PageIsEmpty(page)) ? FirstOffsetNumber :
40 1158750 : OffsetNumberNext(PageGetMaxOffsetNumber(page));
41 :
42 2500486 : for (i = 0; i < len; i++)
43 : {
44 1336566 : Size sz = IndexTupleSize(itup[i]);
45 : OffsetNumber l;
46 :
47 1336566 : l = PageAddItem(page, (Item) itup[i], sz, off, false, false);
48 1336566 : if (l == InvalidOffsetNumber)
49 0 : elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes",
50 : i, len, (int) sz);
51 1336566 : off++;
52 : }
53 1163920 : }
54 :
55 : /*
56 : * Check space for itup vector on page
57 : */
58 : bool
59 1724078 : gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
60 : {
61 1724078 : unsigned int size = freespace,
62 1724078 : deleted = 0;
63 : int i;
64 :
65 3473052 : for (i = 0; i < len; i++)
66 1748974 : size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
67 :
68 1724078 : if (todelete != InvalidOffsetNumber)
69 : {
70 759228 : IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
71 :
72 759228 : deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
73 : }
74 :
75 1724078 : return (PageGetFreeSpace(page) + deleted < size);
76 : }
77 :
78 : bool
79 53016 : gistfitpage(IndexTuple *itvec, int len)
80 : {
81 : int i;
82 53016 : Size size = 0;
83 :
84 2324606 : for (i = 0; i < len; i++)
85 2271590 : size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
86 :
87 : /* TODO: Consider fillfactor */
88 53016 : return (size <= GiSTPageSize);
89 : }
90 :
91 : /*
92 : * Read buffer into itup vector
93 : */
94 : IndexTuple *
95 26388 : gistextractpage(Page page, int *len /* out */ )
96 : {
97 : OffsetNumber i,
98 : maxoff;
99 : IndexTuple *itvec;
100 :
101 26388 : maxoff = PageGetMaxOffsetNumber(page);
102 26388 : *len = maxoff;
103 26388 : itvec = palloc(sizeof(IndexTuple) * maxoff);
104 2020084 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
105 1993696 : itvec[i - FirstOffsetNumber] = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
106 :
107 26388 : return itvec;
108 : }
109 :
110 : /*
111 : * join two vectors into one
112 : */
113 : IndexTuple *
114 26104 : gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
115 : {
116 26104 : itvec = (IndexTuple *) repalloc(itvec, sizeof(IndexTuple) * ((*len) + addlen));
117 26104 : memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen);
118 26104 : *len += addlen;
119 26104 : return itvec;
120 : }
121 :
122 : /*
123 : * make plain IndexTuple vector
124 : */
125 :
126 : IndexTupleData *
127 52504 : gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
128 : {
129 : char *ptr,
130 : *ret;
131 : int i;
132 :
133 52504 : *memlen = 0;
134 :
135 2072206 : for (i = 0; i < veclen; i++)
136 2019702 : *memlen += IndexTupleSize(vec[i]);
137 :
138 52504 : ptr = ret = palloc(*memlen);
139 :
140 2072206 : for (i = 0; i < veclen; i++)
141 : {
142 2019702 : memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
143 2019702 : ptr += IndexTupleSize(vec[i]);
144 : }
145 :
146 52504 : return (IndexTupleData *) ret;
147 : }
148 :
149 : /*
150 : * Make unions of keys in IndexTuple vector (one union datum per index column).
151 : * Union Datums are returned into the attr/isnull arrays.
152 : * Resulting Datums aren't compressed.
153 : */
154 : void
155 5522 : gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
156 : Datum *attr, bool *isnull)
157 : {
158 : int i;
159 : GistEntryVector *evec;
160 : int attrsize;
161 :
162 5522 : evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
163 :
164 16180 : for (i = 0; i < giststate->nonLeafTupdesc->natts; i++)
165 : {
166 : int j;
167 :
168 : /* Collect non-null datums for this column */
169 10658 : evec->n = 0;
170 538056 : for (j = 0; j < len; j++)
171 : {
172 : Datum datum;
173 : bool IsNull;
174 :
175 527398 : datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc,
176 : &IsNull);
177 527398 : if (IsNull)
178 1726 : continue;
179 :
180 525672 : gistdentryinit(giststate, i,
181 525672 : evec->vector + evec->n,
182 : datum,
183 : NULL, NULL, (OffsetNumber) 0,
184 : false, IsNull);
185 525672 : evec->n++;
186 : }
187 :
188 : /* If this column was all NULLs, the union is NULL */
189 10658 : if (evec->n == 0)
190 : {
191 194 : attr[i] = (Datum) 0;
192 194 : isnull[i] = true;
193 : }
194 : else
195 : {
196 10464 : if (evec->n == 1)
197 : {
198 : /* unionFn may expect at least two inputs */
199 8 : evec->n = 2;
200 8 : evec->vector[1] = evec->vector[0];
201 : }
202 :
203 : /* Make union and store in attr array */
204 10464 : attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
205 : giststate->supportCollation[i],
206 : PointerGetDatum(evec),
207 : PointerGetDatum(&attrsize));
208 :
209 10464 : isnull[i] = false;
210 : }
211 : }
212 5522 : }
213 :
214 : /*
215 : * Return an IndexTuple containing the result of applying the "union"
216 : * method to the specified IndexTuple vector.
217 : */
218 : IndexTuple
219 6 : gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
220 : {
221 : Datum attr[INDEX_MAX_KEYS];
222 : bool isnull[INDEX_MAX_KEYS];
223 :
224 6 : gistMakeUnionItVec(giststate, itvec, len, attr, isnull);
225 :
226 6 : return gistFormTuple(giststate, r, attr, isnull, false);
227 : }
228 :
229 : /*
230 : * makes union of two key
231 : */
232 : void
233 1460280 : gistMakeUnionKey(GISTSTATE *giststate, int attno,
234 : GISTENTRY *entry1, bool isnull1,
235 : GISTENTRY *entry2, bool isnull2,
236 : Datum *dst, bool *dstisnull)
237 : {
238 : /* we need a GistEntryVector with room for exactly 2 elements */
239 : union
240 : {
241 : GistEntryVector gev;
242 : char padding[2 * sizeof(GISTENTRY) + GEVHDRSZ];
243 : } storage;
244 1460280 : GistEntryVector *evec = &storage.gev;
245 : int dstsize;
246 :
247 1460280 : evec->n = 2;
248 :
249 1460280 : if (isnull1 && isnull2)
250 : {
251 18268 : *dstisnull = true;
252 18268 : *dst = (Datum) 0;
253 : }
254 : else
255 : {
256 1442012 : if (isnull1 == false && isnull2 == false)
257 : {
258 1441232 : evec->vector[0] = *entry1;
259 1441232 : evec->vector[1] = *entry2;
260 : }
261 780 : else if (isnull1 == false)
262 : {
263 780 : evec->vector[0] = *entry1;
264 780 : evec->vector[1] = *entry1;
265 : }
266 : else
267 : {
268 0 : evec->vector[0] = *entry2;
269 0 : evec->vector[1] = *entry2;
270 : }
271 :
272 1442012 : *dstisnull = false;
273 1442012 : *dst = FunctionCall2Coll(&giststate->unionFn[attno],
274 : giststate->supportCollation[attno],
275 : PointerGetDatum(evec),
276 : PointerGetDatum(&dstsize));
277 : }
278 1460280 : }
279 :
280 : bool
281 1260804 : gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
282 : {
283 1260804 : bool result = false; /* silence compiler warning */
284 :
285 1260804 : FunctionCall3Coll(&giststate->equalFn[attno],
286 : giststate->supportCollation[attno],
287 : a, b,
288 : PointerGetDatum(&result));
289 1260804 : return result;
290 : }
291 :
292 : /*
293 : * Decompress all keys in tuple
294 : */
295 : void
296 3847998 : gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
297 : OffsetNumber o, GISTENTRY *attdata, bool *isnull)
298 : {
299 : int i;
300 :
301 8258586 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
302 : {
303 : Datum datum;
304 :
305 4410588 : datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
306 4410588 : gistdentryinit(giststate, i, &attdata[i],
307 : datum, r, p, o,
308 4410588 : false, isnull[i]);
309 : }
310 3847998 : }
311 :
312 : /*
313 : * Forms union of oldtup and addtup, if union == oldtup then return NULL
314 : */
315 : IndexTuple
316 1272746 : gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
317 : {
318 1272746 : bool neednew = false;
319 : GISTENTRY oldentries[INDEX_MAX_KEYS],
320 : addentries[INDEX_MAX_KEYS];
321 : bool oldisnull[INDEX_MAX_KEYS],
322 : addisnull[INDEX_MAX_KEYS];
323 : Datum attr[INDEX_MAX_KEYS];
324 : bool isnull[INDEX_MAX_KEYS];
325 1272746 : IndexTuple newtup = NULL;
326 : int i;
327 :
328 1272746 : gistDeCompressAtt(giststate, r, oldtup, NULL,
329 : (OffsetNumber) 0, oldentries, oldisnull);
330 :
331 1272746 : gistDeCompressAtt(giststate, r, addtup, NULL,
332 : (OffsetNumber) 0, addentries, addisnull);
333 :
334 2733022 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
335 : {
336 1460276 : gistMakeUnionKey(giststate, i,
337 1460276 : oldentries + i, oldisnull[i],
338 1460276 : addentries + i, addisnull[i],
339 1460276 : attr + i, isnull + i);
340 :
341 1460276 : if (neednew)
342 : /* we already need new key, so we can skip check */
343 182988 : continue;
344 :
345 1277288 : if (isnull[i])
346 : /* union of key may be NULL if and only if both keys are NULL */
347 18268 : continue;
348 :
349 1259020 : if (!addisnull[i])
350 : {
351 1258240 : if (oldisnull[i] ||
352 1258240 : !gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i]))
353 764068 : neednew = true;
354 : }
355 : }
356 :
357 1272746 : if (neednew)
358 : {
359 : /* need to update key */
360 764068 : newtup = gistFormTuple(giststate, r, attr, isnull, false);
361 764068 : newtup->t_tid = oldtup->t_tid;
362 : }
363 :
364 1272746 : return newtup;
365 : }
366 :
367 : /*
368 : * Search an upper index page for the entry with lowest penalty for insertion
369 : * of the new index key contained in "it".
370 : *
371 : * Returns the index of the page entry to insert into.
372 : */
373 : OffsetNumber
374 1243004 : gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
375 : GISTSTATE *giststate)
376 : {
377 : OffsetNumber result;
378 : OffsetNumber maxoff;
379 : OffsetNumber i;
380 : float best_penalty[INDEX_MAX_KEYS];
381 : GISTENTRY entry,
382 : identry[INDEX_MAX_KEYS];
383 : bool isnull[INDEX_MAX_KEYS];
384 : int keep_current_best;
385 :
386 : Assert(!GistPageIsLeaf(p));
387 :
388 1243004 : gistDeCompressAtt(giststate, r,
389 : it, NULL, (OffsetNumber) 0,
390 : identry, isnull);
391 :
392 : /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
393 1243004 : result = FirstOffsetNumber;
394 :
395 : /*
396 : * The index may have multiple columns, and there's a penalty value for
397 : * each column. The penalty associated with a column that appears earlier
398 : * in the index definition is strictly more important than the penalty of
399 : * a column that appears later in the index definition.
400 : *
401 : * best_penalty[j] is the best penalty we have seen so far for column j,
402 : * or -1 when we haven't yet examined column j. Array entries to the
403 : * right of the first -1 are undefined.
404 : */
405 1243004 : best_penalty[0] = -1;
406 :
407 : /*
408 : * If we find a tuple that's exactly as good as the currently best one, we
409 : * could use either one. When inserting a lot of tuples with the same or
410 : * similar keys, it's preferable to descend down the same path when
411 : * possible, as that's more cache-friendly. On the other hand, if all
412 : * inserts land on the same leaf page after a split, we're never going to
413 : * insert anything to the other half of the split, and will end up using
414 : * only 50% of the available space. Distributing the inserts evenly would
415 : * lead to better space usage, but that hurts cache-locality during
416 : * insertion. To get the best of both worlds, when we find a tuple that's
417 : * exactly as good as the previous best, choose randomly whether to stick
418 : * to the old best, or use the new one. Once we decide to stick to the
419 : * old best, we keep sticking to it for any subsequent equally good tuples
420 : * we might find. This favors tuples with low offsets, but still allows
421 : * some inserts to go to other equally-good subtrees.
422 : *
423 : * keep_current_best is -1 if we haven't yet had to make a random choice
424 : * whether to keep the current best tuple. If we have done so, and
425 : * decided to keep it, keep_current_best is 1; if we've decided to
426 : * replace, keep_current_best is 0. (This state will be reset to -1 as
427 : * soon as we've made the replacement, but sometimes we make the choice in
428 : * advance of actually finding a replacement best tuple.)
429 : */
430 1243004 : keep_current_best = -1;
431 :
432 : /*
433 : * Loop over tuples on page.
434 : */
435 1243004 : maxoff = PageGetMaxOffsetNumber(p);
436 : Assert(maxoff >= FirstOffsetNumber);
437 :
438 39139828 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
439 : {
440 38158850 : IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
441 : bool zero_penalty;
442 : int j;
443 :
444 38158850 : zero_penalty = true;
445 :
446 : /* Loop over index attributes. */
447 77301330 : for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++)
448 : {
449 : Datum datum;
450 : float usize;
451 : bool IsNull;
452 :
453 : /* Compute penalty for this column. */
454 45528000 : datum = index_getattr(itup, j + 1, giststate->leafTupdesc,
455 : &IsNull);
456 45528000 : gistdentryinit(giststate, j, &entry, datum, r, p, i,
457 : false, IsNull);
458 45528000 : usize = gistpenalty(giststate, j, &entry, IsNull,
459 45528000 : &identry[j], isnull[j]);
460 45528000 : if (usize > 0)
461 44996052 : zero_penalty = false;
462 :
463 45528000 : if (best_penalty[j] < 0 || usize < best_penalty[j])
464 : {
465 : /*
466 : * New best penalty for column. Tentatively select this tuple
467 : * as the target, and record the best penalty. Then reset the
468 : * next column's penalty to "unknown" (and indirectly, the
469 : * same for all the ones to its right). This will force us to
470 : * adopt this tuple's penalty values as the best for all the
471 : * remaining columns during subsequent loop iterations.
472 : */
473 36510854 : result = i;
474 36510854 : best_penalty[j] = usize;
475 :
476 36510854 : if (j < IndexRelationGetNumberOfKeyAttributes(r) - 1)
477 7365912 : best_penalty[j + 1] = -1;
478 :
479 : /* we have new best, so reset keep-it decision */
480 36510854 : keep_current_best = -1;
481 : }
482 9017146 : else if (best_penalty[j] == usize)
483 : {
484 : /*
485 : * The current tuple is exactly as good for this column as the
486 : * best tuple seen so far. The next iteration of this loop
487 : * will compare the next column.
488 : */
489 : }
490 : else
491 : {
492 : /*
493 : * The current tuple is worse for this column than the best
494 : * tuple seen so far. Skip the remaining columns and move on
495 : * to the next tuple, if any.
496 : */
497 6385520 : zero_penalty = false; /* so outer loop won't exit */
498 6385520 : break;
499 : }
500 : }
501 :
502 : /*
503 : * If we looped past the last column, and did not update "result",
504 : * then this tuple is exactly as good as the prior best tuple.
505 : */
506 38158850 : if (j == IndexRelationGetNumberOfKeyAttributes(r) && result != i)
507 : {
508 2628388 : if (keep_current_best == -1)
509 : {
510 : /* we didn't make the random choice yet for this old best */
511 292190 : keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
512 : }
513 2628388 : if (keep_current_best == 0)
514 : {
515 : /* we choose to use the new tuple */
516 251350 : result = i;
517 : /* choose again if there are even more exactly-as-good ones */
518 251350 : keep_current_best = -1;
519 : }
520 : }
521 :
522 : /*
523 : * If we find a tuple with zero penalty for all columns, and we've
524 : * decided we don't want to search for another tuple with equal
525 : * penalty, there's no need to examine remaining tuples; just break
526 : * out of the loop and return it.
527 : */
528 38158850 : if (zero_penalty)
529 : {
530 524184 : if (keep_current_best == -1)
531 : {
532 : /* we didn't make the random choice yet for this old best */
533 524184 : keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
534 : }
535 524184 : if (keep_current_best == 1)
536 262026 : break;
537 : }
538 : }
539 :
540 1243004 : return result;
541 : }
542 :
543 : /*
544 : * initialize a GiST entry with a decompressed version of key
545 : */
546 : void
547 54776654 : gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
548 : Datum k, Relation r, Page pg, OffsetNumber o,
549 : bool l, bool isNull)
550 : {
551 54776654 : if (!isNull)
552 : {
553 : GISTENTRY *dep;
554 :
555 54510024 : gistentryinit(*e, k, r, pg, o, l);
556 :
557 : /* there may not be a decompress function in opclass */
558 54510024 : if (!OidIsValid(giststate->decompressFn[nkey].fn_oid))
559 48271182 : return;
560 :
561 : dep = (GISTENTRY *)
562 6238842 : DatumGetPointer(FunctionCall1Coll(&giststate->decompressFn[nkey],
563 : giststate->supportCollation[nkey],
564 : PointerGetDatum(e)));
565 : /* decompressFn may just return the given pointer */
566 6238842 : if (dep != e)
567 744228 : gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset,
568 : dep->leafkey);
569 : }
570 : else
571 266630 : gistentryinit(*e, (Datum) 0, r, pg, o, l);
572 : }
573 :
574 : IndexTuple
575 1780958 : gistFormTuple(GISTSTATE *giststate, Relation r,
576 : const Datum *attdata, const bool *isnull, bool isleaf)
577 : {
578 : Datum compatt[INDEX_MAX_KEYS];
579 : IndexTuple res;
580 :
581 1780958 : gistCompressValues(giststate, r, attdata, isnull, isleaf, compatt);
582 :
583 1780956 : res = index_form_tuple(isleaf ? giststate->leafTupdesc :
584 : giststate->nonLeafTupdesc,
585 : compatt, isnull);
586 :
587 : /*
588 : * The offset number on tuples on internal pages is unused. For historical
589 : * reasons, it is set to 0xffff.
590 : */
591 1780956 : ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
592 1780956 : return res;
593 : }
594 :
595 : void
596 1977102 : gistCompressValues(GISTSTATE *giststate, Relation r,
597 : const Datum *attdata, const bool *isnull, bool isleaf, Datum *compatt)
598 : {
599 : int i;
600 :
601 : /*
602 : * Call the compress method on each attribute.
603 : */
604 4270036 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
605 : {
606 2292936 : if (isnull[i])
607 15580 : compatt[i] = (Datum) 0;
608 : else
609 : {
610 : GISTENTRY centry;
611 : GISTENTRY *cep;
612 :
613 2277356 : gistentryinit(centry, attdata[i], r, NULL, (OffsetNumber) 0,
614 : isleaf);
615 : /* there may not be a compress function in opclass */
616 2277356 : if (OidIsValid(giststate->compressFn[i].fn_oid))
617 : cep = (GISTENTRY *)
618 1784802 : DatumGetPointer(FunctionCall1Coll(&giststate->compressFn[i],
619 : giststate->supportCollation[i],
620 : PointerGetDatum(¢ry)));
621 : else
622 492554 : cep = ¢ry;
623 2277354 : compatt[i] = cep->key;
624 : }
625 : }
626 :
627 1977100 : if (isleaf)
628 : {
629 : /*
630 : * Emplace each included attribute if any.
631 : */
632 1454076 : for (; i < r->rd_att->natts; i++)
633 : {
634 293140 : if (isnull[i])
635 2000 : compatt[i] = (Datum) 0;
636 : else
637 291140 : compatt[i] = attdata[i];
638 : }
639 : }
640 1977100 : }
641 :
642 : /*
643 : * initialize a GiST entry with fetched value in key field
644 : */
645 : static Datum
646 105762 : gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r)
647 : {
648 : GISTENTRY fentry;
649 : GISTENTRY *fep;
650 :
651 105762 : gistentryinit(fentry, k, r, NULL, (OffsetNumber) 0, false);
652 :
653 : fep = (GISTENTRY *)
654 105762 : DatumGetPointer(FunctionCall1Coll(&giststate->fetchFn[nkey],
655 : giststate->supportCollation[nkey],
656 : PointerGetDatum(&fentry)));
657 :
658 : /* fetchFn set 'key', return it to the caller */
659 105762 : return fep->key;
660 : }
661 :
662 : /*
663 : * Fetch all keys in tuple.
664 : * Returns a new HeapTuple containing the originally-indexed data.
665 : */
666 : HeapTuple
667 530622 : gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple)
668 : {
669 530622 : MemoryContext oldcxt = MemoryContextSwitchTo(giststate->tempCxt);
670 : Datum fetchatt[INDEX_MAX_KEYS];
671 : bool isnull[INDEX_MAX_KEYS];
672 : int i;
673 :
674 1121594 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
675 : {
676 : Datum datum;
677 :
678 590972 : datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
679 :
680 590972 : if (giststate->fetchFn[i].fn_oid != InvalidOid)
681 : {
682 105774 : if (!isnull[i])
683 105762 : fetchatt[i] = gistFetchAtt(giststate, i, datum, r);
684 : else
685 12 : fetchatt[i] = (Datum) 0;
686 : }
687 485198 : else if (giststate->compressFn[i].fn_oid == InvalidOid)
688 : {
689 : /*
690 : * If opclass does not provide compress method that could change
691 : * original value, att is necessarily stored in original form.
692 : */
693 424848 : if (!isnull[i])
694 421512 : fetchatt[i] = datum;
695 : else
696 3336 : fetchatt[i] = (Datum) 0;
697 : }
698 : else
699 : {
700 : /*
701 : * Index-only scans not supported for this column. Since the
702 : * planner chose an index-only scan anyway, it is not interested
703 : * in this column, and we can replace it with a NULL.
704 : */
705 60350 : isnull[i] = true;
706 60350 : fetchatt[i] = (Datum) 0;
707 : }
708 : }
709 :
710 : /*
711 : * Get each included attribute.
712 : */
713 530622 : for (; i < r->rd_att->natts; i++)
714 : {
715 0 : fetchatt[i] = index_getattr(tuple, i + 1, giststate->leafTupdesc,
716 : &isnull[i]);
717 : }
718 530622 : MemoryContextSwitchTo(oldcxt);
719 :
720 530622 : return heap_form_tuple(giststate->fetchTupdesc, fetchatt, isnull);
721 : }
722 :
723 : float
724 45834266 : gistpenalty(GISTSTATE *giststate, int attno,
725 : GISTENTRY *orig, bool isNullOrig,
726 : GISTENTRY *add, bool isNullAdd)
727 : {
728 45834266 : float penalty = 0.0;
729 :
730 45834266 : if (giststate->penaltyFn[attno].fn_strict == false ||
731 45834266 : (isNullOrig == false && isNullAdd == false))
732 : {
733 45544420 : FunctionCall3Coll(&giststate->penaltyFn[attno],
734 : giststate->supportCollation[attno],
735 : PointerGetDatum(orig),
736 : PointerGetDatum(add),
737 : PointerGetDatum(&penalty));
738 : /* disallow negative or NaN penalty */
739 45544420 : if (isnan(penalty) || penalty < 0.0)
740 3556 : penalty = 0.0;
741 : }
742 289846 : else if (isNullOrig && isNullAdd)
743 18502 : penalty = 0.0;
744 : else
745 : {
746 : /* try to prevent mixing null and non-null values */
747 271344 : penalty = get_float4_infinity();
748 : }
749 :
750 45834266 : return penalty;
751 : }
752 :
753 : /*
754 : * Initialize a new index page
755 : */
756 : void
757 31650 : gistinitpage(Page page, uint32 f)
758 : {
759 : GISTPageOpaque opaque;
760 :
761 31650 : PageInit(page, BLCKSZ, sizeof(GISTPageOpaqueData));
762 :
763 31650 : opaque = GistPageGetOpaque(page);
764 31650 : opaque->rightlink = InvalidBlockNumber;
765 31650 : opaque->flags = f;
766 31650 : opaque->gist_page_id = GIST_PAGE_ID;
767 31650 : }
768 :
769 : /*
770 : * Initialize a new index buffer
771 : */
772 : void
773 29002 : GISTInitBuffer(Buffer b, uint32 f)
774 : {
775 : Page page;
776 :
777 29002 : page = BufferGetPage(b);
778 29002 : gistinitpage(page, f);
779 29002 : }
780 :
781 : /*
782 : * Verify that a freshly-read page looks sane.
783 : */
784 : void
785 2205598 : gistcheckpage(Relation rel, Buffer buf)
786 : {
787 2205598 : Page page = BufferGetPage(buf);
788 :
789 : /*
790 : * ReadBuffer verifies that every newly-read page passes
791 : * PageHeaderIsValid, which means it either contains a reasonably sane
792 : * page header or is all-zero. We have to defend against the all-zero
793 : * case, however.
794 : */
795 2205598 : if (PageIsNew(page))
796 0 : ereport(ERROR,
797 : (errcode(ERRCODE_INDEX_CORRUPTED),
798 : errmsg("index \"%s\" contains unexpected zero page at block %u",
799 : RelationGetRelationName(rel),
800 : BufferGetBlockNumber(buf)),
801 : errhint("Please REINDEX it.")));
802 :
803 : /*
804 : * Additionally check that the special area looks sane.
805 : */
806 2205598 : if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
807 0 : ereport(ERROR,
808 : (errcode(ERRCODE_INDEX_CORRUPTED),
809 : errmsg("index \"%s\" contains corrupted page at block %u",
810 : RelationGetRelationName(rel),
811 : BufferGetBlockNumber(buf)),
812 : errhint("Please REINDEX it.")));
813 2205598 : }
814 :
815 :
816 : /*
817 : * Allocate a new page (either by recycling, or by extending the index file)
818 : *
819 : * The returned buffer is already pinned and exclusive-locked
820 : *
821 : * Caller is responsible for initializing the page by calling GISTInitBuffer
822 : */
823 : Buffer
824 27190 : gistNewBuffer(Relation r, Relation heaprel)
825 : {
826 : Buffer buffer;
827 :
828 : /* First, try to get a page from FSM */
829 : for (;;)
830 0 : {
831 27190 : BlockNumber blkno = GetFreeIndexPage(r);
832 :
833 27190 : if (blkno == InvalidBlockNumber)
834 27190 : break; /* nothing left in FSM */
835 :
836 0 : buffer = ReadBuffer(r, blkno);
837 :
838 : /*
839 : * We have to guard against the possibility that someone else already
840 : * recycled this page; the buffer may be locked if so.
841 : */
842 0 : if (ConditionalLockBuffer(buffer))
843 : {
844 0 : Page page = BufferGetPage(buffer);
845 :
846 : /*
847 : * If the page was never initialized, it's OK to use.
848 : */
849 0 : if (PageIsNew(page))
850 0 : return buffer;
851 :
852 0 : gistcheckpage(r, buffer);
853 :
854 : /*
855 : * Otherwise, recycle it if deleted, and too old to have any
856 : * processes interested in it.
857 : */
858 0 : if (gistPageRecyclable(page))
859 : {
860 : /*
861 : * If we are generating WAL for Hot Standby then create a WAL
862 : * record that will allow us to conflict with queries running
863 : * on standby, in case they have snapshots older than the
864 : * page's deleteXid.
865 : */
866 0 : if (XLogStandbyInfoActive() && RelationNeedsWAL(r))
867 0 : gistXLogPageReuse(r, heaprel, blkno, GistPageGetDeleteXid(page));
868 :
869 0 : return buffer;
870 : }
871 :
872 0 : LockBuffer(buffer, GIST_UNLOCK);
873 : }
874 :
875 : /* Can't use it, so release buffer and try again */
876 0 : ReleaseBuffer(buffer);
877 : }
878 :
879 : /* Must extend the file */
880 27190 : buffer = ExtendBufferedRel(BMR_REL(r), MAIN_FORKNUM, NULL,
881 : EB_LOCK_FIRST);
882 :
883 27190 : return buffer;
884 : }
885 :
886 : /* Can this page be recycled yet? */
887 : bool
888 2010 : gistPageRecyclable(Page page)
889 : {
890 2010 : if (PageIsNew(page))
891 0 : return true;
892 2010 : if (GistPageIsDeleted(page))
893 : {
894 : /*
895 : * The page was deleted, but when? If it was just deleted, a scan
896 : * might have seen the downlink to it, and will read the page later.
897 : * As long as that can happen, we must keep the deleted page around as
898 : * a tombstone.
899 : *
900 : * For that check if the deletion XID could still be visible to
901 : * anyone. If not, then no scan that's still in progress could have
902 : * seen its downlink, and we can recycle it.
903 : */
904 0 : FullTransactionId deletexid_full = GistPageGetDeleteXid(page);
905 :
906 0 : return GlobalVisCheckRemovableFullXid(NULL, deletexid_full);
907 : }
908 2010 : return false;
909 : }
910 :
911 : bytea *
912 180 : gistoptions(Datum reloptions, bool validate)
913 : {
914 : static const relopt_parse_elt tab[] = {
915 : {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
916 : {"buffering", RELOPT_TYPE_ENUM, offsetof(GiSTOptions, buffering_mode)}
917 : };
918 :
919 180 : return (bytea *) build_reloptions(reloptions, validate,
920 : RELOPT_KIND_GIST,
921 : sizeof(GiSTOptions),
922 : tab, lengthof(tab));
923 : }
924 :
925 : /*
926 : * gistproperty() -- Check boolean properties of indexes.
927 : *
928 : * This is optional for most AMs, but is required for GiST because the core
929 : * property code doesn't support AMPROP_DISTANCE_ORDERABLE. We also handle
930 : * AMPROP_RETURNABLE here to save opening the rel to call gistcanreturn.
931 : */
932 : bool
933 468 : gistproperty(Oid index_oid, int attno,
934 : IndexAMProperty prop, const char *propname,
935 : bool *res, bool *isnull)
936 : {
937 : Oid opclass,
938 : opfamily,
939 : opcintype;
940 : int16 procno;
941 :
942 : /* Only answer column-level inquiries */
943 468 : if (attno == 0)
944 294 : return false;
945 :
946 : /*
947 : * Currently, GiST distance-ordered scans require that there be a distance
948 : * function in the opclass with the default types (i.e. the one loaded
949 : * into the relcache entry, see initGISTstate). So we assume that if such
950 : * a function exists, then there's a reason for it (rather than grubbing
951 : * through all the opfamily's operators to find an ordered one).
952 : *
953 : * Essentially the same code can test whether we support returning the
954 : * column data, since that's true if the opclass provides a fetch proc.
955 : */
956 :
957 174 : switch (prop)
958 : {
959 12 : case AMPROP_DISTANCE_ORDERABLE:
960 12 : procno = GIST_DISTANCE_PROC;
961 12 : break;
962 12 : case AMPROP_RETURNABLE:
963 12 : procno = GIST_FETCH_PROC;
964 12 : break;
965 150 : default:
966 150 : return false;
967 : }
968 :
969 : /* First we need to know the column's opclass. */
970 24 : opclass = get_index_column_opclass(index_oid, attno);
971 24 : if (!OidIsValid(opclass))
972 : {
973 0 : *isnull = true;
974 0 : return true;
975 : }
976 :
977 : /* Now look up the opclass family and input datatype. */
978 24 : if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
979 : {
980 0 : *isnull = true;
981 0 : return true;
982 : }
983 :
984 : /* And now we can check whether the function is provided. */
985 :
986 24 : *res = SearchSysCacheExists4(AMPROCNUM,
987 : ObjectIdGetDatum(opfamily),
988 : ObjectIdGetDatum(opcintype),
989 : ObjectIdGetDatum(opcintype),
990 : Int16GetDatum(procno));
991 :
992 : /*
993 : * Special case: even without a fetch function, AMPROP_RETURNABLE is true
994 : * if the opclass has no compress function.
995 : */
996 24 : if (prop == AMPROP_RETURNABLE && !*res)
997 : {
998 12 : *res = !SearchSysCacheExists4(AMPROCNUM,
999 : ObjectIdGetDatum(opfamily),
1000 : ObjectIdGetDatum(opcintype),
1001 : ObjectIdGetDatum(opcintype),
1002 : Int16GetDatum(GIST_COMPRESS_PROC));
1003 : }
1004 :
1005 24 : *isnull = false;
1006 :
1007 24 : return true;
1008 : }
1009 :
1010 : /*
1011 : * Some indexes are not WAL-logged, but we need LSNs to detect concurrent page
1012 : * splits anyway. This function provides a fake sequence of LSNs for that
1013 : * purpose.
1014 : */
1015 : XLogRecPtr
1016 82 : gistGetFakeLSN(Relation rel)
1017 : {
1018 82 : if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
1019 : {
1020 : /*
1021 : * Temporary relations are only accessible in our session, so a simple
1022 : * backend-local counter will do.
1023 : */
1024 : static XLogRecPtr counter = FirstNormalUnloggedLSN;
1025 :
1026 16 : return counter++;
1027 : }
1028 66 : else if (RelationIsPermanent(rel))
1029 : {
1030 : /*
1031 : * WAL-logging on this relation will start after commit, so its LSNs
1032 : * must be distinct numbers smaller than the LSN at the next commit.
1033 : * Emit a dummy WAL record if insert-LSN hasn't advanced after the
1034 : * last call.
1035 : */
1036 : static XLogRecPtr lastlsn = InvalidXLogRecPtr;
1037 0 : XLogRecPtr currlsn = GetXLogInsertRecPtr();
1038 :
1039 : /* Shouldn't be called for WAL-logging relations */
1040 : Assert(!RelationNeedsWAL(rel));
1041 :
1042 : /* No need for an actual record if we already have a distinct LSN */
1043 0 : if (!XLogRecPtrIsInvalid(lastlsn) && lastlsn == currlsn)
1044 0 : currlsn = gistXLogAssignLSN();
1045 :
1046 0 : lastlsn = currlsn;
1047 0 : return currlsn;
1048 : }
1049 : else
1050 : {
1051 : /*
1052 : * Unlogged relations are accessible from other backends, and survive
1053 : * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
1054 : */
1055 : Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
1056 66 : return GetFakeLSNForUnloggedRel();
1057 : }
1058 : }
1059 :
1060 : /*
1061 : * Returns the same number that was received.
1062 : *
1063 : * This is for GiST opclasses that use the RT*StrategyNumber constants.
1064 : */
1065 : Datum
1066 2500 : gist_stratnum_identity(PG_FUNCTION_ARGS)
1067 : {
1068 2500 : StrategyNumber strat = PG_GETARG_UINT16(0);
1069 :
1070 2500 : PG_RETURN_UINT16(strat);
1071 : }
1072 :
1073 : /*
1074 : * Returns the opclass's private stratnum used for the given strategy.
1075 : *
1076 : * Calls the opclass's GIST_STRATNUM_PROC support function, if any,
1077 : * and returns the result.
1078 : * Returns InvalidStrategy if the function is not defined.
1079 : */
1080 : StrategyNumber
1081 2494 : GistTranslateStratnum(Oid opclass, StrategyNumber strat)
1082 : {
1083 : Oid opfamily;
1084 : Oid opcintype;
1085 : Oid funcid;
1086 : Datum result;
1087 :
1088 : /* Look up the opclass family and input datatype. */
1089 2494 : if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
1090 0 : return InvalidStrategy;
1091 :
1092 : /* Check whether the function is provided. */
1093 2494 : funcid = get_opfamily_proc(opfamily, opcintype, opcintype, GIST_STRATNUM_PROC);
1094 2494 : if (!OidIsValid(funcid))
1095 0 : return InvalidStrategy;
1096 :
1097 : /* Ask the translation function */
1098 2494 : result = OidFunctionCall1Coll(funcid, InvalidOid, UInt16GetDatum(strat));
1099 2494 : return DatumGetUInt16(result);
1100 : }
|