Line data Source code
1 : /*
2 : * contrib/pageinspect/btreefuncs.c
3 : *
4 : *
5 : * btreefuncs.c
6 : *
7 : * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
8 : *
9 : * Permission to use, copy, modify, and distribute this software and
10 : * its documentation for any purpose, without fee, and without a
11 : * written agreement is hereby granted, provided that the above
12 : * copyright notice and this paragraph and the following two
13 : * paragraphs appear in all copies.
14 : *
15 : * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
16 : * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
17 : * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
18 : * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
19 : * OF THE POSSIBILITY OF SUCH DAMAGE.
20 : *
21 : * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
22 : * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 : * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
24 : * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
25 : * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
26 : */
27 :
28 : #include "postgres.h"
29 :
30 : #include "access/htup_details.h"
31 : #include "access/nbtree.h"
32 : #include "access/relation.h"
33 : #include "catalog/namespace.h"
34 : #include "catalog/pg_am.h"
35 : #include "catalog/pg_type.h"
36 : #include "funcapi.h"
37 : #include "miscadmin.h"
38 : #include "pageinspect.h"
39 : #include "utils/array.h"
40 : #include "utils/builtins.h"
41 : #include "utils/rel.h"
42 : #include "utils/varlena.h"
43 :
/* SQL-callable entry points defined in this file */
PG_FUNCTION_INFO_V1(bt_metap);
PG_FUNCTION_INFO_V1(bt_page_items_1_9);
PG_FUNCTION_INFO_V1(bt_page_items);
PG_FUNCTION_INFO_V1(bt_page_items_bytea);
PG_FUNCTION_INFO_V1(bt_page_stats_1_9);
PG_FUNCTION_INFO_V1(bt_page_stats);
PG_FUNCTION_INFO_V1(bt_multi_page_stats);

/* Tests applied to an opened relation before its pages are inspected */
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
54 :
55 : /* ------------------------------------------------
56 : * structure for single btree page statistics
57 : * ------------------------------------------------
58 : */
59 : typedef struct BTPageStat
60 : {
61 : uint32 blkno;
62 : uint32 live_items;
63 : uint32 dead_items;
64 : uint32 page_size;
65 : uint32 max_avail;
66 : uint32 free_size;
67 : uint32 avg_item_size;
68 : char type;
69 :
70 : /* opaque data */
71 : BlockNumber btpo_prev;
72 : BlockNumber btpo_next;
73 : uint32 btpo_level;
74 : uint16 btpo_flags;
75 : BTCycleId btpo_cycleid;
76 : } BTPageStat;
77 :
78 : /*
79 : * cross-call data structure for SRF for page stats
80 : */
81 : typedef struct ua_page_stats
82 : {
83 : Oid relid;
84 : int64 blkno;
85 : int64 blk_count;
86 : bool allpages;
87 : } ua_page_stats;
88 :
89 : /*
90 : * cross-call data structure for SRF for page items
91 : */
92 : typedef struct ua_page_items
93 : {
94 : Page page;
95 : OffsetNumber offset;
96 : bool leafpage;
97 : bool rightmost;
98 : TupleDesc tupd;
99 : } ua_page_items;
100 :
101 :
102 : /* -------------------------------------------------
103 : * GetBTPageStatistics()
104 : *
105 : * Collect statistics of single b-tree page
106 : * -------------------------------------------------
107 : */
108 : static void
109 20 : GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
110 : {
111 20 : Page page = BufferGetPage(buffer);
112 20 : PageHeader phdr = (PageHeader) page;
113 20 : OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
114 20 : BTPageOpaque opaque = BTPageGetOpaque(page);
115 20 : int item_size = 0;
116 : int off;
117 :
118 20 : stat->blkno = blkno;
119 :
120 20 : stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);
121 :
122 20 : stat->dead_items = stat->live_items = 0;
123 :
124 20 : stat->page_size = PageGetPageSize(page);
125 :
126 : /* page type (flags) */
127 20 : if (P_ISDELETED(opaque))
128 : {
129 : /* We divide deleted pages into leaf ('d') or internal ('D') */
130 0 : if (P_ISLEAF(opaque) || !P_HAS_FULLXID(opaque))
131 0 : stat->type = 'd';
132 : else
133 0 : stat->type = 'D';
134 :
135 : /*
136 : * Report safexid in a deleted page.
137 : *
138 : * Handle pg_upgrade'd deleted pages that used the previous safexid
139 : * representation in btpo_level field (this used to be a union type
140 : * called "bpto").
141 : */
142 0 : if (P_HAS_FULLXID(opaque))
143 : {
144 0 : FullTransactionId safexid = BTPageGetDeleteXid(page);
145 :
146 0 : elog(DEBUG2, "deleted page from block %u has safexid %u:%u",
147 : blkno, EpochFromFullTransactionId(safexid),
148 : XidFromFullTransactionId(safexid));
149 : }
150 : else
151 0 : elog(DEBUG2, "deleted page from block %u has safexid %u",
152 : blkno, opaque->btpo_level);
153 :
154 : /* Don't interpret BTDeletedPageData as index tuples */
155 0 : maxoff = InvalidOffsetNumber;
156 : }
157 20 : else if (P_IGNORE(opaque))
158 0 : stat->type = 'e';
159 20 : else if (P_ISLEAF(opaque))
160 16 : stat->type = 'l';
161 4 : else if (P_ISROOT(opaque))
162 4 : stat->type = 'r';
163 : else
164 0 : stat->type = 'i';
165 :
166 : /* btpage opaque data */
167 20 : stat->btpo_prev = opaque->btpo_prev;
168 20 : stat->btpo_next = opaque->btpo_next;
169 20 : stat->btpo_level = opaque->btpo_level;
170 20 : stat->btpo_flags = opaque->btpo_flags;
171 20 : stat->btpo_cycleid = opaque->btpo_cycleid;
172 :
173 : /* count live and dead tuples, and free space */
174 4044 : for (off = FirstOffsetNumber; off <= maxoff; off++)
175 : {
176 : IndexTuple itup;
177 :
178 4024 : ItemId id = PageGetItemId(page, off);
179 :
180 4024 : itup = (IndexTuple) PageGetItem(page, id);
181 :
182 4024 : item_size += IndexTupleSize(itup);
183 :
184 4024 : if (!ItemIdIsDead(id))
185 4024 : stat->live_items++;
186 : else
187 0 : stat->dead_items++;
188 : }
189 20 : stat->free_size = PageGetFreeSpace(page);
190 :
191 20 : if ((stat->live_items + stat->dead_items) > 0)
192 20 : stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
193 : else
194 0 : stat->avg_item_size = 0;
195 20 : }
196 :
197 : /* -----------------------------------------------
198 : * check_relation_block_range()
199 : *
200 : * Verify that a block number (given as int64) is valid for the relation.
201 : * -----------------------------------------------
202 : */
203 : static void
204 30 : check_relation_block_range(Relation rel, int64 blkno)
205 : {
206 : /* Ensure we can cast to BlockNumber */
207 30 : if (blkno < 0 || blkno > MaxBlockNumber)
208 4 : ereport(ERROR,
209 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
210 : errmsg("invalid block number %" PRId64, blkno)));
211 :
212 26 : if ((BlockNumber) (blkno) >= RelationGetNumberOfBlocks(rel))
213 6 : ereport(ERROR,
214 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
215 : errmsg("block number %" PRId64 " is out of range", blkno)));
216 20 : }
217 :
218 : /* -----------------------------------------------
219 : * bt_index_block_validate()
220 : *
221 : * Validate index type is btree and block number
222 : * is valid (and not the metapage).
223 : * -----------------------------------------------
224 : */
225 : static void
226 36 : bt_index_block_validate(Relation rel, int64 blkno)
227 : {
228 36 : if (!IS_INDEX(rel) || !IS_BTREE(rel))
229 4 : ereport(ERROR,
230 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
231 : errmsg("\"%s\" is not a %s index",
232 : RelationGetRelationName(rel), "btree")));
233 :
234 : /*
235 : * Reject attempts to read non-local temporary relations; we would be
236 : * likely to get wrong data since we have no visibility into the owning
237 : * session's local buffers.
238 : */
239 32 : if (RELATION_IS_OTHER_TEMP(rel))
240 0 : ereport(ERROR,
241 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
242 : errmsg("cannot access temporary tables of other sessions")));
243 :
244 32 : if (blkno == 0)
245 6 : ereport(ERROR,
246 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
247 : errmsg("block 0 is a meta page")));
248 :
249 26 : check_relation_block_range(rel, blkno);
250 16 : }
251 :
252 : /* -----------------------------------------------
253 : * bt_page_stats()
254 : *
255 : * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
256 : * Arguments are index relation name and block number
257 : * -----------------------------------------------
258 : */
259 : static Datum
260 12 : bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
261 : {
262 12 : text *relname = PG_GETARG_TEXT_PP(0);
263 12 : int64 blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1));
264 : Buffer buffer;
265 : Relation rel;
266 : RangeVar *relrv;
267 : Datum result;
268 : HeapTuple tuple;
269 : TupleDesc tupleDesc;
270 : int j;
271 : char *values[11];
272 : BTPageStat stat;
273 :
274 12 : if (!superuser())
275 0 : ereport(ERROR,
276 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
277 : errmsg("must be superuser to use pageinspect functions")));
278 :
279 12 : relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
280 12 : rel = relation_openrv(relrv, AccessShareLock);
281 :
282 12 : bt_index_block_validate(rel, blkno);
283 :
284 4 : buffer = ReadBuffer(rel, blkno);
285 4 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
286 :
287 : /* keep compiler quiet */
288 4 : stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
289 4 : stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
290 :
291 4 : GetBTPageStatistics(blkno, buffer, &stat);
292 :
293 4 : UnlockReleaseBuffer(buffer);
294 4 : relation_close(rel, AccessShareLock);
295 :
296 : /* Build a tuple descriptor for our result type */
297 4 : if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
298 0 : elog(ERROR, "return type must be a row type");
299 :
300 4 : j = 0;
301 4 : values[j++] = psprintf("%u", stat.blkno);
302 4 : values[j++] = psprintf("%c", stat.type);
303 4 : values[j++] = psprintf("%u", stat.live_items);
304 4 : values[j++] = psprintf("%u", stat.dead_items);
305 4 : values[j++] = psprintf("%u", stat.avg_item_size);
306 4 : values[j++] = psprintf("%u", stat.page_size);
307 4 : values[j++] = psprintf("%u", stat.free_size);
308 4 : values[j++] = psprintf("%u", stat.btpo_prev);
309 4 : values[j++] = psprintf("%u", stat.btpo_next);
310 4 : values[j++] = psprintf("%u", stat.btpo_level);
311 4 : values[j++] = psprintf("%d", stat.btpo_flags);
312 :
313 4 : tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
314 : values);
315 :
316 4 : result = HeapTupleGetDatum(tuple);
317 :
318 4 : PG_RETURN_DATUM(result);
319 : }
320 :
321 : Datum
322 10 : bt_page_stats_1_9(PG_FUNCTION_ARGS)
323 : {
324 10 : return bt_page_stats_internal(fcinfo, PAGEINSPECT_V1_9);
325 : }
326 :
327 : /* entry point for old extension version */
328 : Datum
329 2 : bt_page_stats(PG_FUNCTION_ARGS)
330 : {
331 2 : return bt_page_stats_internal(fcinfo, PAGEINSPECT_V1_8);
332 : }
333 :
334 :
335 : /* -----------------------------------------------
336 : * bt_multi_page_stats()
337 : *
338 : * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1, 2);
339 : * Arguments are index relation name, first block number, number of blocks
340 : * (but number of blocks can be negative to mean "read all the rest")
341 : * -----------------------------------------------
342 : */
343 : Datum
344 28 : bt_multi_page_stats(PG_FUNCTION_ARGS)
345 : {
346 : Relation rel;
347 : ua_page_stats *uargs;
348 : FuncCallContext *fctx;
349 : MemoryContext mctx;
350 :
351 28 : if (!superuser())
352 0 : ereport(ERROR,
353 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
354 : errmsg("must be superuser to use pageinspect functions")));
355 :
356 28 : if (SRF_IS_FIRSTCALL())
357 : {
358 12 : text *relname = PG_GETARG_TEXT_PP(0);
359 12 : int64 blkno = PG_GETARG_INT64(1);
360 12 : int64 blk_count = PG_GETARG_INT64(2);
361 : RangeVar *relrv;
362 :
363 12 : fctx = SRF_FIRSTCALL_INIT();
364 :
365 12 : relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
366 12 : rel = relation_openrv(relrv, AccessShareLock);
367 :
368 : /* Check that rel is a valid btree index and 1st block number is OK */
369 12 : bt_index_block_validate(rel, blkno);
370 :
371 : /*
372 : * Check if upper bound of the specified range is valid. If only one
373 : * page is requested, skip as we've already validated the page. (Also,
374 : * it's important to skip this if blk_count is negative.)
375 : */
376 8 : if (blk_count > 1)
377 4 : check_relation_block_range(rel, blkno + blk_count - 1);
378 :
379 : /* Save arguments for reuse */
380 8 : mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
381 :
382 8 : uargs = palloc(sizeof(ua_page_stats));
383 :
384 8 : uargs->relid = RelationGetRelid(rel);
385 8 : uargs->blkno = blkno;
386 8 : uargs->blk_count = blk_count;
387 8 : uargs->allpages = (blk_count < 0);
388 :
389 8 : fctx->user_fctx = uargs;
390 :
391 8 : MemoryContextSwitchTo(mctx);
392 :
393 : /*
394 : * To avoid possibly leaking a relcache reference if the SRF isn't run
395 : * to completion, we close and re-open the index rel each time
396 : * through, using the index's OID for re-opens to ensure we get the
397 : * same rel. Keep the AccessShareLock though, to ensure it doesn't go
398 : * away underneath us.
399 : */
400 8 : relation_close(rel, NoLock);
401 : }
402 :
403 24 : fctx = SRF_PERCALL_SETUP();
404 24 : uargs = fctx->user_fctx;
405 :
406 : /* We should have lock already */
407 24 : rel = relation_open(uargs->relid, NoLock);
408 :
409 : /* In all-pages mode, recheck the index length each time */
410 24 : if (uargs->allpages)
411 10 : uargs->blk_count = RelationGetNumberOfBlocks(rel) - uargs->blkno;
412 :
413 24 : if (uargs->blk_count > 0)
414 : {
415 : /* We need to fetch next block statistics */
416 : Buffer buffer;
417 : Datum result;
418 : HeapTuple tuple;
419 : int j;
420 : char *values[11];
421 : BTPageStat stat;
422 : TupleDesc tupleDesc;
423 :
424 16 : buffer = ReadBuffer(rel, uargs->blkno);
425 16 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
426 :
427 : /* keep compiler quiet */
428 16 : stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
429 16 : stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
430 :
431 16 : GetBTPageStatistics(uargs->blkno, buffer, &stat);
432 :
433 16 : UnlockReleaseBuffer(buffer);
434 16 : relation_close(rel, NoLock);
435 :
436 : /* Build a tuple descriptor for our result type */
437 16 : if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
438 0 : elog(ERROR, "return type must be a row type");
439 :
440 16 : j = 0;
441 16 : values[j++] = psprintf("%u", stat.blkno);
442 16 : values[j++] = psprintf("%c", stat.type);
443 16 : values[j++] = psprintf("%u", stat.live_items);
444 16 : values[j++] = psprintf("%u", stat.dead_items);
445 16 : values[j++] = psprintf("%u", stat.avg_item_size);
446 16 : values[j++] = psprintf("%u", stat.page_size);
447 16 : values[j++] = psprintf("%u", stat.free_size);
448 16 : values[j++] = psprintf("%u", stat.btpo_prev);
449 16 : values[j++] = psprintf("%u", stat.btpo_next);
450 16 : values[j++] = psprintf("%u", stat.btpo_level);
451 16 : values[j++] = psprintf("%d", stat.btpo_flags);
452 :
453 : /* Construct tuple to be returned */
454 16 : tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
455 : values);
456 :
457 16 : result = HeapTupleGetDatum(tuple);
458 :
459 : /*
460 : * Move to the next block number and decrement the number of blocks
461 : * still to be fetched
462 : */
463 16 : uargs->blkno++;
464 16 : uargs->blk_count--;
465 :
466 16 : SRF_RETURN_NEXT(fctx, result);
467 : }
468 :
469 : /* Done, so finally we can release the index lock */
470 8 : relation_close(rel, AccessShareLock);
471 8 : SRF_RETURN_DONE(fctx);
472 : }
473 :
474 : /*-------------------------------------------------------
475 : * bt_page_print_tuples()
476 : *
477 : * Form a tuple describing index tuple at a given offset
478 : * ------------------------------------------------------
479 : */
480 : static Datum
481 6 : bt_page_print_tuples(ua_page_items *uargs)
482 : {
483 6 : Page page = uargs->page;
484 6 : OffsetNumber offset = uargs->offset;
485 6 : bool leafpage = uargs->leafpage;
486 6 : bool rightmost = uargs->rightmost;
487 : bool ispivottuple;
488 : Datum values[9];
489 : bool nulls[9];
490 : HeapTuple tuple;
491 : ItemId id;
492 : IndexTuple itup;
493 : int j;
494 : int off;
495 : int dlen;
496 : char *dump,
497 : *datacstring;
498 : char *ptr;
499 : ItemPointer htid;
500 :
501 6 : id = PageGetItemId(page, offset);
502 :
503 6 : if (!ItemIdIsValid(id))
504 0 : elog(ERROR, "invalid ItemId");
505 :
506 6 : itup = (IndexTuple) PageGetItem(page, id);
507 :
508 6 : j = 0;
509 6 : memset(nulls, 0, sizeof(nulls));
510 6 : values[j++] = Int16GetDatum(offset);
511 6 : values[j++] = ItemPointerGetDatum(&itup->t_tid);
512 6 : values[j++] = Int32GetDatum((int) IndexTupleSize(itup));
513 6 : values[j++] = BoolGetDatum(IndexTupleHasNulls(itup));
514 6 : values[j++] = BoolGetDatum(IndexTupleHasVarwidths(itup));
515 :
516 6 : ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
517 6 : dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
518 :
519 : /*
520 : * Make sure that "data" column does not include posting list or pivot
521 : * tuple representation of heap TID(s).
522 : *
523 : * Note: BTreeTupleIsPivot() won't work reliably on !heapkeyspace indexes
524 : * (those built before BTREE_VERSION 4), but we have no way of determining
525 : * if this page came from a !heapkeyspace index. We may only have a bytea
526 : * nbtree page image to go on, so in general there is no metapage that we
527 : * can check.
528 : *
529 : * That's okay here because BTreeTupleIsPivot() can only return false for
530 : * a !heapkeyspace pivot, never true for a !heapkeyspace non-pivot. Since
531 : * heap TID isn't part of the keyspace in a !heapkeyspace index anyway,
532 : * there cannot possibly be a pivot tuple heap TID representation that we
533 : * fail to make an adjustment for. A !heapkeyspace index can have
534 : * BTreeTupleIsPivot() return true (due to things like suffix truncation
535 : * for INCLUDE indexes in Postgres v11), but when that happens
536 : * BTreeTupleGetHeapTID() can be trusted to work reliably (i.e. return
537 : * NULL).
538 : *
539 : * Note: BTreeTupleIsPosting() always works reliably, even with
540 : * !heapkeyspace indexes.
541 : */
542 6 : if (BTreeTupleIsPosting(itup))
543 0 : dlen -= IndexTupleSize(itup) - BTreeTupleGetPostingOffset(itup);
544 6 : else if (BTreeTupleIsPivot(itup) && BTreeTupleGetHeapTID(itup) != NULL)
545 0 : dlen -= MAXALIGN(sizeof(ItemPointerData));
546 :
547 6 : if (dlen < 0 || dlen > INDEX_SIZE_MASK)
548 0 : elog(ERROR, "invalid tuple length %d for tuple at offset number %u",
549 : dlen, offset);
550 6 : dump = palloc0(dlen * 3 + 1);
551 6 : datacstring = dump;
552 54 : for (off = 0; off < dlen; off++)
553 : {
554 48 : if (off > 0)
555 42 : *dump++ = ' ';
556 48 : sprintf(dump, "%02x", *(ptr + off) & 0xff);
557 48 : dump += 2;
558 : }
559 6 : values[j++] = CStringGetTextDatum(datacstring);
560 6 : pfree(datacstring);
561 :
562 : /*
563 : * We need to work around the BTreeTupleIsPivot() !heapkeyspace limitation
564 : * again. Deduce whether or not tuple must be a pivot tuple based on
565 : * whether or not the page is a leaf page, as well as the page offset
566 : * number of the tuple.
567 : */
568 6 : ispivottuple = (!leafpage || (!rightmost && offset == P_HIKEY));
569 :
570 : /* LP_DEAD bit can never be set for pivot tuples, so show a NULL there */
571 6 : if (!ispivottuple)
572 6 : values[j++] = BoolGetDatum(ItemIdIsDead(id));
573 : else
574 : {
575 : Assert(!ItemIdIsDead(id));
576 0 : nulls[j++] = true;
577 : }
578 :
579 6 : htid = BTreeTupleGetHeapTID(itup);
580 6 : if (ispivottuple && !BTreeTupleIsPivot(itup))
581 : {
582 : /* Don't show bogus heap TID in !heapkeyspace pivot tuple */
583 0 : htid = NULL;
584 : }
585 :
586 6 : if (htid)
587 6 : values[j++] = ItemPointerGetDatum(htid);
588 : else
589 0 : nulls[j++] = true;
590 :
591 6 : if (BTreeTupleIsPosting(itup))
592 : {
593 : /* Build an array of item pointers */
594 : ItemPointer tids;
595 : Datum *tids_datum;
596 : int nposting;
597 :
598 0 : tids = BTreeTupleGetPosting(itup);
599 0 : nposting = BTreeTupleGetNPosting(itup);
600 0 : tids_datum = (Datum *) palloc(nposting * sizeof(Datum));
601 0 : for (int i = 0; i < nposting; i++)
602 0 : tids_datum[i] = ItemPointerGetDatum(&tids[i]);
603 0 : values[j++] = PointerGetDatum(construct_array_builtin(tids_datum, nposting, TIDOID));
604 0 : pfree(tids_datum);
605 : }
606 : else
607 6 : nulls[j++] = true;
608 :
609 : /* Build and return the result tuple */
610 6 : tuple = heap_form_tuple(uargs->tupd, values, nulls);
611 :
612 6 : return HeapTupleGetDatum(tuple);
613 : }
614 :
615 : /*-------------------------------------------------------
616 : * bt_page_items()
617 : *
618 : * Get IndexTupleData set in a btree page
619 : *
620 : * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
621 : *-------------------------------------------------------
622 : */
623 : static Datum
624 16 : bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
625 : {
626 16 : text *relname = PG_GETARG_TEXT_PP(0);
627 16 : int64 blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1));
628 : Datum result;
629 : FuncCallContext *fctx;
630 : MemoryContext mctx;
631 : ua_page_items *uargs;
632 :
633 16 : if (!superuser())
634 0 : ereport(ERROR,
635 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
636 : errmsg("must be superuser to use pageinspect functions")));
637 :
638 16 : if (SRF_IS_FIRSTCALL())
639 : {
640 : RangeVar *relrv;
641 : Relation rel;
642 : Buffer buffer;
643 : BTPageOpaque opaque;
644 : TupleDesc tupleDesc;
645 :
646 12 : fctx = SRF_FIRSTCALL_INIT();
647 :
648 12 : relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
649 12 : rel = relation_openrv(relrv, AccessShareLock);
650 :
651 12 : bt_index_block_validate(rel, blkno);
652 :
653 4 : buffer = ReadBuffer(rel, blkno);
654 4 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
655 :
656 : /*
657 : * We copy the page into local storage to avoid holding pin on the
658 : * buffer longer than we must, and possibly failing to release it at
659 : * all if the calling query doesn't fetch all rows.
660 : */
661 4 : mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
662 :
663 4 : uargs = palloc(sizeof(ua_page_items));
664 :
665 4 : uargs->page = palloc(BLCKSZ);
666 4 : memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
667 :
668 4 : UnlockReleaseBuffer(buffer);
669 4 : relation_close(rel, AccessShareLock);
670 :
671 4 : uargs->offset = FirstOffsetNumber;
672 :
673 4 : opaque = BTPageGetOpaque(uargs->page);
674 :
675 4 : if (!P_ISDELETED(opaque))
676 4 : fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
677 : else
678 : {
679 : /* Don't interpret BTDeletedPageData as index tuples */
680 0 : elog(NOTICE, "page from block " INT64_FORMAT " is deleted", blkno);
681 0 : fctx->max_calls = 0;
682 : }
683 4 : uargs->leafpage = P_ISLEAF(opaque);
684 4 : uargs->rightmost = P_RIGHTMOST(opaque);
685 :
686 : /* Build a tuple descriptor for our result type */
687 4 : if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
688 0 : elog(ERROR, "return type must be a row type");
689 4 : tupleDesc = BlessTupleDesc(tupleDesc);
690 :
691 4 : uargs->tupd = tupleDesc;
692 :
693 4 : fctx->user_fctx = uargs;
694 :
695 4 : MemoryContextSwitchTo(mctx);
696 : }
697 :
698 8 : fctx = SRF_PERCALL_SETUP();
699 8 : uargs = fctx->user_fctx;
700 :
701 8 : if (fctx->call_cntr < fctx->max_calls)
702 : {
703 4 : result = bt_page_print_tuples(uargs);
704 4 : uargs->offset++;
705 4 : SRF_RETURN_NEXT(fctx, result);
706 : }
707 :
708 4 : SRF_RETURN_DONE(fctx);
709 : }
710 :
711 : Datum
712 12 : bt_page_items_1_9(PG_FUNCTION_ARGS)
713 : {
714 12 : return bt_page_items_internal(fcinfo, PAGEINSPECT_V1_9);
715 : }
716 :
717 : /* entry point for old extension version */
718 : Datum
719 4 : bt_page_items(PG_FUNCTION_ARGS)
720 : {
721 4 : return bt_page_items_internal(fcinfo, PAGEINSPECT_V1_8);
722 : }
723 :
724 : /*-------------------------------------------------------
725 : * bt_page_items_bytea()
726 : *
727 : * Get IndexTupleData set in a btree page
728 : *
729 : * Usage: SELECT * FROM bt_page_items(get_raw_page('t1_pkey', 1));
730 : *-------------------------------------------------------
731 : */
732 :
733 : Datum
734 18 : bt_page_items_bytea(PG_FUNCTION_ARGS)
735 : {
736 18 : bytea *raw_page = PG_GETARG_BYTEA_P(0);
737 : Datum result;
738 : FuncCallContext *fctx;
739 : ua_page_items *uargs;
740 :
741 18 : if (!superuser())
742 0 : ereport(ERROR,
743 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
744 : errmsg("must be superuser to use raw page functions")));
745 :
746 18 : if (SRF_IS_FIRSTCALL())
747 : {
748 : BTPageOpaque opaque;
749 : MemoryContext mctx;
750 : TupleDesc tupleDesc;
751 :
752 16 : fctx = SRF_FIRSTCALL_INIT();
753 16 : mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
754 :
755 16 : uargs = palloc(sizeof(ua_page_items));
756 :
757 16 : uargs->page = get_page_from_raw(raw_page);
758 :
759 14 : if (PageIsNew(uargs->page))
760 : {
761 2 : MemoryContextSwitchTo(mctx);
762 2 : PG_RETURN_NULL();
763 : }
764 :
765 12 : uargs->offset = FirstOffsetNumber;
766 :
767 : /* verify the special space has the expected size */
768 12 : if (PageGetSpecialSize(uargs->page) != MAXALIGN(sizeof(BTPageOpaqueData)))
769 4 : ereport(ERROR,
770 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
771 : errmsg("input page is not a valid %s page", "btree"),
772 : errdetail("Expected special size %d, got %d.",
773 : (int) MAXALIGN(sizeof(BTPageOpaqueData)),
774 : (int) PageGetSpecialSize(uargs->page))));
775 :
776 8 : opaque = BTPageGetOpaque(uargs->page);
777 :
778 8 : if (P_ISMETA(opaque))
779 4 : ereport(ERROR,
780 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
781 : errmsg("block is a meta page")));
782 :
783 4 : if (P_ISLEAF(opaque) && opaque->btpo_level != 0)
784 2 : ereport(ERROR,
785 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
786 : errmsg("block is not a valid btree leaf page")));
787 :
788 2 : if (P_ISDELETED(opaque))
789 0 : elog(NOTICE, "page is deleted");
790 :
791 2 : if (!P_ISDELETED(opaque))
792 2 : fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
793 : else
794 : {
795 : /* Don't interpret BTDeletedPageData as index tuples */
796 0 : elog(NOTICE, "page from block is deleted");
797 0 : fctx->max_calls = 0;
798 : }
799 2 : uargs->leafpage = P_ISLEAF(opaque);
800 2 : uargs->rightmost = P_RIGHTMOST(opaque);
801 :
802 : /* Build a tuple descriptor for our result type */
803 2 : if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
804 0 : elog(ERROR, "return type must be a row type");
805 2 : tupleDesc = BlessTupleDesc(tupleDesc);
806 :
807 2 : uargs->tupd = tupleDesc;
808 :
809 2 : fctx->user_fctx = uargs;
810 :
811 2 : MemoryContextSwitchTo(mctx);
812 : }
813 :
814 4 : fctx = SRF_PERCALL_SETUP();
815 4 : uargs = fctx->user_fctx;
816 :
817 4 : if (fctx->call_cntr < fctx->max_calls)
818 : {
819 2 : result = bt_page_print_tuples(uargs);
820 2 : uargs->offset++;
821 2 : SRF_RETURN_NEXT(fctx, result);
822 : }
823 :
824 2 : SRF_RETURN_DONE(fctx);
825 : }
826 :
827 : /* Number of output arguments (columns) for bt_metap() */
828 : #define BT_METAP_COLS_V1_8 9
829 :
830 : /* ------------------------------------------------
831 : * bt_metap()
832 : *
833 : * Get a btree's meta-page information
834 : *
835 : * Usage: SELECT * FROM bt_metap('t1_pkey')
836 : * ------------------------------------------------
837 : */
838 : Datum
839 4 : bt_metap(PG_FUNCTION_ARGS)
840 : {
841 4 : text *relname = PG_GETARG_TEXT_PP(0);
842 : Datum result;
843 : Relation rel;
844 : RangeVar *relrv;
845 : BTMetaPageData *metad;
846 : TupleDesc tupleDesc;
847 : int j;
848 : char *values[9];
849 : Buffer buffer;
850 : Page page;
851 : HeapTuple tuple;
852 :
853 4 : if (!superuser())
854 0 : ereport(ERROR,
855 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
856 : errmsg("must be superuser to use pageinspect functions")));
857 :
858 4 : relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
859 4 : rel = relation_openrv(relrv, AccessShareLock);
860 :
861 4 : if (!IS_INDEX(rel) || !IS_BTREE(rel))
862 2 : ereport(ERROR,
863 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
864 : errmsg("\"%s\" is not a %s index",
865 : RelationGetRelationName(rel), "btree")));
866 :
867 : /*
868 : * Reject attempts to read non-local temporary relations; we would be
869 : * likely to get wrong data since we have no visibility into the owning
870 : * session's local buffers.
871 : */
872 2 : if (RELATION_IS_OTHER_TEMP(rel))
873 0 : ereport(ERROR,
874 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
875 : errmsg("cannot access temporary tables of other sessions")));
876 :
877 2 : buffer = ReadBuffer(rel, 0);
878 2 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
879 :
880 2 : page = BufferGetPage(buffer);
881 2 : metad = BTPageGetMeta(page);
882 :
883 : /* Build a tuple descriptor for our result type */
884 2 : if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
885 0 : elog(ERROR, "return type must be a row type");
886 :
887 : /*
888 : * We need a kluge here to detect API versions prior to 1.8. Earlier
889 : * versions incorrectly used int4 for certain columns.
890 : *
891 : * There is no way to reliably avoid the problems created by the old
892 : * function definition at this point, so insist that the user update the
893 : * extension.
894 : */
895 2 : if (tupleDesc->natts < BT_METAP_COLS_V1_8)
896 0 : ereport(ERROR,
897 : (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
898 : errmsg("function has wrong number of declared columns"),
899 : errhint("To resolve the problem, update the \"pageinspect\" extension to the latest version.")));
900 :
901 2 : j = 0;
902 2 : values[j++] = psprintf("%d", metad->btm_magic);
903 2 : values[j++] = psprintf("%d", metad->btm_version);
904 2 : values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_root);
905 2 : values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_level);
906 2 : values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_fastroot);
907 2 : values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_fastlevel);
908 :
909 : /*
910 : * Get values of extended metadata if available, use default values
911 : * otherwise. Note that we rely on the assumption that btm_allequalimage
912 : * is initialized to zero with indexes that were built on versions prior
913 : * to Postgres 13 (just like _bt_metaversion()).
914 : */
915 2 : if (metad->btm_version >= BTREE_NOVAC_VERSION)
916 : {
917 4 : values[j++] = psprintf(INT64_FORMAT,
918 2 : (int64) metad->btm_last_cleanup_num_delpages);
919 2 : values[j++] = psprintf("%f", metad->btm_last_cleanup_num_heap_tuples);
920 2 : values[j++] = metad->btm_allequalimage ? "t" : "f";
921 : }
922 : else
923 : {
924 0 : values[j++] = "0";
925 0 : values[j++] = "-1";
926 0 : values[j++] = "f";
927 : }
928 :
929 2 : tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
930 : values);
931 :
932 2 : result = HeapTupleGetDatum(tuple);
933 :
934 2 : UnlockReleaseBuffer(buffer);
935 2 : relation_close(rel, AccessShareLock);
936 :
937 2 : PG_RETURN_DATUM(result);
938 : }
|