Line data Source code
1 : /*
2 : * contrib/pageinspect/btreefuncs.c
3 : *
4 : *
5 : * btreefuncs.c
6 : *
7 : * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
8 : *
9 : * Permission to use, copy, modify, and distribute this software and
10 : * its documentation for any purpose, without fee, and without a
11 : * written agreement is hereby granted, provided that the above
12 : * copyright notice and this paragraph and the following two
13 : * paragraphs appear in all copies.
14 : *
15 : * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
16 : * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
17 : * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
18 : * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
19 : * OF THE POSSIBILITY OF SUCH DAMAGE.
20 : *
21 : * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
22 : * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 : * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
24 : * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
25 : * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
26 : */
27 :
28 : #include "postgres.h"
29 :
30 : #include "access/nbtree.h"
31 : #include "access/relation.h"
32 : #include "catalog/namespace.h"
33 : #include "catalog/pg_am.h"
34 : #include "catalog/pg_type.h"
35 : #include "funcapi.h"
36 : #include "miscadmin.h"
37 : #include "pageinspect.h"
38 : #include "utils/array.h"
39 : #include "utils/builtins.h"
40 : #include "utils/rel.h"
41 : #include "utils/varlena.h"
42 :
/*
 * Version-1 call-convention declarations for the SQL-callable entry points
 * defined in this file.
 */
PG_FUNCTION_INFO_V1(bt_metap);
PG_FUNCTION_INFO_V1(bt_page_items_1_9);
PG_FUNCTION_INFO_V1(bt_page_items);
PG_FUNCTION_INFO_V1(bt_page_items_bytea);
PG_FUNCTION_INFO_V1(bt_page_stats_1_9);
PG_FUNCTION_INFO_V1(bt_page_stats);
PG_FUNCTION_INFO_V1(bt_multi_page_stats);
50 :
/* True when relation "r" has relkind RELKIND_INDEX */
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
/* True when relation "r" uses the btree access method (BTREE_AM_OID) */
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
53 :
/* ------------------------------------------------
 * structure for single btree page statistics
 *
 * Filled in by GetBTPageStatistics().
 * ------------------------------------------------
 */
typedef struct BTPageStat
{
	uint32		blkno;			/* block number the caller asked about */
	uint32		live_items;		/* items whose line pointer is not dead */
	uint32		dead_items;		/* items whose line pointer is dead */
	uint32		page_size;		/* result of PageGetPageSize() */
	uint32		max_avail;		/* pd_special minus the page header size */
	uint32		free_size;		/* result of PageGetFreeSpace() */
	uint32		avg_item_size;	/* total tuple size / item count, or 0 */

	/*
	 * Page type code: 'd' or 'D' for a deleted page, 'e' for a non-deleted
	 * page that P_IGNORE() flags, 'l' for a leaf, 'r' for a non-leaf root,
	 * 'i' for any other page.
	 */
	char		type;

	/* opaque data */
	BlockNumber btpo_prev;
	BlockNumber btpo_next;
	uint32		btpo_level;
	uint16		btpo_flags;
	BTCycleId	btpo_cycleid;
} BTPageStat;
76 :
/*
 * cross-call data structure for SRF for page stats
 *
 * Allocated on the first call of bt_multi_page_stats() and updated as each
 * row is returned.
 */
typedef struct ua_page_stats
{
	Oid			relid;			/* OID used to re-open the index each call */
	int64		blkno;			/* next block number to report */
	int64		blk_count;		/* number of blocks still to report */
	bool		allpages;		/* true if caller passed a negative count */
} ua_page_stats;
87 :
/*
 * cross-call data structure for SRF for page items
 *
 * Set up on the first call of the bt_page_items functions and consumed by
 * bt_page_print_tuples().
 */
typedef struct ua_page_items
{
	Page		page;			/* page image whose items are being listed */
	OffsetNumber offset;		/* offset number of the next item to emit */
	bool		leafpage;		/* P_ISLEAF() of the page */
	bool		rightmost;		/* P_RIGHTMOST() of the page */
	TupleDesc	tupd;			/* blessed descriptor of the result row */
} ua_page_items;
99 :
100 :
101 : /* -------------------------------------------------
102 : * GetBTPageStatistics()
103 : *
104 : * Collect statistics of single b-tree page
105 : * -------------------------------------------------
106 : */
107 : static void
108 20 : GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
109 : {
110 20 : Page page = BufferGetPage(buffer);
111 20 : PageHeader phdr = (PageHeader) page;
112 20 : OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
113 20 : BTPageOpaque opaque = BTPageGetOpaque(page);
114 20 : int item_size = 0;
115 : int off;
116 :
117 20 : stat->blkno = blkno;
118 :
119 20 : stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);
120 :
121 20 : stat->dead_items = stat->live_items = 0;
122 :
123 20 : stat->page_size = PageGetPageSize(page);
124 :
125 : /* page type (flags) */
126 20 : if (P_ISDELETED(opaque))
127 : {
128 : /* We divide deleted pages into leaf ('d') or internal ('D') */
129 0 : if (P_ISLEAF(opaque) || !P_HAS_FULLXID(opaque))
130 0 : stat->type = 'd';
131 : else
132 0 : stat->type = 'D';
133 :
134 : /*
135 : * Report safexid in a deleted page.
136 : *
137 : * Handle pg_upgrade'd deleted pages that used the previous safexid
138 : * representation in btpo_level field (this used to be a union type
139 : * called "bpto").
140 : */
141 0 : if (P_HAS_FULLXID(opaque))
142 : {
143 0 : FullTransactionId safexid = BTPageGetDeleteXid(page);
144 :
145 0 : elog(DEBUG2, "deleted page from block %u has safexid %u:%u",
146 : blkno, EpochFromFullTransactionId(safexid),
147 : XidFromFullTransactionId(safexid));
148 : }
149 : else
150 0 : elog(DEBUG2, "deleted page from block %u has safexid %u",
151 : blkno, opaque->btpo_level);
152 :
153 : /* Don't interpret BTDeletedPageData as index tuples */
154 0 : maxoff = InvalidOffsetNumber;
155 : }
156 20 : else if (P_IGNORE(opaque))
157 0 : stat->type = 'e';
158 20 : else if (P_ISLEAF(opaque))
159 16 : stat->type = 'l';
160 4 : else if (P_ISROOT(opaque))
161 4 : stat->type = 'r';
162 : else
163 0 : stat->type = 'i';
164 :
165 : /* btpage opaque data */
166 20 : stat->btpo_prev = opaque->btpo_prev;
167 20 : stat->btpo_next = opaque->btpo_next;
168 20 : stat->btpo_level = opaque->btpo_level;
169 20 : stat->btpo_flags = opaque->btpo_flags;
170 20 : stat->btpo_cycleid = opaque->btpo_cycleid;
171 :
172 : /* count live and dead tuples, and free space */
173 4044 : for (off = FirstOffsetNumber; off <= maxoff; off++)
174 : {
175 : IndexTuple itup;
176 :
177 4024 : ItemId id = PageGetItemId(page, off);
178 :
179 4024 : itup = (IndexTuple) PageGetItem(page, id);
180 :
181 4024 : item_size += IndexTupleSize(itup);
182 :
183 4024 : if (!ItemIdIsDead(id))
184 4024 : stat->live_items++;
185 : else
186 0 : stat->dead_items++;
187 : }
188 20 : stat->free_size = PageGetFreeSpace(page);
189 :
190 20 : if ((stat->live_items + stat->dead_items) > 0)
191 20 : stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
192 : else
193 0 : stat->avg_item_size = 0;
194 20 : }
195 :
196 : /* -----------------------------------------------
197 : * check_relation_block_range()
198 : *
199 : * Verify that a block number (given as int64) is valid for the relation.
200 : * -----------------------------------------------
201 : */
202 : static void
203 30 : check_relation_block_range(Relation rel, int64 blkno)
204 : {
205 : /* Ensure we can cast to BlockNumber */
206 30 : if (blkno < 0 || blkno > MaxBlockNumber)
207 4 : ereport(ERROR,
208 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
209 : errmsg("invalid block number %" PRId64, blkno)));
210 :
211 26 : if ((BlockNumber) (blkno) >= RelationGetNumberOfBlocks(rel))
212 6 : ereport(ERROR,
213 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
214 : errmsg("block number %" PRId64 " is out of range", blkno)));
215 20 : }
216 :
217 : /* -----------------------------------------------
218 : * bt_index_block_validate()
219 : *
220 : * Validate index type is btree and block number
221 : * is valid (and not the metapage).
222 : * -----------------------------------------------
223 : */
224 : static void
225 36 : bt_index_block_validate(Relation rel, int64 blkno)
226 : {
227 36 : if (!IS_INDEX(rel) || !IS_BTREE(rel))
228 4 : ereport(ERROR,
229 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
230 : errmsg("\"%s\" is not a %s index",
231 : RelationGetRelationName(rel), "btree")));
232 :
233 : /*
234 : * Reject attempts to read non-local temporary relations; we would be
235 : * likely to get wrong data since we have no visibility into the owning
236 : * session's local buffers.
237 : */
238 32 : if (RELATION_IS_OTHER_TEMP(rel))
239 0 : ereport(ERROR,
240 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
241 : errmsg("cannot access temporary tables of other sessions")));
242 :
243 32 : if (blkno == 0)
244 6 : ereport(ERROR,
245 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
246 : errmsg("block 0 is a meta page")));
247 :
248 26 : check_relation_block_range(rel, blkno);
249 16 : }
250 :
251 : /* -----------------------------------------------
252 : * bt_page_stats()
253 : *
254 : * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
255 : * Arguments are index relation name and block number
256 : * -----------------------------------------------
257 : */
258 : static Datum
259 12 : bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
260 : {
261 12 : text *relname = PG_GETARG_TEXT_PP(0);
262 12 : int64 blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1));
263 : Buffer buffer;
264 : Relation rel;
265 : RangeVar *relrv;
266 : Datum result;
267 : HeapTuple tuple;
268 : TupleDesc tupleDesc;
269 : int j;
270 : char *values[11];
271 : BTPageStat stat;
272 :
273 12 : if (!superuser())
274 0 : ereport(ERROR,
275 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
276 : errmsg("must be superuser to use pageinspect functions")));
277 :
278 12 : relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
279 12 : rel = relation_openrv(relrv, AccessShareLock);
280 :
281 12 : bt_index_block_validate(rel, blkno);
282 :
283 4 : buffer = ReadBuffer(rel, blkno);
284 4 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
285 :
286 : /* keep compiler quiet */
287 4 : stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
288 4 : stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
289 :
290 4 : GetBTPageStatistics(blkno, buffer, &stat);
291 :
292 4 : UnlockReleaseBuffer(buffer);
293 4 : relation_close(rel, AccessShareLock);
294 :
295 : /* Build a tuple descriptor for our result type */
296 4 : if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
297 0 : elog(ERROR, "return type must be a row type");
298 :
299 4 : j = 0;
300 4 : values[j++] = psprintf("%u", stat.blkno);
301 4 : values[j++] = psprintf("%c", stat.type);
302 4 : values[j++] = psprintf("%u", stat.live_items);
303 4 : values[j++] = psprintf("%u", stat.dead_items);
304 4 : values[j++] = psprintf("%u", stat.avg_item_size);
305 4 : values[j++] = psprintf("%u", stat.page_size);
306 4 : values[j++] = psprintf("%u", stat.free_size);
307 4 : values[j++] = psprintf("%u", stat.btpo_prev);
308 4 : values[j++] = psprintf("%u", stat.btpo_next);
309 4 : values[j++] = psprintf("%u", stat.btpo_level);
310 4 : values[j++] = psprintf("%d", stat.btpo_flags);
311 :
312 4 : tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
313 : values);
314 :
315 4 : result = HeapTupleGetDatum(tuple);
316 :
317 4 : PG_RETURN_DATUM(result);
318 : }
319 :
/*
 * SQL-callable entry point that runs bt_page_stats_internal() with
 * PAGEINSPECT_V1_9, so the block number argument is read as int64.
 */
Datum
bt_page_stats_1_9(PG_FUNCTION_ARGS)
{
	return bt_page_stats_internal(fcinfo, PAGEINSPECT_V1_9);
}
325 :
/*
 * entry point for old extension version
 *
 * Runs bt_page_stats_internal() with PAGEINSPECT_V1_8, so the block number
 * argument is read as uint32.
 */
Datum
bt_page_stats(PG_FUNCTION_ARGS)
{
	return bt_page_stats_internal(fcinfo, PAGEINSPECT_V1_8);
}
332 :
333 :
334 : /* -----------------------------------------------
335 : * bt_multi_page_stats()
336 : *
337 : * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1, 2);
338 : * Arguments are index relation name, first block number, number of blocks
339 : * (but number of blocks can be negative to mean "read all the rest")
340 : * -----------------------------------------------
341 : */
342 : Datum
343 28 : bt_multi_page_stats(PG_FUNCTION_ARGS)
344 : {
345 : Relation rel;
346 : ua_page_stats *uargs;
347 : FuncCallContext *fctx;
348 : MemoryContext mctx;
349 :
350 28 : if (!superuser())
351 0 : ereport(ERROR,
352 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
353 : errmsg("must be superuser to use pageinspect functions")));
354 :
355 28 : if (SRF_IS_FIRSTCALL())
356 : {
357 12 : text *relname = PG_GETARG_TEXT_PP(0);
358 12 : int64 blkno = PG_GETARG_INT64(1);
359 12 : int64 blk_count = PG_GETARG_INT64(2);
360 : RangeVar *relrv;
361 :
362 12 : fctx = SRF_FIRSTCALL_INIT();
363 :
364 12 : relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
365 12 : rel = relation_openrv(relrv, AccessShareLock);
366 :
367 : /* Check that rel is a valid btree index and 1st block number is OK */
368 12 : bt_index_block_validate(rel, blkno);
369 :
370 : /*
371 : * Check if upper bound of the specified range is valid. If only one
372 : * page is requested, skip as we've already validated the page. (Also,
373 : * it's important to skip this if blk_count is negative.)
374 : */
375 8 : if (blk_count > 1)
376 4 : check_relation_block_range(rel, blkno + blk_count - 1);
377 :
378 : /* Save arguments for reuse */
379 8 : mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
380 :
381 8 : uargs = palloc(sizeof(ua_page_stats));
382 :
383 8 : uargs->relid = RelationGetRelid(rel);
384 8 : uargs->blkno = blkno;
385 8 : uargs->blk_count = blk_count;
386 8 : uargs->allpages = (blk_count < 0);
387 :
388 8 : fctx->user_fctx = uargs;
389 :
390 8 : MemoryContextSwitchTo(mctx);
391 :
392 : /*
393 : * To avoid possibly leaking a relcache reference if the SRF isn't run
394 : * to completion, we close and re-open the index rel each time
395 : * through, using the index's OID for re-opens to ensure we get the
396 : * same rel. Keep the AccessShareLock though, to ensure it doesn't go
397 : * away underneath us.
398 : */
399 8 : relation_close(rel, NoLock);
400 : }
401 :
402 24 : fctx = SRF_PERCALL_SETUP();
403 24 : uargs = fctx->user_fctx;
404 :
405 : /* We should have lock already */
406 24 : rel = relation_open(uargs->relid, NoLock);
407 :
408 : /* In all-pages mode, recheck the index length each time */
409 24 : if (uargs->allpages)
410 10 : uargs->blk_count = RelationGetNumberOfBlocks(rel) - uargs->blkno;
411 :
412 24 : if (uargs->blk_count > 0)
413 : {
414 : /* We need to fetch next block statistics */
415 : Buffer buffer;
416 : Datum result;
417 : HeapTuple tuple;
418 : int j;
419 : char *values[11];
420 : BTPageStat stat;
421 : TupleDesc tupleDesc;
422 :
423 16 : buffer = ReadBuffer(rel, uargs->blkno);
424 16 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
425 :
426 : /* keep compiler quiet */
427 16 : stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
428 16 : stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
429 :
430 16 : GetBTPageStatistics(uargs->blkno, buffer, &stat);
431 :
432 16 : UnlockReleaseBuffer(buffer);
433 16 : relation_close(rel, NoLock);
434 :
435 : /* Build a tuple descriptor for our result type */
436 16 : if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
437 0 : elog(ERROR, "return type must be a row type");
438 :
439 16 : j = 0;
440 16 : values[j++] = psprintf("%u", stat.blkno);
441 16 : values[j++] = psprintf("%c", stat.type);
442 16 : values[j++] = psprintf("%u", stat.live_items);
443 16 : values[j++] = psprintf("%u", stat.dead_items);
444 16 : values[j++] = psprintf("%u", stat.avg_item_size);
445 16 : values[j++] = psprintf("%u", stat.page_size);
446 16 : values[j++] = psprintf("%u", stat.free_size);
447 16 : values[j++] = psprintf("%u", stat.btpo_prev);
448 16 : values[j++] = psprintf("%u", stat.btpo_next);
449 16 : values[j++] = psprintf("%u", stat.btpo_level);
450 16 : values[j++] = psprintf("%d", stat.btpo_flags);
451 :
452 : /* Construct tuple to be returned */
453 16 : tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
454 : values);
455 :
456 16 : result = HeapTupleGetDatum(tuple);
457 :
458 : /*
459 : * Move to the next block number and decrement the number of blocks
460 : * still to be fetched
461 : */
462 16 : uargs->blkno++;
463 16 : uargs->blk_count--;
464 :
465 16 : SRF_RETURN_NEXT(fctx, result);
466 : }
467 :
468 : /* Done, so finally we can release the index lock */
469 8 : relation_close(rel, AccessShareLock);
470 8 : SRF_RETURN_DONE(fctx);
471 : }
472 :
473 : /*-------------------------------------------------------
474 : * bt_page_print_tuples()
475 : *
476 : * Form a tuple describing index tuple at a given offset
477 : * ------------------------------------------------------
478 : */
479 : static Datum
480 6 : bt_page_print_tuples(ua_page_items *uargs)
481 : {
482 6 : Page page = uargs->page;
483 6 : OffsetNumber offset = uargs->offset;
484 6 : bool leafpage = uargs->leafpage;
485 6 : bool rightmost = uargs->rightmost;
486 : bool ispivottuple;
487 : Datum values[9];
488 : bool nulls[9];
489 : HeapTuple tuple;
490 : ItemId id;
491 : IndexTuple itup;
492 : int j;
493 : int off;
494 : int dlen;
495 : char *dump,
496 : *datacstring;
497 : char *ptr;
498 : ItemPointer htid;
499 :
500 6 : id = PageGetItemId(page, offset);
501 :
502 6 : if (!ItemIdIsValid(id))
503 0 : elog(ERROR, "invalid ItemId");
504 :
505 6 : itup = (IndexTuple) PageGetItem(page, id);
506 :
507 6 : j = 0;
508 6 : memset(nulls, 0, sizeof(nulls));
509 6 : values[j++] = DatumGetInt16(offset);
510 6 : values[j++] = ItemPointerGetDatum(&itup->t_tid);
511 6 : values[j++] = Int32GetDatum((int) IndexTupleSize(itup));
512 6 : values[j++] = BoolGetDatum(IndexTupleHasNulls(itup));
513 6 : values[j++] = BoolGetDatum(IndexTupleHasVarwidths(itup));
514 :
515 6 : ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
516 6 : dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
517 :
518 : /*
519 : * Make sure that "data" column does not include posting list or pivot
520 : * tuple representation of heap TID(s).
521 : *
522 : * Note: BTreeTupleIsPivot() won't work reliably on !heapkeyspace indexes
523 : * (those built before BTREE_VERSION 4), but we have no way of determining
524 : * if this page came from a !heapkeyspace index. We may only have a bytea
525 : * nbtree page image to go on, so in general there is no metapage that we
526 : * can check.
527 : *
528 : * That's okay here because BTreeTupleIsPivot() can only return false for
529 : * a !heapkeyspace pivot, never true for a !heapkeyspace non-pivot. Since
530 : * heap TID isn't part of the keyspace in a !heapkeyspace index anyway,
531 : * there cannot possibly be a pivot tuple heap TID representation that we
532 : * fail to make an adjustment for. A !heapkeyspace index can have
533 : * BTreeTupleIsPivot() return true (due to things like suffix truncation
534 : * for INCLUDE indexes in Postgres v11), but when that happens
535 : * BTreeTupleGetHeapTID() can be trusted to work reliably (i.e. return
536 : * NULL).
537 : *
538 : * Note: BTreeTupleIsPosting() always works reliably, even with
539 : * !heapkeyspace indexes.
540 : */
541 6 : if (BTreeTupleIsPosting(itup))
542 0 : dlen -= IndexTupleSize(itup) - BTreeTupleGetPostingOffset(itup);
543 6 : else if (BTreeTupleIsPivot(itup) && BTreeTupleGetHeapTID(itup) != NULL)
544 0 : dlen -= MAXALIGN(sizeof(ItemPointerData));
545 :
546 6 : if (dlen < 0 || dlen > INDEX_SIZE_MASK)
547 0 : elog(ERROR, "invalid tuple length %d for tuple at offset number %u",
548 : dlen, offset);
549 6 : dump = palloc0(dlen * 3 + 1);
550 6 : datacstring = dump;
551 54 : for (off = 0; off < dlen; off++)
552 : {
553 48 : if (off > 0)
554 42 : *dump++ = ' ';
555 48 : sprintf(dump, "%02x", *(ptr + off) & 0xff);
556 48 : dump += 2;
557 : }
558 6 : values[j++] = CStringGetTextDatum(datacstring);
559 6 : pfree(datacstring);
560 :
561 : /*
562 : * We need to work around the BTreeTupleIsPivot() !heapkeyspace limitation
563 : * again. Deduce whether or not tuple must be a pivot tuple based on
564 : * whether or not the page is a leaf page, as well as the page offset
565 : * number of the tuple.
566 : */
567 6 : ispivottuple = (!leafpage || (!rightmost && offset == P_HIKEY));
568 :
569 : /* LP_DEAD bit can never be set for pivot tuples, so show a NULL there */
570 6 : if (!ispivottuple)
571 6 : values[j++] = BoolGetDatum(ItemIdIsDead(id));
572 : else
573 : {
574 : Assert(!ItemIdIsDead(id));
575 0 : nulls[j++] = true;
576 : }
577 :
578 6 : htid = BTreeTupleGetHeapTID(itup);
579 6 : if (ispivottuple && !BTreeTupleIsPivot(itup))
580 : {
581 : /* Don't show bogus heap TID in !heapkeyspace pivot tuple */
582 0 : htid = NULL;
583 : }
584 :
585 6 : if (htid)
586 6 : values[j++] = ItemPointerGetDatum(htid);
587 : else
588 0 : nulls[j++] = true;
589 :
590 6 : if (BTreeTupleIsPosting(itup))
591 : {
592 : /* Build an array of item pointers */
593 : ItemPointer tids;
594 : Datum *tids_datum;
595 : int nposting;
596 :
597 0 : tids = BTreeTupleGetPosting(itup);
598 0 : nposting = BTreeTupleGetNPosting(itup);
599 0 : tids_datum = (Datum *) palloc(nposting * sizeof(Datum));
600 0 : for (int i = 0; i < nposting; i++)
601 0 : tids_datum[i] = ItemPointerGetDatum(&tids[i]);
602 0 : values[j++] = PointerGetDatum(construct_array_builtin(tids_datum, nposting, TIDOID));
603 0 : pfree(tids_datum);
604 : }
605 : else
606 6 : nulls[j++] = true;
607 :
608 : /* Build and return the result tuple */
609 6 : tuple = heap_form_tuple(uargs->tupd, values, nulls);
610 :
611 6 : return HeapTupleGetDatum(tuple);
612 : }
613 :
/*-------------------------------------------------------
 * bt_page_items()
 *
 * Get IndexTupleData set in a btree page
 *
 * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
 *
 * Set-returning function.  ext_version selects how the block number
 * argument is fetched: as uint32 for PAGEINSPECT_V1_8, as int64 otherwise.
 * The first call validates the request, copies the page into the multi-call
 * memory context and records how many items to return; every call then
 * emits one row via bt_page_print_tuples().
 *-------------------------------------------------------
 */
static Datum
bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
{
	text	   *relname = PG_GETARG_TEXT_PP(0);
	int64		blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1));
	Datum		result;
	FuncCallContext *fctx;
	MemoryContext mctx;
	ua_page_items *uargs;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to use pageinspect functions")));

	if (SRF_IS_FIRSTCALL())
	{
		RangeVar   *relrv;
		Relation	rel;
		Buffer		buffer;
		BTPageOpaque opaque;
		TupleDesc	tupleDesc;

		fctx = SRF_FIRSTCALL_INIT();

		relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
		rel = relation_openrv(relrv, AccessShareLock);

		/* errors out unless rel is a btree index and blkno is usable */
		bt_index_block_validate(rel, blkno);

		buffer = ReadBuffer(rel, blkno);
		LockBuffer(buffer, BUFFER_LOCK_SHARE);

		/*
		 * We copy the page into local storage to avoid holding pin on the
		 * buffer longer than we must, and possibly failing to release it at
		 * all if the calling query doesn't fetch all rows.
		 */
		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

		uargs = palloc(sizeof(ua_page_items));

		uargs->page = palloc(BLCKSZ);
		memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);

		/* from here on only the private copy of the page is used */
		UnlockReleaseBuffer(buffer);
		relation_close(rel, AccessShareLock);

		uargs->offset = FirstOffsetNumber;

		opaque = BTPageGetOpaque(uargs->page);

		if (!P_ISDELETED(opaque))
			fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
		else
		{
			/* Don't interpret BTDeletedPageData as index tuples */
			elog(NOTICE, "page from block " INT64_FORMAT " is deleted", blkno);
			fctx->max_calls = 0;
		}
		uargs->leafpage = P_ISLEAF(opaque);
		uargs->rightmost = P_RIGHTMOST(opaque);

		/* Build a tuple descriptor for our result type */
		if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
			elog(ERROR, "return type must be a row type");
		tupleDesc = BlessTupleDesc(tupleDesc);

		uargs->tupd = tupleDesc;

		fctx->user_fctx = uargs;

		MemoryContextSwitchTo(mctx);
	}

	fctx = SRF_PERCALL_SETUP();
	uargs = fctx->user_fctx;

	/* emit one row per item until max_calls rows have been returned */
	if (fctx->call_cntr < fctx->max_calls)
	{
		result = bt_page_print_tuples(uargs);
		uargs->offset++;
		SRF_RETURN_NEXT(fctx, result);
	}

	SRF_RETURN_DONE(fctx);
}
709 :
/*
 * SQL-callable entry point that runs bt_page_items_internal() with
 * PAGEINSPECT_V1_9, so the block number argument is read as int64.
 */
Datum
bt_page_items_1_9(PG_FUNCTION_ARGS)
{
	return bt_page_items_internal(fcinfo, PAGEINSPECT_V1_9);
}
715 :
/*
 * entry point for old extension version
 *
 * Runs bt_page_items_internal() with PAGEINSPECT_V1_8, so the block number
 * argument is read as uint32.
 */
Datum
bt_page_items(PG_FUNCTION_ARGS)
{
	return bt_page_items_internal(fcinfo, PAGEINSPECT_V1_8);
}
722 :
723 : /*-------------------------------------------------------
724 : * bt_page_items_bytea()
725 : *
726 : * Get IndexTupleData set in a btree page
727 : *
728 : * Usage: SELECT * FROM bt_page_items(get_raw_page('t1_pkey', 1));
729 : *-------------------------------------------------------
730 : */
731 :
732 : Datum
733 18 : bt_page_items_bytea(PG_FUNCTION_ARGS)
734 : {
735 18 : bytea *raw_page = PG_GETARG_BYTEA_P(0);
736 : Datum result;
737 : FuncCallContext *fctx;
738 : ua_page_items *uargs;
739 :
740 18 : if (!superuser())
741 0 : ereport(ERROR,
742 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
743 : errmsg("must be superuser to use raw page functions")));
744 :
745 18 : if (SRF_IS_FIRSTCALL())
746 : {
747 : BTPageOpaque opaque;
748 : MemoryContext mctx;
749 : TupleDesc tupleDesc;
750 :
751 16 : fctx = SRF_FIRSTCALL_INIT();
752 16 : mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
753 :
754 16 : uargs = palloc(sizeof(ua_page_items));
755 :
756 16 : uargs->page = get_page_from_raw(raw_page);
757 :
758 14 : if (PageIsNew(uargs->page))
759 : {
760 2 : MemoryContextSwitchTo(mctx);
761 2 : PG_RETURN_NULL();
762 : }
763 :
764 12 : uargs->offset = FirstOffsetNumber;
765 :
766 : /* verify the special space has the expected size */
767 12 : if (PageGetSpecialSize(uargs->page) != MAXALIGN(sizeof(BTPageOpaqueData)))
768 4 : ereport(ERROR,
769 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
770 : errmsg("input page is not a valid %s page", "btree"),
771 : errdetail("Expected special size %d, got %d.",
772 : (int) MAXALIGN(sizeof(BTPageOpaqueData)),
773 : (int) PageGetSpecialSize(uargs->page))));
774 :
775 8 : opaque = BTPageGetOpaque(uargs->page);
776 :
777 8 : if (P_ISMETA(opaque))
778 4 : ereport(ERROR,
779 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
780 : errmsg("block is a meta page")));
781 :
782 4 : if (P_ISLEAF(opaque) && opaque->btpo_level != 0)
783 2 : ereport(ERROR,
784 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
785 : errmsg("block is not a valid btree leaf page")));
786 :
787 2 : if (P_ISDELETED(opaque))
788 0 : elog(NOTICE, "page is deleted");
789 :
790 2 : if (!P_ISDELETED(opaque))
791 2 : fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
792 : else
793 : {
794 : /* Don't interpret BTDeletedPageData as index tuples */
795 0 : elog(NOTICE, "page from block is deleted");
796 0 : fctx->max_calls = 0;
797 : }
798 2 : uargs->leafpage = P_ISLEAF(opaque);
799 2 : uargs->rightmost = P_RIGHTMOST(opaque);
800 :
801 : /* Build a tuple descriptor for our result type */
802 2 : if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
803 0 : elog(ERROR, "return type must be a row type");
804 2 : tupleDesc = BlessTupleDesc(tupleDesc);
805 :
806 2 : uargs->tupd = tupleDesc;
807 :
808 2 : fctx->user_fctx = uargs;
809 :
810 2 : MemoryContextSwitchTo(mctx);
811 : }
812 :
813 4 : fctx = SRF_PERCALL_SETUP();
814 4 : uargs = fctx->user_fctx;
815 :
816 4 : if (fctx->call_cntr < fctx->max_calls)
817 : {
818 2 : result = bt_page_print_tuples(uargs);
819 2 : uargs->offset++;
820 2 : SRF_RETURN_NEXT(fctx, result);
821 : }
822 :
823 2 : SRF_RETURN_DONE(fctx);
824 : }
825 :
826 : /* Number of output arguments (columns) for bt_metap() */
827 : #define BT_METAP_COLS_V1_8 9
828 :
829 : /* ------------------------------------------------
830 : * bt_metap()
831 : *
832 : * Get a btree's meta-page information
833 : *
834 : * Usage: SELECT * FROM bt_metap('t1_pkey')
835 : * ------------------------------------------------
836 : */
837 : Datum
838 4 : bt_metap(PG_FUNCTION_ARGS)
839 : {
840 4 : text *relname = PG_GETARG_TEXT_PP(0);
841 : Datum result;
842 : Relation rel;
843 : RangeVar *relrv;
844 : BTMetaPageData *metad;
845 : TupleDesc tupleDesc;
846 : int j;
847 : char *values[9];
848 : Buffer buffer;
849 : Page page;
850 : HeapTuple tuple;
851 :
852 4 : if (!superuser())
853 0 : ereport(ERROR,
854 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
855 : errmsg("must be superuser to use pageinspect functions")));
856 :
857 4 : relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
858 4 : rel = relation_openrv(relrv, AccessShareLock);
859 :
860 4 : if (!IS_INDEX(rel) || !IS_BTREE(rel))
861 2 : ereport(ERROR,
862 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
863 : errmsg("\"%s\" is not a %s index",
864 : RelationGetRelationName(rel), "btree")));
865 :
866 : /*
867 : * Reject attempts to read non-local temporary relations; we would be
868 : * likely to get wrong data since we have no visibility into the owning
869 : * session's local buffers.
870 : */
871 2 : if (RELATION_IS_OTHER_TEMP(rel))
872 0 : ereport(ERROR,
873 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
874 : errmsg("cannot access temporary tables of other sessions")));
875 :
876 2 : buffer = ReadBuffer(rel, 0);
877 2 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
878 :
879 2 : page = BufferGetPage(buffer);
880 2 : metad = BTPageGetMeta(page);
881 :
882 : /* Build a tuple descriptor for our result type */
883 2 : if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
884 0 : elog(ERROR, "return type must be a row type");
885 :
886 : /*
887 : * We need a kluge here to detect API versions prior to 1.8. Earlier
888 : * versions incorrectly used int4 for certain columns.
889 : *
890 : * There is no way to reliably avoid the problems created by the old
891 : * function definition at this point, so insist that the user update the
892 : * extension.
893 : */
894 2 : if (tupleDesc->natts < BT_METAP_COLS_V1_8)
895 0 : ereport(ERROR,
896 : (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
897 : errmsg("function has wrong number of declared columns"),
898 : errhint("To resolve the problem, update the \"pageinspect\" extension to the latest version.")));
899 :
900 2 : j = 0;
901 2 : values[j++] = psprintf("%d", metad->btm_magic);
902 2 : values[j++] = psprintf("%d", metad->btm_version);
903 2 : values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_root);
904 2 : values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_level);
905 2 : values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_fastroot);
906 2 : values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_fastlevel);
907 :
908 : /*
909 : * Get values of extended metadata if available, use default values
910 : * otherwise. Note that we rely on the assumption that btm_allequalimage
911 : * is initialized to zero with indexes that were built on versions prior
912 : * to Postgres 13 (just like _bt_metaversion()).
913 : */
914 2 : if (metad->btm_version >= BTREE_NOVAC_VERSION)
915 : {
916 4 : values[j++] = psprintf(INT64_FORMAT,
917 2 : (int64) metad->btm_last_cleanup_num_delpages);
918 2 : values[j++] = psprintf("%f", metad->btm_last_cleanup_num_heap_tuples);
919 2 : values[j++] = metad->btm_allequalimage ? "t" : "f";
920 : }
921 : else
922 : {
923 0 : values[j++] = "0";
924 0 : values[j++] = "-1";
925 0 : values[j++] = "f";
926 : }
927 :
928 2 : tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
929 : values);
930 :
931 2 : result = HeapTupleGetDatum(tuple);
932 :
933 2 : UnlockReleaseBuffer(buffer);
934 2 : relation_close(rel, AccessShareLock);
935 :
936 2 : PG_RETURN_DATUM(result);
937 : }
|