LCOV - code coverage report
Current view: top level - src/backend/access/gin - ginpostinglist.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 111 129 86.0 %
Date: 2025-01-18 04:15:08 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ginpostinglist.c
       4             :  *    routines for dealing with posting lists.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/gin/ginpostinglist.c
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gin_private.h"
      18             : 
      19             : #ifdef USE_ASSERT_CHECKING
      20             : #define CHECK_ENCODING_ROUNDTRIP
      21             : #endif
      22             : 
      23             : /*
      24             :  * For encoding purposes, item pointers are represented as 64-bit unsigned
      25             :  * integers. The lowest 11 bits represent the offset number, and the next
      26             :  * lowest 32 bits are the block number. That leaves 21 bits unused, i.e.
      27             :  * only 43 low bits are used.
      28             :  *
      29             :  * 11 bits is enough for the offset number, because MaxHeapTuplesPerPage <
      30             :  * 2^11 on all supported block sizes. We are frugal with the bits, because
      31             :  * smaller integers use fewer bytes in the varbyte encoding, saving disk
      32             :  * space. (If we get a new table AM in the future that wants to use the full
      33             :  * range of possible offset numbers, we'll need to change this.)
      34             :  *
      35             :  * These 43-bit integers are encoded using varbyte encoding. In each byte,
      36             :  * the 7 low bits contain data, while the highest bit is a continuation bit.
      37             :  * When the continuation bit is set, the next byte is part of the same
      38             :  * integer, otherwise this is the last byte of this integer. 43 bits need
      39             :  * at most 7 bytes in this encoding:
      40             :  *
      41             :  * 0XXXXXXX
      42             :  * 1XXXXXXX 0XXXXYYY
      43             :  * 1XXXXXXX 1XXXXYYY 0YYYYYYY
      44             :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY
      45             :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
      46             :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
      47             :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY
      48             :  *
      49             :  * X = bits used for offset number
      50             :  * Y = bits used for block number
      51             :  * u = unused bit
      52             :  *
      53             :  * The bytes are in stored in little-endian order.
      54             :  *
      55             :  * An important property of this encoding is that removing an item from list
      56             :  * never increases the size of the resulting compressed posting list. Proof:
      57             :  *
      58             :  * Removing number is actually replacement of two numbers with their sum. We
      59             :  * have to prove that varbyte encoding of a sum can't be longer than varbyte
      60             :  * encoding of its summands. Sum of two numbers is at most one bit wider than
      61             :  * the larger of the summands. Widening a number by one bit enlarges its length
      62             :  * in varbyte encoding by at most one byte. Therefore, varbyte encoding of sum
      63             :  * is at most one byte longer than varbyte encoding of larger summand. Lesser
      64             :  * summand is at least one byte, so the sum cannot take more space than the
      65             :  * summands, Q.E.D.
      66             :  *
      67             :  * This property greatly simplifies VACUUM, which can assume that posting
      68             :  * lists always fit on the same page after vacuuming. Note that even though
      69             :  * that holds for removing items from a posting list, you must also be
      70             :  * careful to not cause expansion e.g. when merging uncompressed items on the
      71             :  * page into the compressed lists, when vacuuming.
      72             :  */
      73             : 
      74             : /*
      75             :  * How many bits do you need to encode offset number? OffsetNumber is a 16-bit
      76             :  * integer, but you can't fit that many items on a page. 11 ought to be more
      77             :  * than enough. It's tempting to derive this from MaxHeapTuplesPerPage, and
      78             :  * use the minimum number of bits, but that would require changing the on-disk
      79             :  * format if MaxHeapTuplesPerPage changes. Better to leave some slack.
      80             :  */
      81             : #define MaxHeapTuplesPerPageBits        11
      82             : 
      83             : /* Max. number of bytes needed to encode the largest supported integer. */
      84             : #define MaxBytesPerInteger              7
      85             : 
      86             : static inline uint64
      87    34229964 : itemptr_to_uint64(const ItemPointer iptr)
      88             : {
      89             :     uint64      val;
      90             : 
      91             :     Assert(ItemPointerIsValid(iptr));
      92             :     Assert(GinItemPointerGetOffsetNumber(iptr) < (1 << MaxHeapTuplesPerPageBits));
      93             : 
      94    34229964 :     val = GinItemPointerGetBlockNumber(iptr);
      95    34229964 :     val <<= MaxHeapTuplesPerPageBits;
      96    34229964 :     val |= GinItemPointerGetOffsetNumber(iptr);
      97             : 
      98    34229964 :     return val;
      99             : }
     100             : 
     101             : static inline void
     102    34398916 : uint64_to_itemptr(uint64 val, ItemPointer iptr)
     103             : {
     104    34398916 :     GinItemPointerSetOffsetNumber(iptr, val & ((1 << MaxHeapTuplesPerPageBits) - 1));
     105    34398916 :     val = val >> MaxHeapTuplesPerPageBits;
     106    34398916 :     GinItemPointerSetBlockNumber(iptr, val);
     107             : 
     108             :     Assert(ItemPointerIsValid(iptr));
     109    34398916 : }
     110             : 
     111             : /*
     112             :  * Varbyte-encode 'val' into *ptr. *ptr is incremented to next integer.
     113             :  */
     114             : static void
     115    32822514 : encode_varbyte(uint64 val, unsigned char **ptr)
     116             : {
     117    32822514 :     unsigned char *p = *ptr;
     118             : 
     119    33417788 :     while (val > 0x7F)
     120             :     {
     121      595274 :         *(p++) = 0x80 | (val & 0x7F);
     122      595274 :         val >>= 7;
     123             :     }
     124    32822514 :     *(p++) = (unsigned char) val;
     125             : 
     126    32822514 :     *ptr = p;
     127    32822514 : }
     128             : 
     129             : /*
     130             :  * Decode varbyte-encoded integer at *ptr. *ptr is incremented to next integer.
     131             :  */
     132             : static uint64
     133    34398916 : decode_varbyte(unsigned char **ptr)
     134             : {
     135             :     uint64      val;
     136    34398916 :     unsigned char *p = *ptr;
     137             :     uint64      c;
     138             : 
     139             :     /* 1st byte */
     140    34398916 :     c = *(p++);
     141    34398916 :     val = c & 0x7F;
     142    34398916 :     if (c & 0x80)
     143             :     {
     144             :         /* 2nd byte */
     145     1245578 :         c = *(p++);
     146     1245578 :         val |= (c & 0x7F) << 7;
     147     1245578 :         if (c & 0x80)
     148             :         {
     149             :             /* 3rd byte */
     150       87332 :             c = *(p++);
     151       87332 :             val |= (c & 0x7F) << 14;
     152       87332 :             if (c & 0x80)
     153             :             {
     154             :                 /* 4th byte */
     155           2 :                 c = *(p++);
     156           2 :                 val |= (c & 0x7F) << 21;
     157           2 :                 if (c & 0x80)
     158             :                 {
     159             :                     /* 5th byte */
     160           2 :                     c = *(p++);
     161           2 :                     val |= (c & 0x7F) << 28;
     162           2 :                     if (c & 0x80)
     163             :                     {
     164             :                         /* 6th byte */
     165           2 :                         c = *(p++);
     166           2 :                         val |= (c & 0x7F) << 35;
     167           2 :                         if (c & 0x80)
     168             :                         {
     169             :                             /* 7th byte, should not have continuation bit */
     170           2 :                             c = *(p++);
     171           2 :                             val |= c << 42;
     172             :                             Assert((c & 0x80) == 0);
     173             :                         }
     174             :                     }
     175             :                 }
     176             :             }
     177             :         }
     178             :     }
     179             : 
     180    34398916 :     *ptr = p;
     181             : 
     182    34398916 :     return val;
     183             : }
     184             : 
     185             : /*
     186             :  * Encode a posting list.
     187             :  *
     188             :  * The encoded list is returned in a palloc'd struct, which will be at most
     189             :  * 'maxsize' bytes in size.  The number items in the returned segment is
     190             :  * returned in *nwritten. If it's not equal to nipd, not all the items fit
     191             :  * in 'maxsize', and only the first *nwritten were encoded.
     192             :  *
     193             :  * The allocated size of the returned struct is short-aligned, and the padding
     194             :  * byte at the end, if any, is zero.
     195             :  */
     196             : GinPostingList *
     197      782608 : ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
     198             :                        int *nwritten)
     199             : {
     200             :     uint64      prev;
     201      782608 :     int         totalpacked = 0;
     202             :     int         maxbytes;
     203             :     GinPostingList *result;
     204             :     unsigned char *ptr;
     205             :     unsigned char *endptr;
     206             : 
     207      782608 :     maxsize = SHORTALIGN_DOWN(maxsize);
     208             : 
     209      782608 :     result = palloc(maxsize);
     210             : 
     211      782608 :     maxbytes = maxsize - offsetof(GinPostingList, bytes);
     212             :     Assert(maxbytes > 0);
     213             : 
     214             :     /* Store the first special item */
     215      782608 :     result->first = ipd[0];
     216             : 
     217      782608 :     prev = itemptr_to_uint64(&result->first);
     218             : 
     219      782608 :     ptr = result->bytes;
     220      782608 :     endptr = result->bytes + maxbytes;
     221    33600804 :     for (totalpacked = 1; totalpacked < nipd; totalpacked++)
     222             :     {
     223    32822514 :         uint64      val = itemptr_to_uint64(&ipd[totalpacked]);
     224    32822514 :         uint64      delta = val - prev;
     225             : 
     226             :         Assert(val > prev);
     227             : 
     228    32822514 :         if (endptr - ptr >= MaxBytesPerInteger)
     229    32792852 :             encode_varbyte(delta, &ptr);
     230             :         else
     231             :         {
     232             :             /*
     233             :              * There are less than 7 bytes left. Have to check if the next
     234             :              * item fits in that space before writing it out.
     235             :              */
     236             :             unsigned char buf[MaxBytesPerInteger];
     237       29662 :             unsigned char *p = buf;
     238             : 
     239       29662 :             encode_varbyte(delta, &p);
     240       29662 :             if (p - buf > (endptr - ptr))
     241        4318 :                 break;          /* output is full */
     242             : 
     243       25344 :             memcpy(ptr, buf, p - buf);
     244       25344 :             ptr += (p - buf);
     245             :         }
     246    32818196 :         prev = val;
     247             :     }
     248      782608 :     result->nbytes = ptr - result->bytes;
     249             : 
     250             :     /*
     251             :      * If we wrote an odd number of bytes, zero out the padding byte at the
     252             :      * end.
     253             :      */
     254      782608 :     if (result->nbytes != SHORTALIGN(result->nbytes))
     255       60752 :         result->bytes[result->nbytes] = 0;
     256             : 
     257      782608 :     if (nwritten)
     258       60488 :         *nwritten = totalpacked;
     259             : 
     260             :     Assert(SizeOfGinPostingList(result) <= maxsize);
     261             : 
     262             :     /*
     263             :      * Check that the encoded segment decodes back to the original items.
     264             :      */
     265             : #if defined (CHECK_ENCODING_ROUNDTRIP)
     266             :     {
     267             :         int         ndecoded;
     268             :         ItemPointer tmp = ginPostingListDecode(result, &ndecoded);
     269             : 
     270             :         Assert(ndecoded == totalpacked);
     271             :         Assert(memcmp(tmp, ipd, ndecoded * sizeof(ItemPointerData)) == 0);
     272             :         pfree(tmp);
     273             :     }
     274             : #endif
     275             : 
     276      782608 :     return result;
     277             : }
     278             : 
     279             : /*
     280             :  * Decode a compressed posting list into an array of item pointers.
     281             :  * The number of items is returned in *ndecoded.
     282             :  */
     283             : ItemPointer
     284      621490 : ginPostingListDecode(GinPostingList *plist, int *ndecoded_out)
     285             : {
     286     1242980 :     return ginPostingListDecodeAllSegments(plist,
     287      621490 :                                            SizeOfGinPostingList(plist),
     288             :                                            ndecoded_out);
     289             : }
     290             : 
     291             : /*
     292             :  * Decode multiple posting list segments into an array of item pointers.
     293             :  * The number of items is returned in *ndecoded_out. The segments are stored
     294             :  * one after each other, with total size 'len' bytes.
     295             :  */
     296             : ItemPointer
     297      621672 : ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out)
     298             : {
     299             :     ItemPointer result;
     300             :     int         nallocated;
     301             :     uint64      val;
     302      621672 :     char       *endseg = ((char *) segment) + len;
     303             :     int         ndecoded;
     304             :     unsigned char *ptr;
     305             :     unsigned char *endptr;
     306             : 
     307             :     /*
     308             :      * Guess an initial size of the array.
     309             :      */
     310      621672 :     nallocated = segment->nbytes * 2 + 1;
     311      621672 :     result = palloc(nallocated * sizeof(ItemPointerData));
     312             : 
     313      621672 :     ndecoded = 0;
     314     1246514 :     while ((char *) segment < endseg)
     315             :     {
     316             :         /* enlarge output array if needed */
     317      624842 :         if (ndecoded >= nallocated)
     318             :         {
     319           0 :             nallocated *= 2;
     320           0 :             result = repalloc(result, nallocated * sizeof(ItemPointerData));
     321             :         }
     322             : 
     323             :         /* copy the first item */
     324             :         Assert(OffsetNumberIsValid(ItemPointerGetOffsetNumber(&segment->first)));
     325             :         Assert(ndecoded == 0 || ginCompareItemPointers(&segment->first, &result[ndecoded - 1]) > 0);
     326      624842 :         result[ndecoded] = segment->first;
     327      624842 :         ndecoded++;
     328             : 
     329      624842 :         val = itemptr_to_uint64(&segment->first);
     330      624842 :         ptr = segment->bytes;
     331      624842 :         endptr = segment->bytes + segment->nbytes;
     332    35023758 :         while (ptr < endptr)
     333             :         {
     334             :             /* enlarge output array if needed */
     335    34398916 :             if (ndecoded >= nallocated)
     336             :             {
     337         580 :                 nallocated *= 2;
     338         580 :                 result = repalloc(result, nallocated * sizeof(ItemPointerData));
     339             :             }
     340             : 
     341    34398916 :             val += decode_varbyte(&ptr);
     342             : 
     343    34398916 :             uint64_to_itemptr(val, &result[ndecoded]);
     344    34398916 :             ndecoded++;
     345             :         }
     346      624842 :         segment = GinNextPostingListSegment(segment);
     347             :     }
     348             : 
     349      621672 :     if (ndecoded_out)
     350      621672 :         *ndecoded_out = ndecoded;
     351      621672 :     return result;
     352             : }
     353             : 
     354             : /*
     355             :  * Add all item pointers from a bunch of posting lists to a TIDBitmap.
     356             :  */
     357             : int
     358          30 : ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int len,
     359             :                                      TIDBitmap *tbm)
     360             : {
     361             :     int         ndecoded;
     362             :     ItemPointer items;
     363             : 
     364          30 :     items = ginPostingListDecodeAllSegments(ptr, len, &ndecoded);
     365          30 :     tbm_add_tuples(tbm, items, ndecoded, false);
     366          30 :     pfree(items);
     367             : 
     368          30 :     return ndecoded;
     369             : }
     370             : 
     371             : /*
     372             :  * Merge two ordered arrays of itempointers, eliminating any duplicates.
     373             :  *
     374             :  * Returns a palloc'd array, and *nmerged is set to the number of items in
     375             :  * the result, after eliminating duplicates.
     376             :  */
     377             : ItemPointer
     378       88504 : ginMergeItemPointers(ItemPointerData *a, uint32 na,
     379             :                      ItemPointerData *b, uint32 nb,
     380             :                      int *nmerged)
     381             : {
     382             :     ItemPointerData *dst;
     383             : 
     384       88504 :     dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData));
     385             : 
     386             :     /*
     387             :      * If the argument arrays don't overlap, we can just append them to each
     388             :      * other.
     389             :      */
     390       88504 :     if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0)
     391             :     {
     392       49188 :         memcpy(dst, a, na * sizeof(ItemPointerData));
     393       49188 :         memcpy(&dst[na], b, nb * sizeof(ItemPointerData));
     394       49188 :         *nmerged = na + nb;
     395             :     }
     396       39316 :     else if (ginCompareItemPointers(&b[nb - 1], &a[0]) < 0)
     397             :     {
     398       39316 :         memcpy(dst, b, nb * sizeof(ItemPointerData));
     399       39316 :         memcpy(&dst[nb], a, na * sizeof(ItemPointerData));
     400       39316 :         *nmerged = na + nb;
     401             :     }
     402             :     else
     403             :     {
     404           0 :         ItemPointerData *dptr = dst;
     405           0 :         ItemPointerData *aptr = a;
     406           0 :         ItemPointerData *bptr = b;
     407             : 
     408           0 :         while (aptr - a < na && bptr - b < nb)
     409             :         {
     410           0 :             int         cmp = ginCompareItemPointers(aptr, bptr);
     411             : 
     412           0 :             if (cmp > 0)
     413           0 :                 *dptr++ = *bptr++;
     414           0 :             else if (cmp == 0)
     415             :             {
     416             :                 /* only keep one copy of the identical items */
     417           0 :                 *dptr++ = *bptr++;
     418           0 :                 aptr++;
     419             :             }
     420             :             else
     421           0 :                 *dptr++ = *aptr++;
     422             :         }
     423             : 
     424           0 :         while (aptr - a < na)
     425           0 :             *dptr++ = *aptr++;
     426             : 
     427           0 :         while (bptr - b < nb)
     428           0 :             *dptr++ = *bptr++;
     429             : 
     430           0 :         *nmerged = dptr - dst;
     431             :     }
     432             : 
     433       88504 :     return dst;
     434             : }

Generated by: LCOV version 1.14