LCOV - code coverage report
Current view: top level - src/backend/access/gin - ginpostinglist.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 96.9 % 129 125
Test Date: 2026-02-17 17:20:33 Functions: 100.0 % 9 9
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * ginpostinglist.c
       4              :  *    routines for dealing with posting lists.
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *          src/backend/access/gin/ginpostinglist.c
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : 
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/gin_private.h"
      18              : 
      19              : #ifdef USE_ASSERT_CHECKING
      20              : #define CHECK_ENCODING_ROUNDTRIP
      21              : #endif
      22              : 
      23              : /*
      24              :  * For encoding purposes, item pointers are represented as 64-bit unsigned
      25              :  * integers. The lowest 11 bits represent the offset number, and the next
      26              :  * lowest 32 bits are the block number. That leaves 21 bits unused, i.e.
      27              :  * only 43 low bits are used.
      28              :  *
      29              :  * 11 bits is enough for the offset number, because MaxHeapTuplesPerPage <
      30              :  * 2^11 on all supported block sizes. We are frugal with the bits, because
      31              :  * smaller integers use fewer bytes in the varbyte encoding, saving disk
      32              :  * space. (If we get a new table AM in the future that wants to use the full
      33              :  * range of possible offset numbers, we'll need to change this.)
      34              :  *
      35              :  * These 43-bit integers are encoded using varbyte encoding. In each byte,
      36              :  * the 7 low bits contain data, while the highest bit is a continuation bit.
      37              :  * When the continuation bit is set, the next byte is part of the same
      38              :  * integer, otherwise this is the last byte of this integer. 43 bits need
      39              :  * at most 7 bytes in this encoding:
      40              :  *
      41              :  * 0XXXXXXX
      42              :  * 1XXXXXXX 0XXXXYYY
      43              :  * 1XXXXXXX 1XXXXYYY 0YYYYYYY
      44              :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY
      45              :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
      46              :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
      47              :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY
      48              :  *
      49              :  * X = bits used for offset number
      50              :  * Y = bits used for block number
      51              :  * u = unused bit
      52              :  *
      53              :  * The bytes are in stored in little-endian order.
      54              :  *
      55              :  * An important property of this encoding is that removing an item from list
      56              :  * never increases the size of the resulting compressed posting list. Proof:
      57              :  *
      58              :  * Removing number is actually replacement of two numbers with their sum. We
      59              :  * have to prove that varbyte encoding of a sum can't be longer than varbyte
      60              :  * encoding of its summands. Sum of two numbers is at most one bit wider than
      61              :  * the larger of the summands. Widening a number by one bit enlarges its length
      62              :  * in varbyte encoding by at most one byte. Therefore, varbyte encoding of sum
      63              :  * is at most one byte longer than varbyte encoding of larger summand. Lesser
      64              :  * summand is at least one byte, so the sum cannot take more space than the
      65              :  * summands, Q.E.D.
      66              :  *
      67              :  * This property greatly simplifies VACUUM, which can assume that posting
      68              :  * lists always fit on the same page after vacuuming. Note that even though
      69              :  * that holds for removing items from a posting list, you must also be
      70              :  * careful to not cause expansion e.g. when merging uncompressed items on the
      71              :  * page into the compressed lists, when vacuuming.
      72              :  */
      73              : 
      74              : /*
      75              :  * How many bits do you need to encode offset number? OffsetNumber is a 16-bit
      76              :  * integer, but you can't fit that many items on a page. 11 ought to be more
      77              :  * than enough. It's tempting to derive this from MaxHeapTuplesPerPage, and
      78              :  * use the minimum number of bits, but that would require changing the on-disk
      79              :  * format if MaxHeapTuplesPerPage changes. Better to leave some slack.
      80              :  */
      81              : #define MaxHeapTuplesPerPageBits        11
      82              : 
      83              : /* Max. number of bytes needed to encode the largest supported integer. */
      84              : #define MaxBytesPerInteger              7
      85              : 
      86              : static inline uint64
      87     36765235 : itemptr_to_uint64(const ItemPointerData *iptr)
      88              : {
      89              :     uint64      val;
      90              : 
      91              :     Assert(ItemPointerIsValid(iptr));
      92              :     Assert(GinItemPointerGetOffsetNumber(iptr) < (1 << MaxHeapTuplesPerPageBits));
      93              : 
      94     36765235 :     val = GinItemPointerGetBlockNumber(iptr);
      95     36765235 :     val <<= MaxHeapTuplesPerPageBits;
      96     36765235 :     val |= GinItemPointerGetOffsetNumber(iptr);
      97              : 
      98     36765235 :     return val;
      99              : }
     100              : 
     101              : static inline void
     102     36604916 : uint64_to_itemptr(uint64 val, ItemPointer iptr)
     103              : {
     104     36604916 :     GinItemPointerSetOffsetNumber(iptr, val & ((1 << MaxHeapTuplesPerPageBits) - 1));
     105     36604916 :     val = val >> MaxHeapTuplesPerPageBits;
     106     36604916 :     GinItemPointerSetBlockNumber(iptr, val);
     107              : 
     108              :     Assert(ItemPointerIsValid(iptr));
     109     36604916 : }
     110              : 
     111              : /*
     112              :  * Varbyte-encode 'val' into *ptr. *ptr is incremented to next integer.
     113              :  */
     114              : static void
     115     35784854 : encode_varbyte(uint64 val, unsigned char **ptr)
     116              : {
     117     35784854 :     unsigned char *p = *ptr;
     118              : 
     119     41671084 :     while (val > 0x7F)
     120              :     {
     121      5886230 :         *(p++) = 0x80 | (val & 0x7F);
     122      5886230 :         val >>= 7;
     123              :     }
     124     35784854 :     *(p++) = (unsigned char) val;
     125              : 
     126     35784854 :     *ptr = p;
     127     35784854 : }
     128              : 
     129              : /*
     130              :  * Decode varbyte-encoded integer at *ptr. *ptr is incremented to next integer.
     131              :  */
     132              : static uint64
     133     36604916 : decode_varbyte(unsigned char **ptr)
     134              : {
     135              :     uint64      val;
     136     36604916 :     unsigned char *p = *ptr;
     137              :     uint64      c;
     138              : 
     139              :     /* 1st byte */
     140     36604916 :     c = *(p++);
     141     36604916 :     val = c & 0x7F;
     142     36604916 :     if (c & 0x80)
     143              :     {
     144              :         /* 2nd byte */
     145      6205928 :         c = *(p++);
     146      6205928 :         val |= (c & 0x7F) << 7;
     147      6205928 :         if (c & 0x80)
     148              :         {
     149              :             /* 3rd byte */
     150        55654 :             c = *(p++);
     151        55654 :             val |= (c & 0x7F) << 14;
     152        55654 :             if (c & 0x80)
     153              :             {
     154              :                 /* 4th byte */
     155            1 :                 c = *(p++);
     156            1 :                 val |= (c & 0x7F) << 21;
     157            1 :                 if (c & 0x80)
     158              :                 {
     159              :                     /* 5th byte */
     160            1 :                     c = *(p++);
     161            1 :                     val |= (c & 0x7F) << 28;
     162            1 :                     if (c & 0x80)
     163              :                     {
     164              :                         /* 6th byte */
     165            1 :                         c = *(p++);
     166            1 :                         val |= (c & 0x7F) << 35;
     167            1 :                         if (c & 0x80)
     168              :                         {
     169              :                             /* 7th byte, should not have continuation bit */
     170            1 :                             c = *(p++);
     171            1 :                             val |= c << 42;
     172              :                             Assert((c & 0x80) == 0);
     173              :                         }
     174              :                     }
     175              :                 }
     176              :             }
     177              :         }
     178              :     }
     179              : 
     180     36604916 :     *ptr = p;
     181              : 
     182     36604916 :     return val;
     183              : }
     184              : 
     185              : /*
     186              :  * Encode a posting list.
     187              :  *
     188              :  * The encoded list is returned in a palloc'd struct, which will be at most
     189              :  * 'maxsize' bytes in size.  The number items in the returned segment is
     190              :  * returned in *nwritten. If it's not equal to nipd, not all the items fit
     191              :  * in 'maxsize', and only the first *nwritten were encoded.
     192              :  *
     193              :  * The allocated size of the returned struct is short-aligned, and the padding
     194              :  * byte at the end, if any, is zero.
     195              :  */
     196              : GinPostingList *
     197       526988 : ginCompressPostingList(const ItemPointerData *ipd, int nipd, int maxsize,
     198              :                        int *nwritten)
     199              : {
     200              :     uint64      prev;
     201       526988 :     int         totalpacked = 0;
     202              :     int         maxbytes;
     203              :     GinPostingList *result;
     204              :     unsigned char *ptr;
     205              :     unsigned char *endptr;
     206              : 
     207       526988 :     maxsize = SHORTALIGN_DOWN(maxsize);
     208              : 
     209       526988 :     result = palloc(maxsize);
     210              : 
     211       526988 :     maxbytes = maxsize - offsetof(GinPostingList, bytes);
     212              :     Assert(maxbytes > 0);
     213              : 
     214              :     /* Store the first special item */
     215       526988 :     result->first = ipd[0];
     216              : 
     217       526988 :     prev = itemptr_to_uint64(&result->first);
     218              : 
     219       526988 :     ptr = result->bytes;
     220       526988 :     endptr = result->bytes + maxbytes;
     221     36309679 :     for (totalpacked = 1; totalpacked < nipd; totalpacked++)
     222              :     {
     223     35784854 :         uint64      val = itemptr_to_uint64(&ipd[totalpacked]);
     224     35784854 :         uint64      delta = val - prev;
     225              : 
     226              :         Assert(val > prev);
     227              : 
     228     35784854 :         if (endptr - ptr >= MaxBytesPerInteger)
     229     35769996 :             encode_varbyte(delta, &ptr);
     230              :         else
     231              :         {
     232              :             /*
     233              :              * There are less than 7 bytes left. Have to check if the next
     234              :              * item fits in that space before writing it out.
     235              :              */
     236              :             unsigned char buf[MaxBytesPerInteger];
     237        14858 :             unsigned char *p = buf;
     238              : 
     239        14858 :             encode_varbyte(delta, &p);
     240        14858 :             if (p - buf > (endptr - ptr))
     241         2163 :                 break;          /* output is full */
     242              : 
     243        12695 :             memcpy(ptr, buf, p - buf);
     244        12695 :             ptr += (p - buf);
     245              :         }
     246     35782691 :         prev = val;
     247              :     }
     248       526988 :     result->nbytes = ptr - result->bytes;
     249              : 
     250              :     /*
     251              :      * If we wrote an odd number of bytes, zero out the padding byte at the
     252              :      * end.
     253              :      */
     254       526988 :     if (result->nbytes != SHORTALIGN(result->nbytes))
     255        90686 :         result->bytes[result->nbytes] = 0;
     256              : 
     257       526988 :     if (nwritten)
     258       526973 :         *nwritten = totalpacked;
     259              : 
     260              :     Assert(SizeOfGinPostingList(result) <= maxsize);
     261              : 
     262              :     /*
     263              :      * Check that the encoded segment decodes back to the original items.
     264              :      */
     265              : #if defined (CHECK_ENCODING_ROUNDTRIP)
     266              :     {
     267              :         int         ndecoded;
     268              :         ItemPointer tmp = ginPostingListDecode(result, &ndecoded);
     269              : 
     270              :         Assert(ndecoded == totalpacked);
     271              :         Assert(memcmp(tmp, ipd, ndecoded * sizeof(ItemPointerData)) == 0);
     272              :         pfree(tmp);
     273              :     }
     274              : #endif
     275              : 
     276       526988 :     return result;
     277              : }
     278              : 
     279              : /*
     280              :  * Decode a compressed posting list into an array of item pointers.
     281              :  * The number of items is returned in *ndecoded.
     282              :  */
     283              : ItemPointer
     284       416205 : ginPostingListDecode(GinPostingList *plist, int *ndecoded_out)
     285              : {
     286       832410 :     return ginPostingListDecodeAllSegments(plist,
     287       416205 :                                            SizeOfGinPostingList(plist),
     288              :                                            ndecoded_out);
     289              : }
     290              : 
     291              : /*
     292              :  * Decode multiple posting list segments into an array of item pointers.
     293              :  * The number of items is returned in *ndecoded_out. The segments are stored
     294              :  * one after each other, with total size 'len' bytes.
     295              :  */
     296              : ItemPointer
     297       451808 : ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out)
     298              : {
     299              :     ItemPointer result;
     300              :     int         nallocated;
     301              :     uint64      val;
     302       451808 :     char       *endseg = ((char *) segment) + len;
     303              :     int         ndecoded;
     304              :     unsigned char *ptr;
     305              :     unsigned char *endptr;
     306              : 
     307              :     /*
     308              :      * Guess an initial size of the array.
     309              :      */
     310       451808 :     nallocated = segment->nbytes * 2 + 1;
     311       451808 :     result = palloc(nallocated * sizeof(ItemPointerData));
     312              : 
     313       451808 :     ndecoded = 0;
     314       905201 :     while ((char *) segment < endseg)
     315              :     {
     316              :         /* enlarge output array if needed */
     317       453393 :         if (ndecoded >= nallocated)
     318              :         {
     319            0 :             nallocated *= 2;
     320            0 :             result = repalloc(result, nallocated * sizeof(ItemPointerData));
     321              :         }
     322              : 
     323              :         /* copy the first item */
     324              :         Assert(OffsetNumberIsValid(ItemPointerGetOffsetNumber(&segment->first)));
     325              :         Assert(ndecoded == 0 || ginCompareItemPointers(&segment->first, &result[ndecoded - 1]) > 0);
     326       453393 :         result[ndecoded] = segment->first;
     327       453393 :         ndecoded++;
     328              : 
     329       453393 :         val = itemptr_to_uint64(&segment->first);
     330       453393 :         ptr = segment->bytes;
     331       453393 :         endptr = segment->bytes + segment->nbytes;
     332     37058309 :         while (ptr < endptr)
     333              :         {
     334              :             /* enlarge output array if needed */
     335     36604916 :             if (ndecoded >= nallocated)
     336              :             {
     337          290 :                 nallocated *= 2;
     338          290 :                 result = repalloc(result, nallocated * sizeof(ItemPointerData));
     339              :             }
     340              : 
     341     36604916 :             val += decode_varbyte(&ptr);
     342              : 
     343     36604916 :             uint64_to_itemptr(val, &result[ndecoded]);
     344     36604916 :             ndecoded++;
     345              :         }
     346       453393 :         segment = GinNextPostingListSegment(segment);
     347              :     }
     348              : 
     349       451808 :     if (ndecoded_out)
     350       451808 :         *ndecoded_out = ndecoded;
     351       451808 :     return result;
     352              : }
     353              : 
     354              : /*
     355              :  * Add all item pointers from a bunch of posting lists to a TIDBitmap.
     356              :  */
     357              : int
     358           15 : ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int len,
     359              :                                      TIDBitmap *tbm)
     360              : {
     361              :     int         ndecoded;
     362              :     ItemPointer items;
     363              : 
     364           15 :     items = ginPostingListDecodeAllSegments(ptr, len, &ndecoded);
     365           15 :     tbm_add_tuples(tbm, items, ndecoded, false);
     366           15 :     pfree(items);
     367              : 
     368           15 :     return ndecoded;
     369              : }
     370              : 
     371              : /*
     372              :  * Merge two ordered arrays of itempointers, eliminating any duplicates.
     373              :  *
     374              :  * Returns a palloc'd array, and *nmerged is set to the number of items in
     375              :  * the result, after eliminating duplicates.
     376              :  */
     377              : ItemPointer
     378       177860 : ginMergeItemPointers(ItemPointerData *a, uint32 na,
     379              :                      ItemPointerData *b, uint32 nb,
     380              :                      int *nmerged)
     381              : {
     382              :     ItemPointerData *dst;
     383              : 
     384       177860 :     dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData));
     385              : 
     386              :     /*
     387              :      * If the argument arrays don't overlap, we can just append them to each
     388              :      * other.
     389              :      */
     390       177860 :     if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0)
     391              :     {
     392        54258 :         memcpy(dst, a, na * sizeof(ItemPointerData));
     393        54258 :         memcpy(&dst[na], b, nb * sizeof(ItemPointerData));
     394        54258 :         *nmerged = na + nb;
     395              :     }
     396       123602 :     else if (ginCompareItemPointers(&b[nb - 1], &a[0]) < 0)
     397              :     {
     398       118382 :         memcpy(dst, b, nb * sizeof(ItemPointerData));
     399       118382 :         memcpy(&dst[nb], a, na * sizeof(ItemPointerData));
     400       118382 :         *nmerged = na + nb;
     401              :     }
     402              :     else
     403              :     {
     404         5220 :         ItemPointerData *dptr = dst;
     405         5220 :         ItemPointerData *aptr = a;
     406         5220 :         ItemPointerData *bptr = b;
     407              : 
     408        84561 :         while (aptr - a < na && bptr - b < nb)
     409              :         {
     410        79341 :             int         cmp = ginCompareItemPointers(aptr, bptr);
     411              : 
     412        79341 :             if (cmp > 0)
     413        43936 :                 *dptr++ = *bptr++;
     414        35405 :             else if (cmp == 0)
     415              :             {
     416              :                 /* only keep one copy of the identical items */
     417            0 :                 *dptr++ = *bptr++;
     418            0 :                 aptr++;
     419              :             }
     420              :             else
     421        35405 :                 *dptr++ = *aptr++;
     422              :         }
     423              : 
     424        11650 :         while (aptr - a < na)
     425         6430 :             *dptr++ = *aptr++;
     426              : 
     427        27758 :         while (bptr - b < nb)
     428        22538 :             *dptr++ = *bptr++;
     429              : 
     430         5220 :         *nmerged = dptr - dst;
     431              :     }
     432              : 
     433       177860 :     return dst;
     434              : }
        

Generated by: LCOV version 2.0-1