LCOV - code coverage report
Current view: top level - src/backend/access/gin - ginpostinglist.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 106 129 82.2 %
Date: 2019-11-21 14:06:36 Functions: 8 9 88.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ginpostinglist.c
       4             :  *    routines for dealing with posting lists.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/gin/ginpostinglist.c
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gin_private.h"
      18             : 
      19             : #ifdef USE_ASSERT_CHECKING
      20             : #define CHECK_ENCODING_ROUNDTRIP
      21             : #endif
      22             : 
      23             : /*
      24             :  * For encoding purposes, item pointers are represented as 64-bit unsigned
      25             :  * integers. The lowest 11 bits represent the offset number, and the next
      26             :  * lowest 32 bits are the block number. That leaves 21 bits unused, i.e.
      27             :  * only 43 low bits are used.
      28             :  *
      29             :  * 11 bits is enough for the offset number, because MaxHeapTuplesPerPage <
      30             :  * 2^11 on all supported block sizes. We are frugal with the bits, because
      31             :  * smaller integers use fewer bytes in the varbyte encoding, saving disk
      32             :  * space. (If we get a new table AM in the future that wants to use the full
      33             :  * range of possible offset numbers, we'll need to change this.)
      34             :  *
      35             :  * These 43-bit integers are encoded using varbyte encoding. In each byte,
      36             :  * the 7 low bits contain data, while the highest bit is a continuation bit.
      37             :  * When the continuation bit is set, the next byte is part of the same
      38             :  * integer, otherwise this is the last byte of this integer. 43 bits need
      39             :  * at most 7 bytes in this encoding:
      40             :  *
      41             :  * 0XXXXXXX
      42             :  * 1XXXXXXX 0XXXXYYY
      43             :  * 1XXXXXXX 1XXXXYYY 0YYYYYYY
      44             :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY
      45             :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
      46             :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
      47             :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY
      48             :  *
      49             :  * X = bits used for offset number
      50             :  * Y = bits used for block number
      51             :  * u = unused bit
      52             :  *
      53             :  * The bytes are in stored in little-endian order.
      54             :  *
      55             :  * An important property of this encoding is that removing an item from list
      56             :  * never increases the size of the resulting compressed posting list. Proof:
      57             :  *
      58             :  * Removing number is actually replacement of two numbers with their sum. We
      59             :  * have to prove that varbyte encoding of a sum can't be longer than varbyte
      60             :  * encoding of its summands. Sum of two numbers is at most one bit wider than
      61             :  * the larger of the summands. Widening a number by one bit enlarges its length
      62             :  * in varbyte encoding by at most one byte. Therefore, varbyte encoding of sum
      63             :  * is at most one byte longer than varbyte encoding of larger summand. Lesser
      64             :  * summand is at least one byte, so the sum cannot take more space than the
      65             :  * summands, Q.E.D.
      66             :  *
      67             :  * This property greatly simplifies VACUUM, which can assume that posting
      68             :  * lists always fit on the same page after vacuuming. Note that even though
      69             :  * that holds for removing items from a posting list, you must also be
      70             :  * careful to not cause expansion e.g. when merging uncompressed items on the
      71             :  * page into the compressed lists, when vacuuming.
      72             :  */
      73             : 
      74             : /*
      75             :  * How many bits do you need to encode offset number? OffsetNumber is a 16-bit
      76             :  * integer, but you can't fit that many items on a page. 11 ought to be more
      77             :  * than enough. It's tempting to derive this from MaxHeapTuplesPerPage, and
      78             :  * use the minimum number of bits, but that would require changing the on-disk
      79             :  * format if MaxHeapTuplesPerPage changes. Better to leave some slack.
      80             :  */
      81             : #define MaxHeapTuplesPerPageBits        11
      82             : 
      83             : /* Max. number of bytes needed to encode the largest supported integer. */
      84             : #define MaxBytesPerInteger              7
      85             : 
      86             : static inline uint64
      87    27607702 : itemptr_to_uint64(const ItemPointer iptr)
      88             : {
      89             :     uint64      val;
      90             : 
      91             :     Assert(ItemPointerIsValid(iptr));
      92             :     Assert(GinItemPointerGetOffsetNumber(iptr) < (1 << MaxHeapTuplesPerPageBits));
      93             : 
      94    27607702 :     val = GinItemPointerGetBlockNumber(iptr);
      95    27607702 :     val <<= MaxHeapTuplesPerPageBits;
      96    27607702 :     val |= GinItemPointerGetOffsetNumber(iptr);
      97             : 
      98    27607702 :     return val;
      99             : }
     100             : 
     101             : static inline void
     102    26831230 : uint64_to_itemptr(uint64 val, ItemPointer iptr)
     103             : {
     104    26831230 :     GinItemPointerSetOffsetNumber(iptr, val & ((1 << MaxHeapTuplesPerPageBits) - 1));
     105    26831230 :     val = val >> MaxHeapTuplesPerPageBits;
     106    26831230 :     GinItemPointerSetBlockNumber(iptr, val);
     107             : 
     108             :     Assert(ItemPointerIsValid(iptr));
     109    26831230 : }
     110             : 
     111             : /*
     112             :  * Varbyte-encode 'val' into *ptr. *ptr is incremented to next integer.
     113             :  */
     114             : static void
     115    27190156 : encode_varbyte(uint64 val, unsigned char **ptr)
     116             : {
     117    27190156 :     unsigned char *p = *ptr;
     118             : 
     119    54793172 :     while (val > 0x7F)
     120             :     {
     121      412860 :         *(p++) = 0x80 | (val & 0x7F);
     122      412860 :         val >>= 7;
     123             :     }
     124    27190156 :     *(p++) = (unsigned char) val;
     125             : 
     126    27190156 :     *ptr = p;
     127    27190156 : }
     128             : 
     129             : /*
     130             :  * Decode varbyte-encoded integer at *ptr. *ptr is incremented to next integer.
     131             :  */
     132             : static uint64
     133    26831230 : decode_varbyte(unsigned char **ptr)
     134             : {
     135             :     uint64      val;
     136    26831230 :     unsigned char *p = *ptr;
     137             :     uint64      c;
     138             : 
     139             :     /* 1st byte */
     140    26831230 :     c = *(p++);
     141    26831230 :     val = c & 0x7F;
     142    26831230 :     if (c & 0x80)
     143             :     {
     144             :         /* 2nd byte */
     145      434180 :         c = *(p++);
     146      434180 :         val |= (c & 0x7F) << 7;
     147      434180 :         if (c & 0x80)
     148             :         {
     149             :             /* 3rd byte */
     150       32518 :             c = *(p++);
     151       32518 :             val |= (c & 0x7F) << 14;
     152       32518 :             if (c & 0x80)
     153             :             {
     154             :                 /* 4th byte */
     155           2 :                 c = *(p++);
     156           2 :                 val |= (c & 0x7F) << 21;
     157           2 :                 if (c & 0x80)
     158             :                 {
     159             :                     /* 5th byte */
     160           2 :                     c = *(p++);
     161           2 :                     val |= (c & 0x7F) << 28;
     162           2 :                     if (c & 0x80)
     163             :                     {
     164             :                         /* 6th byte */
     165           2 :                         c = *(p++);
     166           2 :                         val |= (c & 0x7F) << 35;
     167           2 :                         if (c & 0x80)
     168             :                         {
     169             :                             /* 7th byte, should not have continuation bit */
     170           2 :                             c = *(p++);
     171           2 :                             val |= c << 42;
     172             :                             Assert((c & 0x80) == 0);
     173             :                         }
     174             :                     }
     175             :                 }
     176             :             }
     177             :         }
     178             :     }
     179             : 
     180    26831230 :     *ptr = p;
     181             : 
     182    26831230 :     return val;
     183             : }
     184             : 
     185             : /*
     186             :  * Encode a posting list.
     187             :  *
     188             :  * The encoded list is returned in a palloc'd struct, which will be at most
     189             :  * 'maxsize' bytes in size.  The number items in the returned segment is
     190             :  * returned in *nwritten. If it's not equal to nipd, not all the items fit
     191             :  * in 'maxsize', and only the first *nwritten were encoded.
     192             :  *
     193             :  * The allocated size of the returned struct is short-aligned, and the padding
     194             :  * byte at the end, if any, is zero.
     195             :  */
     196             : GinPostingList *
     197      270638 : ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
     198             :                        int *nwritten)
     199             : {
     200             :     uint64      prev;
     201      270638 :     int         totalpacked = 0;
     202             :     int         maxbytes;
     203             :     GinPostingList *result;
     204             :     unsigned char *ptr;
     205             :     unsigned char *endptr;
     206             : 
     207      270638 :     maxsize = SHORTALIGN_DOWN(maxsize);
     208             : 
     209      270638 :     result = palloc(maxsize);
     210             : 
     211      270638 :     maxbytes = maxsize - offsetof(GinPostingList, bytes);
     212             :     Assert(maxbytes > 0);
     213             : 
     214             :     /* Store the first special item */
     215      270638 :     result->first = ipd[0];
     216             : 
     217      270638 :     prev = itemptr_to_uint64(&result->first);
     218             : 
     219      270638 :     ptr = result->bytes;
     220      270638 :     endptr = result->bytes + maxbytes;
     221    27457892 :     for (totalpacked = 1; totalpacked < nipd; totalpacked++)
     222             :     {
     223    27190156 :         uint64      val = itemptr_to_uint64(&ipd[totalpacked]);
     224    27190156 :         uint64      delta = val - prev;
     225             : 
     226             :         Assert(val > prev);
     227             : 
     228    27190156 :         if (endptr - ptr >= MaxBytesPerInteger)
     229    27170320 :             encode_varbyte(delta, &ptr);
     230             :         else
     231             :         {
     232             :             /*
     233             :              * There are less than 7 bytes left. Have to check if the next
     234             :              * item fits in that space before writing it out.
     235             :              */
     236             :             unsigned char buf[MaxBytesPerInteger];
     237       19836 :             unsigned char *p = buf;
     238             : 
     239       19836 :             encode_varbyte(delta, &p);
     240       19836 :             if (p - buf > (endptr - ptr))
     241        2902 :                 break;          /* output is full */
     242             : 
     243       16934 :             memcpy(ptr, buf, p - buf);
     244       16934 :             ptr += (p - buf);
     245             :         }
     246    27187254 :         prev = val;
     247             :     }
     248      270638 :     result->nbytes = ptr - result->bytes;
     249             : 
     250             :     /*
     251             :      * If we wrote an odd number of bytes, zero out the padding byte at the
     252             :      * end.
     253             :      */
     254      270638 :     if (result->nbytes != SHORTALIGN(result->nbytes))
     255       43666 :         result->bytes[result->nbytes] = 0;
     256             : 
     257      270638 :     if (nwritten)
     258       45690 :         *nwritten = totalpacked;
     259             : 
     260             :     Assert(SizeOfGinPostingList(result) <= maxsize);
     261             : 
     262             :     /*
     263             :      * Check that the encoded segment decodes back to the original items.
     264             :      */
     265             : #if defined (CHECK_ENCODING_ROUNDTRIP)
     266             :     {
     267             :         int         ndecoded;
     268             :         ItemPointer tmp = ginPostingListDecode(result, &ndecoded);
     269             :         int         i;
     270             : 
     271             :         Assert(ndecoded == totalpacked);
     272             :         for (i = 0; i < ndecoded; i++)
     273             :             Assert(memcmp(&tmp[i], &ipd[i], sizeof(ItemPointerData)) == 0);
     274             :         pfree(tmp);
     275             :     }
     276             : #endif
     277             : 
     278      270638 :     return result;
     279             : }
     280             : 
     281             : /*
     282             :  * Decode a compressed posting list into an array of item pointers.
     283             :  * The number of items is returned in *ndecoded.
     284             :  */
     285             : ItemPointer
     286      145840 : ginPostingListDecode(GinPostingList *plist, int *ndecoded)
     287             : {
     288      145840 :     return ginPostingListDecodeAllSegments(plist,
     289      145840 :                                            SizeOfGinPostingList(plist),
     290             :                                            ndecoded);
     291             : }
     292             : 
     293             : /*
     294             :  * Decode multiple posting list segments into an array of item pointers.
     295             :  * The number of items is returned in *ndecoded_out. The segments are stored
     296             :  * one after each other, with total size 'len' bytes.
     297             :  */
     298             : ItemPointer
     299      145908 : ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out)
     300             : {
     301             :     ItemPointer result;
     302             :     int         nallocated;
     303             :     uint64      val;
     304      145908 :     char       *endseg = ((char *) segment) + len;
     305             :     int         ndecoded;
     306             :     unsigned char *ptr;
     307             :     unsigned char *endptr;
     308             : 
     309             :     /*
     310             :      * Guess an initial size of the array.
     311             :      */
     312      145908 :     nallocated = segment->nbytes * 2 + 1;
     313      145908 :     result = palloc(nallocated * sizeof(ItemPointerData));
     314             : 
     315      145908 :     ndecoded = 0;
     316      438724 :     while ((char *) segment < endseg)
     317             :     {
     318             :         /* enlarge output array if needed */
     319      146908 :         if (ndecoded >= nallocated)
     320             :         {
     321           0 :             nallocated *= 2;
     322           0 :             result = repalloc(result, nallocated * sizeof(ItemPointerData));
     323             :         }
     324             : 
     325             :         /* copy the first item */
     326             :         Assert(OffsetNumberIsValid(ItemPointerGetOffsetNumber(&segment->first)));
     327             :         Assert(ndecoded == 0 || ginCompareItemPointers(&segment->first, &result[ndecoded - 1]) > 0);
     328      146908 :         result[ndecoded] = segment->first;
     329      146908 :         ndecoded++;
     330             : 
     331      146908 :         val = itemptr_to_uint64(&segment->first);
     332      146908 :         ptr = segment->bytes;
     333      146908 :         endptr = segment->bytes + segment->nbytes;
     334    27125046 :         while (ptr < endptr)
     335             :         {
     336             :             /* enlarge output array if needed */
     337    26831230 :             if (ndecoded >= nallocated)
     338             :             {
     339         184 :                 nallocated *= 2;
     340         184 :                 result = repalloc(result, nallocated * sizeof(ItemPointerData));
     341             :             }
     342             : 
     343    26831230 :             val += decode_varbyte(&ptr);
     344             : 
     345    26831230 :             uint64_to_itemptr(val, &result[ndecoded]);
     346    26831230 :             ndecoded++;
     347             :         }
     348      146908 :         segment = GinNextPostingListSegment(segment);
     349             :     }
     350             : 
     351      145908 :     if (ndecoded_out)
     352      145908 :         *ndecoded_out = ndecoded;
     353      145908 :     return result;
     354             : }
     355             : 
     356             : /*
     357             :  * Add all item pointers from a bunch of posting lists to a TIDBitmap.
     358             :  */
     359             : int
     360           0 : ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int len,
     361             :                                      TIDBitmap *tbm)
     362             : {
     363             :     int         ndecoded;
     364             :     ItemPointer items;
     365             : 
     366           0 :     items = ginPostingListDecodeAllSegments(ptr, len, &ndecoded);
     367           0 :     tbm_add_tuples(tbm, items, ndecoded, false);
     368           0 :     pfree(items);
     369             : 
     370           0 :     return ndecoded;
     371             : }
     372             : 
     373             : /*
     374             :  * Merge two ordered arrays of itempointers, eliminating any duplicates.
     375             :  *
     376             :  * Returns a palloc'd array, and *nmerged is set to the number of items in
     377             :  * the result, after eliminating duplicates.
     378             :  */
     379             : ItemPointer
     380       67824 : ginMergeItemPointers(ItemPointerData *a, uint32 na,
     381             :                      ItemPointerData *b, uint32 nb,
     382             :                      int *nmerged)
     383             : {
     384             :     ItemPointerData *dst;
     385             : 
     386       67824 :     dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData));
     387             : 
     388             :     /*
     389             :      * If the argument arrays don't overlap, we can just append them to each
     390             :      * other.
     391             :      */
     392       67824 :     if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0)
     393             :     {
     394       42516 :         memcpy(dst, a, na * sizeof(ItemPointerData));
     395       42516 :         memcpy(&dst[na], b, nb * sizeof(ItemPointerData));
     396       42516 :         *nmerged = na + nb;
     397             :     }
     398       25308 :     else if (ginCompareItemPointers(&b[nb - 1], &a[0]) < 0)
     399             :     {
     400       25308 :         memcpy(dst, b, nb * sizeof(ItemPointerData));
     401       25308 :         memcpy(&dst[nb], a, na * sizeof(ItemPointerData));
     402       25308 :         *nmerged = na + nb;
     403             :     }
     404             :     else
     405             :     {
     406           0 :         ItemPointerData *dptr = dst;
     407           0 :         ItemPointerData *aptr = a;
     408           0 :         ItemPointerData *bptr = b;
     409             : 
     410           0 :         while (aptr - a < na && bptr - b < nb)
     411             :         {
     412           0 :             int         cmp = ginCompareItemPointers(aptr, bptr);
     413             : 
     414           0 :             if (cmp > 0)
     415           0 :                 *dptr++ = *bptr++;
     416           0 :             else if (cmp == 0)
     417             :             {
     418             :                 /* only keep one copy of the identical items */
     419           0 :                 *dptr++ = *bptr++;
     420           0 :                 aptr++;
     421             :             }
     422             :             else
     423           0 :                 *dptr++ = *aptr++;
     424             :         }
     425             : 
     426           0 :         while (aptr - a < na)
     427           0 :             *dptr++ = *aptr++;
     428             : 
     429           0 :         while (bptr - b < nb)
     430           0 :             *dptr++ = *bptr++;
     431             : 
     432           0 :         *nmerged = dptr - dst;
     433             :     }
     434             : 
     435       67824 :     return dst;
     436             : }

Generated by: LCOV version 1.13