LCOV - code coverage report
Current view: top level - src/backend/access/tablesample - system.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 65 65 100.0 %
Date: 2023-10-02 04:11:32 Functions: 6 6 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * system.c
       4             :  *    support routines for SYSTEM tablesample method
       5             :  *
       6             :  * To ensure repeatability of samples, it is necessary that selection of a
       7             :  * given tuple be history-independent; otherwise syncscanning would break
       8             :  * repeatability, to say nothing of logically-irrelevant maintenance such
       9             :  * as physical extension or shortening of the relation.
      10             :  *
      11             :  * To achieve that, we proceed by hashing each candidate block number together
      12             :  * with the active seed, and then selecting it if the hash is less than the
      13             :  * cutoff value computed from the selection probability by BeginSampleScan.
      14             :  *
      15             :  *
      16             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      17             :  * Portions Copyright (c) 1994, Regents of the University of California
      18             :  *
      19             :  * IDENTIFICATION
      20             :  *    src/backend/access/tablesample/system.c
      21             :  *
      22             :  *-------------------------------------------------------------------------
      23             :  */
      24             : 
      25             : #include "postgres.h"
      26             : 
      27             : #include <math.h>
      28             : 
      29             : #include "access/relscan.h"
      30             : #include "access/tsmapi.h"
      31             : #include "catalog/pg_type.h"
      32             : #include "common/hashfn.h"
      33             : #include "optimizer/optimizer.h"
      34             : #include "utils/builtins.h"
      35             : 
      36             : 
      37             : /* Private state */
      38             : typedef struct
      39             : {
      40             :     uint64      cutoff;         /* select blocks with hash less than this */
      41             :     uint32      seed;           /* random seed */
      42             :     BlockNumber nextblock;      /* next block to consider sampling */
      43             :     OffsetNumber lt;            /* last tuple returned from current block */
      44             : } SystemSamplerData;
      45             : 
      46             : 
      47             : static void system_samplescangetsamplesize(PlannerInfo *root,
      48             :                                            RelOptInfo *baserel,
      49             :                                            List *paramexprs,
      50             :                                            BlockNumber *pages,
      51             :                                            double *tuples);
      52             : static void system_initsamplescan(SampleScanState *node,
      53             :                                   int eflags);
      54             : static void system_beginsamplescan(SampleScanState *node,
      55             :                                    Datum *params,
      56             :                                    int nparams,
      57             :                                    uint32 seed);
      58             : static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
      59             : static OffsetNumber system_nextsampletuple(SampleScanState *node,
      60             :                                            BlockNumber blockno,
      61             :                                            OffsetNumber maxoffset);
      62             : 
      63             : 
      64             : /*
      65             :  * Create a TsmRoutine descriptor for the SYSTEM method.
      66             :  */
      67             : Datum
      68         406 : tsm_system_handler(PG_FUNCTION_ARGS)
      69             : {
      70         406 :     TsmRoutine *tsm = makeNode(TsmRoutine);
      71             : 
      72         406 :     tsm->parameterTypes = list_make1_oid(FLOAT4OID);
      73         406 :     tsm->repeatable_across_queries = true;
      74         406 :     tsm->repeatable_across_scans = true;
      75         406 :     tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
      76         406 :     tsm->InitSampleScan = system_initsamplescan;
      77         406 :     tsm->BeginSampleScan = system_beginsamplescan;
      78         406 :     tsm->NextSampleBlock = system_nextsampleblock;
      79         406 :     tsm->NextSampleTuple = system_nextsampletuple;
      80         406 :     tsm->EndSampleScan = NULL;
      81             : 
      82         406 :     PG_RETURN_POINTER(tsm);
      83             : }
      84             : 
      85             : /*
      86             :  * Sample size estimation.
      87             :  */
      88             : static void
      89          96 : system_samplescangetsamplesize(PlannerInfo *root,
      90             :                                RelOptInfo *baserel,
      91             :                                List *paramexprs,
      92             :                                BlockNumber *pages,
      93             :                                double *tuples)
      94             : {
      95             :     Node       *pctnode;
      96             :     float4      samplefract;
      97             : 
      98             :     /* Try to extract an estimate for the sample percentage */
      99          96 :     pctnode = (Node *) linitial(paramexprs);
     100          96 :     pctnode = estimate_expression_value(root, pctnode);
     101             : 
     102          96 :     if (IsA(pctnode, Const) &&
     103          84 :         !((Const *) pctnode)->constisnull)
     104             :     {
     105          78 :         samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
     106          78 :         if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
     107          66 :             samplefract /= 100.0f;
     108             :         else
     109             :         {
     110             :             /* Default samplefract if the value is bogus */
     111          12 :             samplefract = 0.1f;
     112             :         }
     113             :     }
     114             :     else
     115             :     {
     116             :         /* Default samplefract if we didn't obtain a non-null Const */
     117          18 :         samplefract = 0.1f;
     118             :     }
     119             : 
     120             :     /* We'll visit a sample of the pages ... */
     121          96 :     *pages = clamp_row_est(baserel->pages * samplefract);
     122             : 
     123             :     /* ... and hopefully get a representative number of tuples from them */
     124          96 :     *tuples = clamp_row_est(baserel->tuples * samplefract);
     125          96 : }
     126             : 
     127             : /*
     128             :  * Initialize during executor setup.
     129             :  */
     130             : static void
     131          96 : system_initsamplescan(SampleScanState *node, int eflags)
     132             : {
     133          96 :     node->tsm_state = palloc0(sizeof(SystemSamplerData));
     134          96 : }
     135             : 
     136             : /*
     137             :  * Examine parameters and prepare for a sample scan.
     138             :  */
     139             : static void
     140          84 : system_beginsamplescan(SampleScanState *node,
     141             :                        Datum *params,
     142             :                        int nparams,
     143             :                        uint32 seed)
     144             : {
     145          84 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     146          84 :     double      percent = DatumGetFloat4(params[0]);
     147             :     double      dcutoff;
     148             : 
     149          84 :     if (percent < 0 || percent > 100 || isnan(percent))
     150          12 :         ereport(ERROR,
     151             :                 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
     152             :                  errmsg("sample percentage must be between 0 and 100")));
     153             : 
     154             :     /*
     155             :      * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
     156             :      * store that as a uint64, of course.  Note that this gives strictly
     157             :      * correct behavior at the limits of zero or one probability.
     158             :      */
     159          72 :     dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
     160          72 :     sampler->cutoff = (uint64) dcutoff;
     161          72 :     sampler->seed = seed;
     162          72 :     sampler->nextblock = 0;
     163          72 :     sampler->lt = InvalidOffsetNumber;
     164             : 
     165             :     /*
     166             :      * Bulkread buffer access strategy probably makes sense unless we're
     167             :      * scanning a very small fraction of the table.  The 1% cutoff here is a
     168             :      * guess.  We should use pagemode visibility checking, since we scan all
     169             :      * tuples on each selected page.
     170             :      */
     171          72 :     node->use_bulkread = (percent >= 1);
     172          72 :     node->use_pagemode = true;
     173          72 : }
     174             : 
     175             : /*
     176             :  * Select next block to sample.
     177             :  */
     178             : static BlockNumber
     179        4326 : system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
     180             : {
     181        4326 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     182        4326 :     BlockNumber nextblock = sampler->nextblock;
     183             :     uint32      hashinput[2];
     184             : 
     185             :     /*
     186             :      * We compute the hash by applying hash_any to an array of 2 uint32's
     187             :      * containing the block number and seed.  This is efficient to set up, and
     188             :      * with the current implementation of hash_any, it gives
     189             :      * machine-independent results, which is a nice property for regression
     190             :      * testing.
     191             :      *
     192             :      * These words in the hash input are the same throughout the block:
     193             :      */
     194        4326 :     hashinput[1] = sampler->seed;
     195             : 
     196             :     /*
     197             :      * Loop over block numbers until finding suitable block or reaching end of
     198             :      * relation.
     199             :      */
     200        8532 :     for (; nextblock < nblocks; nextblock++)
     201             :     {
     202             :         uint32      hash;
     203             : 
     204        8466 :         hashinput[0] = nextblock;
     205             : 
     206        8466 :         hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
     207             :                                        (int) sizeof(hashinput)));
     208        8466 :         if (hash < sampler->cutoff)
     209        4260 :             break;
     210             :     }
     211             : 
     212        4326 :     if (nextblock < nblocks)
     213             :     {
     214             :         /* Found a suitable block; remember where we should start next time */
     215        4260 :         sampler->nextblock = nextblock + 1;
     216        4260 :         return nextblock;
     217             :     }
     218             : 
     219             :     /* Done, but let's reset nextblock to 0 for safety. */
     220          66 :     sampler->nextblock = 0;
     221          66 :     return InvalidBlockNumber;
     222             : }
     223             : 
     224             : /*
     225             :  * Select next sampled tuple in current block.
     226             :  *
     227             :  * In block sampling, we just want to sample all the tuples in each selected
     228             :  * block.
     229             :  *
     230             :  * It is OK here to return an offset without knowing if the tuple is visible
     231             :  * (or even exists); nodeSamplescan.c will deal with that.
     232             :  *
     233             :  * When we reach end of the block, return InvalidOffsetNumber which tells
     234             :  * SampleScan to go to next block.
     235             :  */
     236             : static OffsetNumber
     237      124578 : system_nextsampletuple(SampleScanState *node,
     238             :                        BlockNumber blockno,
     239             :                        OffsetNumber maxoffset)
     240             : {
     241      124578 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     242      124578 :     OffsetNumber tupoffset = sampler->lt;
     243             : 
     244             :     /* Advance to next possible offset on page */
     245      124578 :     if (tupoffset == InvalidOffsetNumber)
     246        4260 :         tupoffset = FirstOffsetNumber;
     247             :     else
     248      120318 :         tupoffset++;
     249             : 
     250             :     /* Done? */
     251      124578 :     if (tupoffset > maxoffset)
     252        4254 :         tupoffset = InvalidOffsetNumber;
     253             : 
     254      124578 :     sampler->lt = tupoffset;
     255             : 
     256      124578 :     return tupoffset;
     257             : }

Generated by: LCOV version 1.14