LCOV - code coverage report
Current view: top level - src/backend/access/tablesample - system.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 65 65 100.0 %
Date: 2019-11-13 22:07:24 Functions: 6 6 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * system.c
       4             :  *    support routines for SYSTEM tablesample method
       5             :  *
       6             :  * To ensure repeatability of samples, it is necessary that selection of a
       7             :  * given tuple be history-independent; otherwise syncscanning would break
       8             :  * repeatability, to say nothing of logically-irrelevant maintenance such
       9             :  * as physical extension or shortening of the relation.
      10             :  *
      11             :  * To achieve that, we proceed by hashing each candidate block number together
      12             :  * with the active seed, and then selecting it if the hash is less than the
      13             :  * cutoff value computed from the selection probability by BeginSampleScan.
      14             :  *
      15             :  *
      16             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      17             :  * Portions Copyright (c) 1994, Regents of the University of California
      18             :  *
      19             :  * IDENTIFICATION
      20             :  *    src/backend/access/tablesample/system.c
      21             :  *
      22             :  *-------------------------------------------------------------------------
      23             :  */
      24             : 
      25             : #include "postgres.h"
      26             : 
      27             : #include <math.h>
      28             : 
      29             : #include "access/relscan.h"
      30             : #include "access/tsmapi.h"
      31             : #include "catalog/pg_type.h"
      32             : #include "optimizer/optimizer.h"
      33             : #include "utils/builtins.h"
      34             : #include "utils/hashutils.h"
      35             : 
      36             : 
      37             : /* Private state */
      38             : typedef struct
      39             : {
      40             :     uint64      cutoff;         /* select blocks with hash less than this */
      41             :     uint32      seed;           /* random seed */
      42             :     BlockNumber nextblock;      /* next block to consider sampling */
      43             :     OffsetNumber lt;            /* last tuple returned from current block */
      44             : } SystemSamplerData;
      45             : 
      46             : 
      47             : static void system_samplescangetsamplesize(PlannerInfo *root,
      48             :                                            RelOptInfo *baserel,
      49             :                                            List *paramexprs,
      50             :                                            BlockNumber *pages,
      51             :                                            double *tuples);
      52             : static void system_initsamplescan(SampleScanState *node,
      53             :                                   int eflags);
      54             : static void system_beginsamplescan(SampleScanState *node,
      55             :                                    Datum *params,
      56             :                                    int nparams,
      57             :                                    uint32 seed);
      58             : static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
      59             : static OffsetNumber system_nextsampletuple(SampleScanState *node,
      60             :                                            BlockNumber blockno,
      61             :                                            OffsetNumber maxoffset);
      62             : 
      63             : 
      64             : /*
      65             :  * Create a TsmRoutine descriptor for the SYSTEM method.
      66             :  */
      67             : Datum
      68         272 : tsm_system_handler(PG_FUNCTION_ARGS)
      69             : {
      70         272 :     TsmRoutine *tsm = makeNode(TsmRoutine);
      71             : 
      72         272 :     tsm->parameterTypes = list_make1_oid(FLOAT4OID);
      73         272 :     tsm->repeatable_across_queries = true;
      74         272 :     tsm->repeatable_across_scans = true;
      75         272 :     tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
      76         272 :     tsm->InitSampleScan = system_initsamplescan;
      77         272 :     tsm->BeginSampleScan = system_beginsamplescan;
      78         272 :     tsm->NextSampleBlock = system_nextsampleblock;
      79         272 :     tsm->NextSampleTuple = system_nextsampletuple;
      80         272 :     tsm->EndSampleScan = NULL;
      81             : 
      82         272 :     PG_RETURN_POINTER(tsm);
      83             : }
      84             : 
      85             : /*
      86             :  * Sample size estimation.
      87             :  */
      88             : static void
      89          64 : system_samplescangetsamplesize(PlannerInfo *root,
      90             :                                RelOptInfo *baserel,
      91             :                                List *paramexprs,
      92             :                                BlockNumber *pages,
      93             :                                double *tuples)
      94             : {
      95             :     Node       *pctnode;
      96             :     float4      samplefract;
      97             : 
      98             :     /* Try to extract an estimate for the sample percentage */
      99          64 :     pctnode = (Node *) linitial(paramexprs);
     100          64 :     pctnode = estimate_expression_value(root, pctnode);
     101             : 
     102         120 :     if (IsA(pctnode, Const) &&
     103          56 :         !((Const *) pctnode)->constisnull)
     104             :     {
     105          52 :         samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
     106         104 :         if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
     107          44 :             samplefract /= 100.0f;
     108             :         else
     109             :         {
     110             :             /* Default samplefract if the value is bogus */
     111           8 :             samplefract = 0.1f;
     112             :         }
     113             :     }
     114             :     else
     115             :     {
     116             :         /* Default samplefract if we didn't obtain a non-null Const */
     117          12 :         samplefract = 0.1f;
     118             :     }
     119             : 
     120             :     /* We'll visit a sample of the pages ... */
     121          64 :     *pages = clamp_row_est(baserel->pages * samplefract);
     122             : 
     123             :     /* ... and hopefully get a representative number of tuples from them */
     124          64 :     *tuples = clamp_row_est(baserel->tuples * samplefract);
     125          64 : }
     126             : 
     127             : /*
     128             :  * Initialize during executor setup.
     129             :  */
     130             : static void
     131          64 : system_initsamplescan(SampleScanState *node, int eflags)
     132             : {
     133          64 :     node->tsm_state = palloc0(sizeof(SystemSamplerData));
     134          64 : }
     135             : 
     136             : /*
     137             :  * Examine parameters and prepare for a sample scan.
     138             :  */
     139             : static void
     140          60 : system_beginsamplescan(SampleScanState *node,
     141             :                        Datum *params,
     142             :                        int nparams,
     143             :                        uint32 seed)
     144             : {
     145          60 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     146          60 :     double      percent = DatumGetFloat4(params[0]);
     147             :     double      dcutoff;
     148             : 
     149          60 :     if (percent < 0 || percent > 100 || isnan(percent))
     150           8 :         ereport(ERROR,
     151             :                 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
     152             :                  errmsg("sample percentage must be between 0 and 100")));
     153             : 
     154             :     /*
     155             :      * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
     156             :      * store that as a uint64, of course.  Note that this gives strictly
     157             :      * correct behavior at the limits of zero or one probability.
     158             :      */
     159          52 :     dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
     160          52 :     sampler->cutoff = (uint64) dcutoff;
     161          52 :     sampler->seed = seed;
     162          52 :     sampler->nextblock = 0;
     163          52 :     sampler->lt = InvalidOffsetNumber;
     164             : 
     165             :     /*
     166             :      * Bulkread buffer access strategy probably makes sense unless we're
     167             :      * scanning a very small fraction of the table.  The 1% cutoff here is a
     168             :      * guess.  We should use pagemode visibility checking, since we scan all
     169             :      * tuples on each selected page.
     170             :      */
     171          52 :     node->use_bulkread = (percent >= 1);
     172          52 :     node->use_pagemode = true;
     173          52 : }
     174             : 
     175             : /*
     176             :  * Select next block to sample.
     177             :  */
     178             : static BlockNumber
     179        2892 : system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
     180             : {
     181        2892 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     182        2892 :     BlockNumber nextblock = sampler->nextblock;
     183             :     uint32      hashinput[2];
     184             : 
     185             :     /*
     186             :      * We compute the hash by applying hash_any to an array of 2 uint32's
     187             :      * containing the block number and seed.  This is efficient to set up, and
     188             :      * with the current implementation of hash_any, it gives
     189             :      * machine-independent results, which is a nice property for regression
     190             :      * testing.
     191             :      *
     192             :      * These words in the hash input are the same throughout the block:
     193             :      */
     194        2892 :     hashinput[1] = sampler->seed;
     195             : 
     196             :     /*
     197             :      * Loop over block numbers until finding suitable block or reaching end of
     198             :      * relation.
     199             :      */
     200        5700 :     for (; nextblock < nblocks; nextblock++)
     201             :     {
     202             :         uint32      hash;
     203             : 
     204        5656 :         hashinput[0] = nextblock;
     205             : 
     206        5656 :         hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
     207             :                                        (int) sizeof(hashinput)));
     208        5656 :         if (hash < sampler->cutoff)
     209        2848 :             break;
     210             :     }
     211             : 
     212        2892 :     if (nextblock < nblocks)
     213             :     {
     214             :         /* Found a suitable block; remember where we should start next time */
     215        2848 :         sampler->nextblock = nextblock + 1;
     216        2848 :         return nextblock;
     217             :     }
     218             : 
     219             :     /* Done, but let's reset nextblock to 0 for safety. */
     220          44 :     sampler->nextblock = 0;
     221          44 :     return InvalidBlockNumber;
     222             : }
     223             : 
     224             : /*
     225             :  * Select next sampled tuple in current block.
     226             :  *
     227             :  * In block sampling, we just want to sample all the tuples in each selected
     228             :  * block.
     229             :  *
     230             :  * It is OK here to return an offset without knowing if the tuple is visible
     231             :  * (or even exists); nodeSamplescan.c will deal with that.
     232             :  *
     233             :  * When we reach end of the block, return InvalidOffsetNumber which tells
     234             :  * SampleScan to go to next block.
     235             :  */
     236             : static OffsetNumber
     237       83080 : system_nextsampletuple(SampleScanState *node,
     238             :                        BlockNumber blockno,
     239             :                        OffsetNumber maxoffset)
     240             : {
     241       83080 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     242       83080 :     OffsetNumber tupoffset = sampler->lt;
     243             : 
     244             :     /* Advance to next possible offset on page */
     245       83080 :     if (tupoffset == InvalidOffsetNumber)
     246        2848 :         tupoffset = FirstOffsetNumber;
     247             :     else
     248       80232 :         tupoffset++;
     249             : 
     250             :     /* Done? */
     251       83080 :     if (tupoffset > maxoffset)
     252        2840 :         tupoffset = InvalidOffsetNumber;
     253             : 
     254       83080 :     sampler->lt = tupoffset;
     255             : 
     256       83080 :     return tupoffset;
     257             : }

Generated by: LCOV version 1.13