Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * bernoulli.c 4 : * support routines for BERNOULLI tablesample method 5 : * 6 : * To ensure repeatability of samples, it is necessary that selection of a 7 : * given tuple be history-independent; otherwise syncscanning would break 8 : * repeatability, to say nothing of logically-irrelevant maintenance such 9 : * as physical extension or shortening of the relation. 10 : * 11 : * To achieve that, we proceed by hashing each candidate TID together with 12 : * the active seed, and then selecting it if the hash is less than the 13 : * cutoff value computed from the selection probability by BeginSampleScan. 14 : * 15 : * 16 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group 17 : * Portions Copyright (c) 1994, Regents of the University of California 18 : * 19 : * IDENTIFICATION 20 : * src/backend/access/tablesample/bernoulli.c 21 : * 22 : *------------------------------------------------------------------------- 23 : */ 24 : 25 : #include "postgres.h" 26 : 27 : #include <math.h> 28 : 29 : #include "access/tsmapi.h" 30 : #include "catalog/pg_type.h" 31 : #include "common/hashfn.h" 32 : #include "optimizer/optimizer.h" 33 : #include "utils/fmgrprotos.h" 34 : 35 : 36 : /* Private state */ 37 : typedef struct 38 : { 39 : uint64 cutoff; /* select tuples with hash less than this */ 40 : uint32 seed; /* random seed */ 41 : OffsetNumber lt; /* last tuple returned from current block */ 42 : } BernoulliSamplerData; 43 : 44 : 45 : static void bernoulli_samplescangetsamplesize(PlannerInfo *root, 46 : RelOptInfo *baserel, 47 : List *paramexprs, 48 : BlockNumber *pages, 49 : double *tuples); 50 : static void bernoulli_initsamplescan(SampleScanState *node, 51 : int eflags); 52 : static void bernoulli_beginsamplescan(SampleScanState *node, 53 : Datum *params, 54 : int nparams, 55 : uint32 seed); 56 : static OffsetNumber bernoulli_nextsampletuple(SampleScanState *node, 57 : BlockNumber blockno, 58 : OffsetNumber maxoffset); 59 : 60 : 61 : /* 62 : * Create a TsmRoutine descriptor for the BERNOULLI method. 63 : */ 64 : Datum 65 456 : tsm_bernoulli_handler(PG_FUNCTION_ARGS) 66 : { 67 456 : TsmRoutine *tsm = makeNode(TsmRoutine); 68 : 69 456 : tsm->parameterTypes = list_make1_oid(FLOAT4OID); 70 456 : tsm->repeatable_across_queries = true; 71 456 : tsm->repeatable_across_scans = true; 72 456 : tsm->SampleScanGetSampleSize = bernoulli_samplescangetsamplesize; 73 456 : tsm->InitSampleScan = bernoulli_initsamplescan; 74 456 : tsm->BeginSampleScan = bernoulli_beginsamplescan; 75 456 : tsm->NextSampleBlock = NULL; 76 456 : tsm->NextSampleTuple = bernoulli_nextsampletuple; 77 456 : tsm->EndSampleScan = NULL; 78 : 79 456 : PG_RETURN_POINTER(tsm); 80 : } 81 : 82 : /* 83 : * Sample size estimation. 84 : */ 85 : static void 86 120 : bernoulli_samplescangetsamplesize(PlannerInfo *root, 87 : RelOptInfo *baserel, 88 : List *paramexprs, 89 : BlockNumber *pages, 90 : double *tuples) 91 : { 92 : Node *pctnode; 93 : float4 samplefract; 94 : 95 : /* Try to extract an estimate for the sample percentage */ 96 120 : pctnode = (Node *) linitial(paramexprs); 97 120 : pctnode = estimate_expression_value(root, pctnode); 98 : 99 120 : if (IsA(pctnode, Const) && 100 102 : !((Const *) pctnode)->constisnull) 101 : { 102 102 : samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue); 103 102 : if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract)) 104 90 : samplefract /= 100.0f; 105 : else 106 : { 107 : /* Default samplefract if the value is bogus */ 108 12 : samplefract = 0.1f; 109 : } 110 : } 111 : else 112 : { 113 : /* Default samplefract if we didn't obtain a non-null Const */ 114 18 : samplefract = 0.1f; 115 : } 116 : 117 : /* We'll visit all pages of the baserel */ 118 120 : *pages = baserel->pages; 119 : 120 120 : *tuples = clamp_row_est(baserel->tuples * samplefract); 121 120 : } 122 : 123 : /* 124 : * Initialize during executor setup. 125 : */ 126 : static void 127 120 : bernoulli_initsamplescan(SampleScanState *node, int eflags) 128 : { 129 120 : node->tsm_state = palloc0(sizeof(BernoulliSamplerData)); 130 120 : } 131 : 132 : /* 133 : * Examine parameters and prepare for a sample scan. 134 : */ 135 : static void 136 90 : bernoulli_beginsamplescan(SampleScanState *node, 137 : Datum *params, 138 : int nparams, 139 : uint32 seed) 140 : { 141 90 : BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state; 142 90 : double percent = DatumGetFloat4(params[0]); 143 : double dcutoff; 144 : 145 90 : if (percent < 0 || percent > 100 || isnan(percent)) 146 12 : ereport(ERROR, 147 : (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT), 148 : errmsg("sample percentage must be between 0 and 100"))); 149 : 150 : /* 151 : * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to 152 : * store that as a uint64, of course. Note that this gives strictly 153 : * correct behavior at the limits of zero or one probability. 154 : */ 155 78 : dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100); 156 78 : sampler->cutoff = (uint64) dcutoff; 157 78 : sampler->seed = seed; 158 78 : sampler->lt = InvalidOffsetNumber; 159 : 160 : /* 161 : * Use bulkread, since we're scanning all pages. But pagemode visibility 162 : * checking is a win only at larger sampling fractions. The 25% cutoff 163 : * here is based on very limited experimentation. 164 : */ 165 78 : node->use_bulkread = true; 166 78 : node->use_pagemode = (percent >= 25); 167 78 : } 168 : 169 : /* 170 : * Select next sampled tuple in current block. 171 : * 172 : * It is OK here to return an offset without knowing if the tuple is visible 173 : * (or even exists). The reason is that we do the coinflip for every tuple 174 : * offset in the table. Since all tuples have the same probability of being 175 : * returned, it doesn't matter if we do extra coinflips for invisible tuples. 176 : * 177 : * When we reach end of the block, return InvalidOffsetNumber which tells 178 : * SampleScan to go to next block. 179 : */ 180 : static OffsetNumber 181 128832 : bernoulli_nextsampletuple(SampleScanState *node, 182 : BlockNumber blockno, 183 : OffsetNumber maxoffset) 184 : { 185 128832 : BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state; 186 128832 : OffsetNumber tupoffset = sampler->lt; 187 : uint32 hashinput[3]; 188 : 189 : /* Advance to first/next tuple in block */ 190 128832 : if (tupoffset == InvalidOffsetNumber) 191 8388 : tupoffset = FirstOffsetNumber; 192 : else 193 120444 : tupoffset++; 194 : 195 : /* 196 : * We compute the hash by applying hash_any to an array of 3 uint32's 197 : * containing the block, offset, and seed. This is efficient to set up, 198 : * and with the current implementation of hash_any, it gives 199 : * machine-independent results, which is a nice property for regression 200 : * testing. 201 : * 202 : * These words in the hash input are the same throughout the block: 203 : */ 204 128832 : hashinput[0] = blockno; 205 128832 : hashinput[2] = sampler->seed; 206 : 207 : /* 208 : * Loop over tuple offsets until finding suitable TID or reaching end of 209 : * block. 210 : */ 211 249036 : for (; tupoffset <= maxoffset; tupoffset++) 212 : { 213 : uint32 hash; 214 : 215 240648 : hashinput[1] = tupoffset; 216 : 217 240648 : hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput, 218 : (int) sizeof(hashinput))); 219 240648 : if (hash < sampler->cutoff) 220 120444 : break; 221 : } 222 : 223 128832 : if (tupoffset > maxoffset) 224 8388 : tupoffset = InvalidOffsetNumber; 225 : 226 128832 : sampler->lt = tupoffset; 227 : 228 128832 : return tupoffset; 229 : }