Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * system.c 4 : * support routines for SYSTEM tablesample method 5 : * 6 : * To ensure repeatability of samples, it is necessary that selection of a 7 : * given tuple be history-independent; otherwise syncscanning would break 8 : * repeatability, to say nothing of logically-irrelevant maintenance such 9 : * as physical extension or shortening of the relation. 10 : * 11 : * To achieve that, we proceed by hashing each candidate block number together 12 : * with the active seed, and then selecting it if the hash is less than the 13 : * cutoff value computed from the selection probability by BeginSampleScan. 14 : * 15 : * 16 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group 17 : * Portions Copyright (c) 1994, Regents of the University of California 18 : * 19 : * IDENTIFICATION 20 : * src/backend/access/tablesample/system.c 21 : * 22 : *------------------------------------------------------------------------- 23 : */ 24 : 25 : #include "postgres.h" 26 : 27 : #include <math.h> 28 : 29 : #include "access/relscan.h" 30 : #include "access/tsmapi.h" 31 : #include "catalog/pg_type.h" 32 : #include "common/hashfn.h" 33 : #include "optimizer/optimizer.h" 34 : #include "utils/builtins.h" 35 : 36 : 37 : /* Private state */ 38 : typedef struct 39 : { 40 : uint64 cutoff; /* select blocks with hash less than this */ 41 : uint32 seed; /* random seed */ 42 : BlockNumber nextblock; /* next block to consider sampling */ 43 : OffsetNumber lt; /* last tuple returned from current block */ 44 : } SystemSamplerData; 45 : 46 : 47 : static void system_samplescangetsamplesize(PlannerInfo *root, 48 : RelOptInfo *baserel, 49 : List *paramexprs, 50 : BlockNumber *pages, 51 : double *tuples); 52 : static void system_initsamplescan(SampleScanState *node, 53 : int eflags); 54 : static void system_beginsamplescan(SampleScanState *node, 55 : Datum *params, 56 : int nparams, 57 : uint32 seed); 58 : static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks); 59 : static OffsetNumber system_nextsampletuple(SampleScanState *node, 60 : BlockNumber blockno, 61 : OffsetNumber maxoffset); 62 : 63 : 64 : /* 65 : * Create a TsmRoutine descriptor for the SYSTEM method. 66 : */ 67 : Datum 68 406 : tsm_system_handler(PG_FUNCTION_ARGS) 69 : { 70 406 : TsmRoutine *tsm = makeNode(TsmRoutine); 71 : 72 406 : tsm->parameterTypes = list_make1_oid(FLOAT4OID); 73 406 : tsm->repeatable_across_queries = true; 74 406 : tsm->repeatable_across_scans = true; 75 406 : tsm->SampleScanGetSampleSize = system_samplescangetsamplesize; 76 406 : tsm->InitSampleScan = system_initsamplescan; 77 406 : tsm->BeginSampleScan = system_beginsamplescan; 78 406 : tsm->NextSampleBlock = system_nextsampleblock; 79 406 : tsm->NextSampleTuple = system_nextsampletuple; 80 406 : tsm->EndSampleScan = NULL; 81 : 82 406 : PG_RETURN_POINTER(tsm); 83 : } 84 : 85 : /* 86 : * Sample size estimation. 87 : */ 88 : static void 89 96 : system_samplescangetsamplesize(PlannerInfo *root, 90 : RelOptInfo *baserel, 91 : List *paramexprs, 92 : BlockNumber *pages, 93 : double *tuples) 94 : { 95 : Node *pctnode; 96 : float4 samplefract; 97 : 98 : /* Try to extract an estimate for the sample percentage */ 99 96 : pctnode = (Node *) linitial(paramexprs); 100 96 : pctnode = estimate_expression_value(root, pctnode); 101 : 102 96 : if (IsA(pctnode, Const) && 103 84 : !((Const *) pctnode)->constisnull) 104 : { 105 78 : samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue); 106 78 : if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract)) 107 66 : samplefract /= 100.0f; 108 : else 109 : { 110 : /* Default samplefract if the value is bogus */ 111 12 : samplefract = 0.1f; 112 : } 113 : } 114 : else 115 : { 116 : /* Default samplefract if we didn't obtain a non-null Const */ 117 18 : samplefract = 0.1f; 118 : } 119 : 120 : /* We'll visit a sample of the pages ... */ 121 96 : *pages = clamp_row_est(baserel->pages * samplefract); 122 : 123 : /* ... and hopefully get a representative number of tuples from them */ 124 96 : *tuples = clamp_row_est(baserel->tuples * samplefract); 125 96 : } 126 : 127 : /* 128 : * Initialize during executor setup. 129 : */ 130 : static void 131 96 : system_initsamplescan(SampleScanState *node, int eflags) 132 : { 133 96 : node->tsm_state = palloc0(sizeof(SystemSamplerData)); 134 96 : } 135 : 136 : /* 137 : * Examine parameters and prepare for a sample scan. 138 : */ 139 : static void 140 84 : system_beginsamplescan(SampleScanState *node, 141 : Datum *params, 142 : int nparams, 143 : uint32 seed) 144 : { 145 84 : SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state; 146 84 : double percent = DatumGetFloat4(params[0]); 147 : double dcutoff; 148 : 149 84 : if (percent < 0 || percent > 100 || isnan(percent)) 150 12 : ereport(ERROR, 151 : (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT), 152 : errmsg("sample percentage must be between 0 and 100"))); 153 : 154 : /* 155 : * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to 156 : * store that as a uint64, of course. Note that this gives strictly 157 : * correct behavior at the limits of zero or one probability. 158 : */ 159 72 : dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100); 160 72 : sampler->cutoff = (uint64) dcutoff; 161 72 : sampler->seed = seed; 162 72 : sampler->nextblock = 0; 163 72 : sampler->lt = InvalidOffsetNumber; 164 : 165 : /* 166 : * Bulkread buffer access strategy probably makes sense unless we're 167 : * scanning a very small fraction of the table. The 1% cutoff here is a 168 : * guess. We should use pagemode visibility checking, since we scan all 169 : * tuples on each selected page. 170 : */ 171 72 : node->use_bulkread = (percent >= 1); 172 72 : node->use_pagemode = true; 173 72 : } 174 : 175 : /* 176 : * Select next block to sample. 177 : */ 178 : static BlockNumber 179 4326 : system_nextsampleblock(SampleScanState *node, BlockNumber nblocks) 180 : { 181 4326 : SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state; 182 4326 : BlockNumber nextblock = sampler->nextblock; 183 : uint32 hashinput[2]; 184 : 185 : /* 186 : * We compute the hash by applying hash_any to an array of 2 uint32's 187 : * containing the block number and seed. This is efficient to set up, and 188 : * with the current implementation of hash_any, it gives 189 : * machine-independent results, which is a nice property for regression 190 : * testing. 191 : * 192 : * These words in the hash input are the same throughout the block: 193 : */ 194 4326 : hashinput[1] = sampler->seed; 195 : 196 : /* 197 : * Loop over block numbers until finding suitable block or reaching end of 198 : * relation. 199 : */ 200 8532 : for (; nextblock < nblocks; nextblock++) 201 : { 202 : uint32 hash; 203 : 204 8466 : hashinput[0] = nextblock; 205 : 206 8466 : hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput, 207 : (int) sizeof(hashinput))); 208 8466 : if (hash < sampler->cutoff) 209 4260 : break; 210 : } 211 : 212 4326 : if (nextblock < nblocks) 213 : { 214 : /* Found a suitable block; remember where we should start next time */ 215 4260 : sampler->nextblock = nextblock + 1; 216 4260 : return nextblock; 217 : } 218 : 219 : /* Done, but let's reset nextblock to 0 for safety. */ 220 66 : sampler->nextblock = 0; 221 66 : return InvalidBlockNumber; 222 : } 223 : 224 : /* 225 : * Select next sampled tuple in current block. 226 : * 227 : * In block sampling, we just want to sample all the tuples in each selected 228 : * block. 229 : * 230 : * It is OK here to return an offset without knowing if the tuple is visible 231 : * (or even exists); nodeSamplescan.c will deal with that. 232 : * 233 : * When we reach end of the block, return InvalidOffsetNumber which tells 234 : * SampleScan to go to next block. 235 : */ 236 : static OffsetNumber 237 124578 : system_nextsampletuple(SampleScanState *node, 238 : BlockNumber blockno, 239 : OffsetNumber maxoffset) 240 : { 241 124578 : SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state; 242 124578 : OffsetNumber tupoffset = sampler->lt; 243 : 244 : /* Advance to next possible offset on page */ 245 124578 : if (tupoffset == InvalidOffsetNumber) 246 4260 : tupoffset = FirstOffsetNumber; 247 : else 248 120318 : tupoffset++; 249 : 250 : /* Done? */ 251 124578 : if (tupoffset > maxoffset) 252 4254 : tupoffset = InvalidOffsetNumber; 253 : 254 124578 : sampler->lt = tupoffset; 255 : 256 124578 : return tupoffset; 257 : }