LCOV - code coverage report

Current view: top level - src/backend/access/table - tableam.c (source / functions)
Test:         PostgreSQL 13devel
Date:         2019-11-21 14:06:36

                 Hit    Total    Coverage
Lines:           145    156      92.9 %
Functions:        19     19     100.0 %

Legend: Lines: hit / not hit

          Line data    Source code
       1             : /*----------------------------------------------------------------------
       2             :  *
       3             :  * tableam.c
       4             :  *      Table access method routines too big to be inline functions.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/table/tableam.c
      12             :  *
      13             :  * NOTES
      14             :  *    Note that most functions in here are documented in tableam.h, rather than
      15             :  *    here. That's because there are a lot of inline functions in tableam.h and
      16             :  *    it'd be harder to understand if one constantly had to switch between files.
      17             :  *
      18             :  *----------------------------------------------------------------------
      19             :  */
      20             : #include "postgres.h"
      21             : 
      22             : #include <math.h>
      23             : 
      24             : #include "access/heapam.h"        /* for ss_* */
      25             : #include "access/tableam.h"
      26             : #include "access/xact.h"
      27             : #include "optimizer/plancat.h"
      28             : #include "storage/bufmgr.h"
      29             : #include "storage/shmem.h"
      30             : #include "storage/smgr.h"
      31             : 
      32             : 
      33             : /* GUC variables */
      34             : char       *default_table_access_method = DEFAULT_TABLE_ACCESS_METHOD;
      35             : bool        synchronize_seqscans = true;
      36             : 
      37             : 
      38             : /* ----------------------------------------------------------------------------
      39             :  * Slot functions.
      40             :  * ----------------------------------------------------------------------------
      41             :  */
      42             : 
      43             : const TupleTableSlotOps *
      44    11750386 : table_slot_callbacks(Relation relation)
      45             : {
      46             :     const TupleTableSlotOps *tts_cb;
      47             : 
      48    11750386 :     if (relation->rd_tableam)
      49    11747118 :         tts_cb = relation->rd_tableam->slot_callbacks(relation);
      50        3268 :     else if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
      51             :     {
      52             :         /*
      53             :          * Historically FDWs expect to store heap tuples in slots. Continue
      54             :          * handing them one, to make it less painful to adapt FDWs to new
      55             :          * versions. The cost of a heap slot over a virtual slot is pretty
      56             :          * small.
      57             :          */
      58         718 :         tts_cb = &TTSOpsHeapTuple;
      59             :     }
      60             :     else
      61             :     {
      62             :         /*
      63             :          * These need to be supported, as some parts of the code (like COPY)
      64             :          * need to create slots for such relations too. It seems better to
      65             :          * centralize the knowledge that a virtual slot is the right thing in
      66             :          * that case here.
      67             :          */
      68             :         Assert(relation->rd_rel->relkind == RELKIND_VIEW ||
      69             :                relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
      70        2550 :         tts_cb = &TTSOpsVirtual;
      71             :     }
      72             : 
      73    11750386 :     return tts_cb;
      74             : }
      75             : 
      76             : TupleTableSlot *
      77    11415028 : table_slot_create(Relation relation, List **reglist)
      78             : {
      79             :     const TupleTableSlotOps *tts_cb;
      80             :     TupleTableSlot *slot;
      81             : 
      82    11415028 :     tts_cb = table_slot_callbacks(relation);
      83    11415028 :     slot = MakeSingleTupleTableSlot(RelationGetDescr(relation), tts_cb);
      84             : 
      85    11415028 :     if (reglist)
      86        6164 :         *reglist = lappend(*reglist, slot);
      87             : 
      88    11415028 :     return slot;
      89             : }
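
As a usage sketch (editorial addition, not part of the covered source): the reglist argument lets a caller collect several slots and release them together. The function and relation names below are hypothetical.

    #include "postgres.h"

    #include "access/tableam.h"
    #include "executor/tuptable.h"
    #include "nodes/pg_list.h"

    /*
     * Hypothetical illustration: create one slot per relation, remember them
     * in a list via the reglist argument, and drop them all at the end.
     */
    static void
    use_slots_for_relations(Relation *rels, int nrels)
    {
        List       *slots = NIL;
        ListCell   *lc;

        for (int i = 0; i < nrels; i++)
            (void) table_slot_create(rels[i], &slots);

        /* ... form or fetch tuples into the slots here ... */

        foreach(lc, slots)
            ExecDropSingleTupleTableSlot((TupleTableSlot *) lfirst(lc));
        list_free(slots);
    }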
      90             : 
      91             : 
      92             : /* ----------------------------------------------------------------------------
      93             :  * Table scan functions.
      94             :  * ----------------------------------------------------------------------------
      95             :  */
      96             : 
      97             : TableScanDesc
      98      129702 : table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
      99             : {
     100      129702 :     uint32      flags = SO_TYPE_SEQSCAN |
     101             :     SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE | SO_TEMP_SNAPSHOT;
     102      129702 :     Oid         relid = RelationGetRelid(relation);
     103      129702 :     Snapshot    snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
     104             : 
     105      129702 :     return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, key,
     106             :                                             NULL, flags);
     107             : }
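
A typical consumer pairs table_beginscan_catalog with the inline scan helpers from tableam.h. The sketch below is an editorial addition with the per-tuple work left hypothetical.

    #include "postgres.h"

    #include "access/tableam.h"
    #include "executor/tuptable.h"

    /* Hypothetical: visit every visible row of a catalog relation "rel". */
    static void
    scan_whole_catalog(Relation rel)
    {
        TableScanDesc scan = table_beginscan_catalog(rel, 0, NULL);
        TupleTableSlot *slot = table_slot_create(rel, NULL);

        while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
        {
            /* ... examine the tuple now stored in "slot" ... */
        }

        table_endscan(scan);
        ExecDropSingleTupleTableSlot(slot);
    }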
     108             : 
     109             : void
     110         176 : table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
     111             : {
     112             :     Assert(IsMVCCSnapshot(snapshot));
     113             : 
     114         176 :     RegisterSnapshot(snapshot);
     115         176 :     scan->rs_snapshot = snapshot;
     116         176 :     scan->rs_flags |= SO_TEMP_SNAPSHOT;
     117         176 : }
     118             : 
     119             : 
     120             : /* ----------------------------------------------------------------------------
     121             :  * Parallel table scan related functions.
     122             :  * ----------------------------------------------------------------------------
     123             :  */
     124             : 
     125             : Size
     126         576 : table_parallelscan_estimate(Relation rel, Snapshot snapshot)
     127             : {
     128         576 :     Size        sz = 0;
     129             : 
     130         576 :     if (IsMVCCSnapshot(snapshot))
     131         492 :         sz = add_size(sz, EstimateSnapshotSpace(snapshot));
     132             :     else
     133             :         Assert(snapshot == SnapshotAny);
     134             : 
     135         576 :     sz = add_size(sz, rel->rd_tableam->parallelscan_estimate(rel));
     136             : 
     137         576 :     return sz;
     138             : }
     139             : 
     140             : void
     141         576 : table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan,
     142             :                               Snapshot snapshot)
     143             : {
     144         576 :     Size        snapshot_off = rel->rd_tableam->parallelscan_initialize(rel, pscan);
     145             : 
     146         576 :     pscan->phs_snapshot_off = snapshot_off;
     147             : 
     148         576 :     if (IsMVCCSnapshot(snapshot))
     149             :     {
     150         492 :         SerializeSnapshot(snapshot, (char *) pscan + pscan->phs_snapshot_off);
     151         492 :         pscan->phs_snapshot_any = false;
     152             :     }
     153             :     else
     154             :     {
     155             :         Assert(snapshot == SnapshotAny);
     156          84 :         pscan->phs_snapshot_any = true;
     157             :     }
     158         576 : }
     159             : 
     160             : TableScanDesc
     161        2142 : table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
     162             : {
     163             :     Snapshot    snapshot;
     164        2142 :     uint32      flags = SO_TYPE_SEQSCAN |
     165             :     SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
     166             : 
     167             :     Assert(RelationGetRelid(relation) == parallel_scan->phs_relid);
     168             : 
     169        2142 :     if (!parallel_scan->phs_snapshot_any)
     170             :     {
     171             :         /* Snapshot was serialized -- restore it */
     172        1974 :         snapshot = RestoreSnapshot((char *) parallel_scan +
     173        1974 :                                    parallel_scan->phs_snapshot_off);
     174        1974 :         RegisterSnapshot(snapshot);
     175        1974 :         flags |= SO_TEMP_SNAPSHOT;
     176             :     }
     177             :     else
     178             :     {
     179             :         /* SnapshotAny passed by caller (not serialized) */
     180         168 :         snapshot = SnapshotAny;
     181             :     }
     182             : 
     183        2142 :     return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL,
     184             :                                             parallel_scan, flags);
     185             : }
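
Hedged sketch (editorial addition) of how the three parallel-scan entry points fit together. Real callers place the descriptor in dynamic shared memory shared with the workers; that plumbing is elided here and backend-local palloc0 is used as a stand-in, so the helper names are hypothetical.

    #include "postgres.h"

    #include "access/tableam.h"
    #include "utils/snapshot.h"

    /*
     * Hypothetical leader-side setup: size and fill a parallel scan
     * descriptor.  In real code "pscan" lives in a DSM segment, not in
     * backend-local memory.
     */
    static ParallelTableScanDesc
    setup_parallel_scan(Relation rel, Snapshot snapshot)
    {
        Size        sz = table_parallelscan_estimate(rel, snapshot);
        ParallelTableScanDesc pscan = (ParallelTableScanDesc) palloc0(sz);

        table_parallelscan_initialize(rel, pscan, snapshot);
        return pscan;
    }

    /* Each worker (and the leader) then starts its own scan on that state. */
    static TableScanDesc
    start_worker_scan(Relation rel, ParallelTableScanDesc pscan)
    {
        return table_beginscan_parallel(rel, pscan);
    }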
     186             : 
     187             : 
     188             : /* ----------------------------------------------------------------------------
     189             :  * Index scan related functions.
     190             :  * ----------------------------------------------------------------------------
     191             :  */
     192             : 
     193             : /*
     194             :  * To perform the check, simply start an index scan, create the necessary
     195             :  * slot, do the heap lookup, and shut everything down again. This could be
     196             :  * optimized, but is unlikely to matter from a performance POV. If there
     197             :  * frequently are live index pointers also matching a unique index key, the
     198             :  * CPU overhead of this routine is unlikely to matter.
     199             :  */
     200             : bool
     201      209654 : table_index_fetch_tuple_check(Relation rel,
     202             :                               ItemPointer tid,
     203             :                               Snapshot snapshot,
     204             :                               bool *all_dead)
     205             : {
     206             :     IndexFetchTableData *scan;
     207             :     TupleTableSlot *slot;
     208      209654 :     bool        call_again = false;
     209             :     bool        found;
     210             : 
     211      209654 :     slot = table_slot_create(rel, NULL);
     212      209654 :     scan = table_index_fetch_begin(rel);
     213      209654 :     found = table_index_fetch_tuple(scan, tid, snapshot, slot, &call_again,
     214             :                                     all_dead);
     215      209654 :     table_index_fetch_end(scan);
     216      209654 :     ExecDropSingleTupleTableSlot(slot);
     217             : 
     218      209654 :     return found;
     219             : }
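
Hedged sketch of a caller (editorial addition): checking whether a TID obtained from an index still points at a tuple some transaction could see, using a dirty snapshot. The wrapper name is hypothetical.

    #include "postgres.h"

    #include "access/tableam.h"
    #include "storage/itemptr.h"
    #include "utils/snapmgr.h"

    /*
     * Hypothetical: return true if "tid" still references a possibly-live
     * tuple in "rel"; *all_dead is set if every version reachable via the
     * TID is dead to all transactions.
     */
    static bool
    tid_possibly_live(Relation rel, ItemPointer tid, bool *all_dead)
    {
        SnapshotData SnapshotDirty;

        InitDirtySnapshot(SnapshotDirty);
        return table_index_fetch_tuple_check(rel, tid, &SnapshotDirty, all_dead);
    }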
     220             : 
     221             : 
     222             : /* ------------------------------------------------------------------------
     223             :  * Functions for non-modifying operations on individual tuples
     224             :  * ------------------------------------------------------------------------
     225             :  */
     226             : 
     227             : void
     228         180 : table_tuple_get_latest_tid(TableScanDesc scan, ItemPointer tid)
     229             : {
     230         180 :     Relation    rel = scan->rs_rd;
     231         180 :     const TableAmRoutine *tableam = rel->rd_tableam;
     232             : 
     233             :     /*
     234             :      * Since this can be called with user-supplied TID, don't trust the input
     235             :      * too much.
     236             :      */
     237         180 :     if (!tableam->tuple_tid_valid(scan, tid))
     238           0 :         ereport(ERROR,
     239             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     240             :                  errmsg("tid (%u, %u) is not valid for relation \"%s\"",
     241             :                         ItemPointerGetBlockNumberNoCheck(tid),
     242             :                         ItemPointerGetOffsetNumberNoCheck(tid),
     243             :                         RelationGetRelationName(rel))));
     244             : 
     245         180 :     tableam->tuple_get_latest_tid(scan, tid);
     246         180 : }
     247             : 
     248             : 
     249             : /* ----------------------------------------------------------------------------
     250             :  * Functions to make modifications a bit simpler.
     251             :  * ----------------------------------------------------------------------------
     252             :  */
     253             : 
     254             : /*
     255             :  * simple_table_tuple_insert - insert a tuple
     256             :  *
     257             :  * Currently, this routine differs from table_tuple_insert only in supplying a
     258             :  * default command ID and not allowing access to the speedup options.
     259             :  */
     260             : void
     261         702 : simple_table_tuple_insert(Relation rel, TupleTableSlot *slot)
     262             : {
     263         702 :     table_tuple_insert(rel, slot, GetCurrentCommandId(true), 0, NULL);
     264         702 : }
     265             : 
     266             : /*
     267             :  * simple_table_tuple_delete - delete a tuple
     268             :  *
     269             :  * This routine may be used to delete a tuple when concurrent updates of
     270             :  * the target tuple are not expected (for example, because we have a lock
     271             :  * on the relation associated with the tuple).  Any failure is reported
     272             :  * via ereport().
     273             :  */
     274             : void
     275         382 : simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
     276             : {
     277             :     TM_Result   result;
     278             :     TM_FailureData tmfd;
     279             : 
     280         382 :     result = table_tuple_delete(rel, tid,
     281             :                                 GetCurrentCommandId(true),
     282             :                                 snapshot, InvalidSnapshot,
     283             :                                 true /* wait for commit */ ,
     284             :                                 &tmfd, false /* changingPart */ );
     285             : 
     286         382 :     switch (result)
     287             :     {
     288             :         case TM_SelfModified:
     289             :             /* Tuple was already updated in current command? */
     290           0 :             elog(ERROR, "tuple already updated by self");
     291             :             break;
     292             : 
     293             :         case TM_Ok:
     294             :             /* done successfully */
     295         382 :             break;
     296             : 
     297             :         case TM_Updated:
     298           0 :             elog(ERROR, "tuple concurrently updated");
     299             :             break;
     300             : 
     301             :         case TM_Deleted:
     302           0 :             elog(ERROR, "tuple concurrently deleted");
     303             :             break;
     304             : 
     305             :         default:
     306           0 :             elog(ERROR, "unrecognized table_tuple_delete status: %u", result);
     307             :             break;
     308             :     }
     309         382 : }
     310             : 
     311             : /*
     312             :  * simple_table_tuple_update - replace a tuple
     313             :  *
     314             :  * This routine may be used to update a tuple when concurrent updates of
     315             :  * the target tuple are not expected (for example, because we have a lock
     316             :  * on the relation associated with the tuple).  Any failure is reported
     317             :  * via ereport().
     318             :  */
     319             : void
     320         214 : simple_table_tuple_update(Relation rel, ItemPointer otid,
     321             :                           TupleTableSlot *slot,
     322             :                           Snapshot snapshot,
     323             :                           bool *update_indexes)
     324             : {
     325             :     TM_Result   result;
     326             :     TM_FailureData tmfd;
     327             :     LockTupleMode lockmode;
     328             : 
     329         214 :     result = table_tuple_update(rel, otid, slot,
     330             :                                 GetCurrentCommandId(true),
     331             :                                 snapshot, InvalidSnapshot,
     332             :                                 true /* wait for commit */ ,
     333             :                                 &tmfd, &lockmode, update_indexes);
     334             : 
     335         214 :     switch (result)
     336             :     {
     337             :         case TM_SelfModified:
     338             :             /* Tuple was already updated in current command? */
     339           0 :             elog(ERROR, "tuple already updated by self");
     340             :             break;
     341             : 
     342             :         case TM_Ok:
     343             :             /* done successfully */
     344         214 :             break;
     345             : 
     346             :         case TM_Updated:
     347           0 :             elog(ERROR, "tuple concurrently updated");
     348             :             break;
     349             : 
     350             :         case TM_Deleted:
     351           0 :             elog(ERROR, "tuple concurrently deleted");
     352             :             break;
     353             : 
     354             :         default:
     355           0 :             elog(ERROR, "unrecognized table_tuple_update status: %u", result);
     356             :             break;
     357             :     }
     358             : 
     359         214 : }
     360             : 
     361             : 
     362             : /* ----------------------------------------------------------------------------
     363             :  * Helper functions to implement parallel scans for block oriented AMs.
     364             :  * ----------------------------------------------------------------------------
     365             :  */
     366             : 
     367             : Size
     368         576 : table_block_parallelscan_estimate(Relation rel)
     369             : {
     370         576 :     return sizeof(ParallelBlockTableScanDescData);
     371             : }
     372             : 
     373             : Size
     374         576 : table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan)
     375             : {
     376         576 :     ParallelBlockTableScanDesc bpscan = (ParallelBlockTableScanDesc) pscan;
     377             : 
     378         576 :     bpscan->base.phs_relid = RelationGetRelid(rel);
     379         576 :     bpscan->phs_nblocks = RelationGetNumberOfBlocks(rel);
     380             :     /* compare phs_syncscan initialization to similar logic in initscan */
     381        1728 :     bpscan->base.phs_syncscan = synchronize_seqscans &&
     382        1152 :         !RelationUsesLocalBuffers(rel) &&
     383         576 :         bpscan->phs_nblocks > NBuffers / 4;
     384         576 :     SpinLockInit(&bpscan->phs_mutex);
     385         576 :     bpscan->phs_startblock = InvalidBlockNumber;
     386         576 :     pg_atomic_init_u64(&bpscan->phs_nallocated, 0);
     387             : 
     388         576 :     return sizeof(ParallelBlockTableScanDescData);
     389             : }
     390             : 
     391             : void
     392         148 : table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
     393             : {
     394         148 :     ParallelBlockTableScanDesc bpscan = (ParallelBlockTableScanDesc) pscan;
     395             : 
     396         148 :     pg_atomic_write_u64(&bpscan->phs_nallocated, 0);
     397         148 : }
     398             : 
     399             : /*
     400             :  * find and set the scan's startblock
     401             :  *
     402             :  * Determine where the parallel seq scan should start.  This function may be
     403             :  * called many times, once by each parallel worker.  We must be careful only
     404             :  * to set the startblock once.
     405             :  */
     406             : void
     407        1424 : table_block_parallelscan_startblock_init(Relation rel, ParallelBlockTableScanDesc pbscan)
     408             : {
     409        1424 :     BlockNumber sync_startpage = InvalidBlockNumber;
     410             : 
     411             : retry:
     412             :     /* Grab the spinlock. */
     413        1426 :     SpinLockAcquire(&pbscan->phs_mutex);
     414             : 
     415             :     /*
     416             :      * If the scan's startblock has not yet been initialized, we must do so
     417             :      * now.  If this is not a synchronized scan, we just start at block 0, but
     418             :      * if it is a synchronized scan, we must get the starting position from
     419             :      * the synchronized scan machinery.  We can't hold the spinlock while
     420             :      * doing that, though, so release the spinlock, get the information we
     421             :      * need, and retry.  If nobody else has initialized the scan in the
     422             :      * meantime, we'll fill in the value we fetched on the second time
     423             :      * through.
     424             :      */
     425        1426 :     if (pbscan->phs_startblock == InvalidBlockNumber)
     426             :     {
     427         446 :         if (!pbscan->base.phs_syncscan)
     428         442 :             pbscan->phs_startblock = 0;
     429           4 :         else if (sync_startpage != InvalidBlockNumber)
     430           2 :             pbscan->phs_startblock = sync_startpage;
     431             :         else
     432             :         {
     433           2 :             SpinLockRelease(&pbscan->phs_mutex);
     434           2 :             sync_startpage = ss_get_location(rel, pbscan->phs_nblocks);
     435           2 :             goto retry;
     436             :         }
     437             :     }
     438        1424 :     SpinLockRelease(&pbscan->phs_mutex);
     439        1424 : }
     440             : 
     441             : /*
     442             :  * get the next page to scan
     443             :  *
     444             :  * Get the next page to scan.  Even if there are no pages left to scan,
     445             :  * another backend could have grabbed a page to scan and not yet finished
     446             :  * looking at it, so it doesn't follow that the scan is done when the first
     447             :  * backend gets an InvalidBlockNumber return.
     448             :  */
     449             : BlockNumber
     450      114008 : table_block_parallelscan_nextpage(Relation rel, ParallelBlockTableScanDesc pbscan)
     451             : {
     452             :     BlockNumber page;
     453             :     uint64      nallocated;
     454             : 
     455             :     /*
     456             :      * phs_nallocated tracks how many pages have been allocated to workers
     457             :      * already.  When phs_nallocated >= rs_nblocks, all blocks have been
     458             :      * allocated.
     459             :      *
     460             :      * Because we use an atomic fetch-and-add to fetch the current value, the
     461             :      * phs_nallocated counter will exceed rs_nblocks, because workers will
     462             :      * still increment the value when they try to allocate the next block even
     463             :      * though all blocks have already been allocated. The counter must be 64 bits
     464             :      * wide because of that, to avoid wrapping around when rs_nblocks is close
     465             :      * to 2^32.
     466             :      *
     467             :      * The actual page to return is calculated by adding the counter to the
     468             :      * starting block number, modulo nblocks.
     469             :      */
     470      114008 :     nallocated = pg_atomic_fetch_add_u64(&pbscan->phs_nallocated, 1);
     471      114008 :     if (nallocated >= pbscan->phs_nblocks)
     472        1424 :         page = InvalidBlockNumber;  /* all blocks have been allocated */
     473             :     else
     474      112584 :         page = (nallocated + pbscan->phs_startblock) % pbscan->phs_nblocks;
     475             : 
     476             :     /*
     477             :      * Report scan location.  Normally, we report the current page number.
     478             :      * When we reach the end of the scan, though, we report the starting page,
     479             :      * not the ending page, just so the starting positions for later scans
     480             :      * don't slew backwards.  We only report the position at the end of the
     481             :      * scan once, though: subsequent callers will report nothing.
     482             :      */
     483      114008 :     if (pbscan->base.phs_syncscan)
     484             :     {
     485       17704 :         if (page != InvalidBlockNumber)
     486       17700 :             ss_report_location(rel, page);
     487           4 :         else if (nallocated == pbscan->phs_nblocks)
     488           2 :             ss_report_location(rel, pbscan->phs_startblock);
     489             :     }
     490             : 
     491      114008 :     return page;
     492             : }
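
Hedged sketch (editorial addition) of the loop a block-oriented AM runs in each participating backend, showing how the two helpers above are meant to be paired; the per-block work is left as a comment and the driver function name is hypothetical.

    #include "postgres.h"

    #include "access/tableam.h"

    /* Hypothetical per-worker driver for a parallel block-based scan. */
    static void
    scan_my_share_of_blocks(Relation rel, ParallelBlockTableScanDesc pbscan)
    {
        BlockNumber blkno;

        /* Agree (once, under the mutex) on the scan's starting block. */
        table_block_parallelscan_startblock_init(rel, pbscan);

        /* Claim blocks until the shared allocator reports none are left. */
        while ((blkno = table_block_parallelscan_nextpage(rel, pbscan)) !=
               InvalidBlockNumber)
        {
            /* ... read block "blkno" and emit its visible tuples ... */
        }
    }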
     493             : 
     494             : /* ----------------------------------------------------------------------------
     495             :  * Helper functions to implement relation sizing for block oriented AMs.
     496             :  * ----------------------------------------------------------------------------
     497             :  */
     498             : 
     499             : /*
     500             :  * table_block_relation_size
     501             :  *
     502             :  * If a table AM uses the various relation forks as the sole place where data
     503             :  * is stored, and if it uses them in the expected manner (e.g. the actual data
     504             :  * is in the main fork rather than some other), it can use this implementation
     505             :  * of the relation_size callback rather than implementing its own.
     506             :  */
     507             : uint64
     508     2230100 : table_block_relation_size(Relation rel, ForkNumber forkNumber)
     509             : {
     510     2230100 :     uint64      nblocks = 0;
     511             : 
     512             :     /* Open it at the smgr level if not already done */
     513     2230100 :     RelationOpenSmgr(rel);
     514             : 
     515             :     /* InvalidForkNumber indicates returning the size for all forks */
     516     2230100 :     if (forkNumber == InvalidForkNumber)
     517             :     {
     518           0 :         for (int i = 0; i < MAX_FORKNUM; i++)
     519           0 :             nblocks += smgrnblocks(rel->rd_smgr, i);
     520             :     }
     521             :     else
     522     2230100 :         nblocks = smgrnblocks(rel->rd_smgr, forkNumber);
     523             : 
     524     2230100 :     return nblocks * BLCKSZ;
     525             : }
     526             : 
     527             : /*
     528             :  * table_block_relation_estimate_size
     529             :  *
     530             :  * This function can't be directly used as the implementation of the
     531             :  * relation_estimate_size callback, because it has a few additional parameters.
     532             :  * Instead, it is intended to be used as a helper function; the caller can
     533             :  * pass through the arguments to its relation_estimate_size function plus the
     534             :  * additional values required here.
     535             :  *
     536             :  * overhead_bytes_per_tuple should contain the approximate number of bytes
     537             :  * of storage required to store a tuple above and beyond what is required for
     538             :  * the tuple data proper. Typically, this would include things like the
     539             :  * size of the tuple header and item pointer. This is only used for query
     540             :  * planning, so a table AM where the value is not constant could choose to
     541             :  * pass a "best guess".
     542             :  *
     543             :  * usable_bytes_per_page should contain the approximate number of bytes per
     544             :  * page usable for tuple data, excluding the page header and any anticipated
     545             :  * special space.
     546             :  */
     547             : void
     548      252132 : table_block_relation_estimate_size(Relation rel, int32 *attr_widths,
     549             :                                    BlockNumber *pages, double *tuples,
     550             :                                    double *allvisfrac,
     551             :                                    Size overhead_bytes_per_tuple,
     552             :                                    Size usable_bytes_per_page)
     553             : {
     554             :     BlockNumber curpages;
     555             :     BlockNumber relpages;
     556             :     double      reltuples;
     557             :     BlockNumber relallvisible;
     558             :     double      density;
     559             : 
     560             :     /* it should have storage, so we can call the smgr */
     561      252132 :     curpages = RelationGetNumberOfBlocks(rel);
     562             : 
     563             :     /* coerce values in pg_class to more desirable types */
     564      252132 :     relpages = (BlockNumber) rel->rd_rel->relpages;
     565      252132 :     reltuples = (double) rel->rd_rel->reltuples;
     566      252132 :     relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
     567             : 
     568             :     /*
     569             :      * HACK: if the relation has never yet been vacuumed, use a minimum size
     570             :      * estimate of 10 pages.  The idea here is to avoid assuming a
     571             :      * newly-created table is really small, even if it currently is, because
     572             :      * that may not be true once some data gets loaded into it.  Once a vacuum
     573             :      * or analyze cycle has been done on it, it's more reasonable to believe
     574             :      * the size is somewhat stable.
     575             :      *
     576             :      * (Note that this is only an issue if the plan gets cached and used again
     577             :      * after the table has been filled.  What we're trying to avoid is using a
     578             :      * nestloop-type plan on a table that has grown substantially since the
     579             :      * plan was made.  Normally, autovacuum/autoanalyze will occur once enough
     580             :      * inserts have happened and cause cached-plan invalidation; but that
     581             :      * doesn't happen instantaneously, and it won't happen at all for cases
     582             :      * such as temporary tables.)
     583             :      *
     584             :      * We approximate "never vacuumed" by "has relpages = 0", which means this
     585             :      * will also fire on genuinely empty relations.  Not great, but
     586             :      * fortunately that's a seldom-seen case in the real world, and it
     587             :      * shouldn't degrade the quality of the plan too much anyway to err in
     588             :      * this direction.
     589             :      *
     590             :      * If the table has inheritance children, we don't apply this heuristic.
     591             :      * Totally empty parent tables are quite common, so we should be willing
     592             :      * to believe that they are empty.
     593             :      */
     594      252132 :     if (curpages < 10 &&
     595       92754 :         relpages == 0 &&
     596       92754 :         !rel->rd_rel->relhassubclass)
     597       90588 :         curpages = 10;
     598             : 
     599             :     /* report estimated # pages */
     600      252132 :     *pages = curpages;
     601             :     /* quick exit if rel is clearly empty */
     602      252132 :     if (curpages == 0)
     603             :     {
     604         530 :         *tuples = 0;
     605         530 :         *allvisfrac = 0;
     606         530 :         return;
     607             :     }
     608             : 
     609             :     /* estimate number of tuples from previous tuple density */
     610      251602 :     if (relpages > 0)
     611      154550 :         density = reltuples / (double) relpages;
     612             :     else
     613             :     {
     614             :         /*
     615             :          * When we have no data because the relation was truncated, estimate
     616             :          * tuple width from attribute datatypes.  We assume here that the
     617             :          * pages are completely full, which is OK for tables (since they've
     618             :          * presumably not been VACUUMed yet) but is probably an overestimate
     619             :          * for indexes.  Fortunately get_relation_info() can clamp the
     620             :          * overestimate to the parent table's size.
     621             :          *
     622             :          * Note: this code intentionally disregards alignment considerations,
     623             :          * because (a) that would be gilding the lily considering how crude
     624             :          * the estimate is, (b) it creates platform dependencies in the
     625             :          * default plans which are kind of a headache for regression testing,
     626             :          * and (c) different table AMs might use different padding schemes.
     627             :          */
     628             :         int32       tuple_width;
     629             : 
     630       97052 :         tuple_width = get_rel_data_width(rel, attr_widths);
     631       97052 :         tuple_width += overhead_bytes_per_tuple;
     632             :         /* note: integer division is intentional here */
     633       97052 :         density = usable_bytes_per_page / tuple_width;
     634             :     }
     635      251602 :     *tuples = rint(density * (double) curpages);
     636             : 
     637             :     /*
     638             :      * We use relallvisible as-is, rather than scaling it up like we do for
     639             :      * the pages and tuples counts, on the theory that any pages added since
     640             :      * the last VACUUM are most likely not marked all-visible.  But costsize.c
     641             :      * wants it converted to a fraction.
     642             :      */
     643      251602 :     if (relallvisible == 0 || curpages <= 0)
     644      149572 :         *allvisfrac = 0;
     645      102030 :     else if ((double) relallvisible >= curpages)
     646       53412 :         *allvisfrac = 1;
     647             :     else
     648       48618 :         *allvisfrac = (double) relallvisible / curpages;
     649             : }

Generated by: LCOV version 1.13