Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_relation.c
4 : * Implementation of relation statistics.
5 : *
6 : * This file contains the implementation of relation statistics. It is kept
7 : * separate from pgstat.c to enforce the line between the statistics access /
8 : * storage implementation and the details about individual types of
9 : * statistics.
10 : *
11 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
12 : *
13 : * IDENTIFICATION
14 : * src/backend/utils/activity/pgstat_relation.c
15 : * -------------------------------------------------------------------------
16 : */
17 :
18 : #include "postgres.h"
19 :
20 : #include "access/twophase_rmgr.h"
21 : #include "access/xact.h"
22 : #include "catalog/catalog.h"
23 : #include "utils/memutils.h"
24 : #include "utils/pgstat_internal.h"
25 : #include "utils/rel.h"
26 : #include "utils/timestamp.h"
27 :
28 :
29 : /* Record that's written to 2PC state file when pgstat state is persisted */
30 : typedef struct TwoPhasePgStatRecord
31 : {
32 : PgStat_Counter tuples_inserted; /* tuples inserted in xact */
33 : PgStat_Counter tuples_updated; /* tuples updated in xact */
34 : PgStat_Counter tuples_deleted; /* tuples deleted in xact */
35 : /* tuples i/u/d prior to truncate/drop */
36 : PgStat_Counter inserted_pre_truncdrop;
37 : PgStat_Counter updated_pre_truncdrop;
38 : PgStat_Counter deleted_pre_truncdrop;
39 : Oid id; /* table's OID */
40 : bool shared; /* is it a shared catalog? */
41 : bool truncdropped; /* was the relation truncated/dropped? */
42 : } TwoPhasePgStatRecord;
43 :
44 :
45 : static PgStat_TableStatus *pgstat_prep_relation_pending(Oid rel_id, bool isshared);
46 : static void add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level);
47 : static void ensure_tabstat_xact_level(PgStat_TableStatus *pgstat_info);
48 : static void save_truncdrop_counters(PgStat_TableXactStatus *trans, bool is_drop);
49 : static void restore_truncdrop_counters(PgStat_TableXactStatus *trans);
50 :
51 :
52 : /*
53 : * Copy stats between relations. This is used for things like REINDEX
54 : * CONCURRENTLY.
55 : */
56 : void
57 504 : pgstat_copy_relation_stats(Relation dst, Relation src)
58 : {
59 : PgStat_StatTabEntry *srcstats;
60 : PgStatShared_Relation *dstshstats;
61 : PgStat_EntryRef *dst_ref;
62 :
63 504 : srcstats = pgstat_fetch_stat_tabentry_ext(src->rd_rel->relisshared,
64 : RelationGetRelid(src));
65 504 : if (!srcstats)
66 288 : return;
67 :
68 216 : dst_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION,
69 216 : dst->rd_rel->relisshared ? InvalidOid : MyDatabaseId,
70 216 : RelationGetRelid(dst),
71 : false);
72 :
73 216 : dstshstats = (PgStatShared_Relation *) dst_ref->shared_stats;
74 216 : dstshstats->stats = *srcstats;
75 :
76 216 : pgstat_unlock_entry(dst_ref);
77 : }
78 :
79 : /*
80 : * Initialize a relcache entry to count access statistics. Called whenever a
81 : * relation is opened.
82 : *
83 : * We assume that a relcache entry's pgstat_info field is zeroed by relcache.c
84 : * when the relcache entry is made; thereafter it is long-lived data.
85 : *
86 : * This does not create a reference to a stats entry in shared memory, nor
87 : * allocate memory for the pending stats. That happens in
88 : * pgstat_assoc_relation().
89 : */
90 : void
91 37901382 : pgstat_init_relation(Relation rel)
92 : {
93 37901382 : char relkind = rel->rd_rel->relkind;
94 :
95 : /*
96 : * We only count stats for relations with storage and partitioned tables
97 : */
98 37901382 : if (!RELKIND_HAS_STORAGE(relkind) && relkind != RELKIND_PARTITIONED_TABLE)
99 : {
100 172664 : rel->pgstat_enabled = false;
101 172664 : rel->pgstat_info = NULL;
102 172664 : return;
103 : }
104 :
105 37728718 : if (!pgstat_track_counts)
106 : {
107 360 : if (rel->pgstat_info)
108 20 : pgstat_unlink_relation(rel);
109 :
110 : /* We're not counting at all */
111 360 : rel->pgstat_enabled = false;
112 360 : rel->pgstat_info = NULL;
113 360 : return;
114 : }
115 :
116 37728358 : rel->pgstat_enabled = true;
117 : }
118 :
119 : /*
120 : * Prepare for statistics for this relation to be collected.
121 : *
122 : * This ensures we have a reference to the stats entry before stats can be
123 : * generated. That is important because a relation drop in another connection
124 : * could otherwise lead to the stats entry being dropped, which then later
125 : * would get recreated when flushing stats.
126 : *
127 : * This is separate from pgstat_init_relation() as it is not uncommon for
128 : * relcache entries to be opened without ever getting stats reported.
129 : */
130 : void
131 1681198 : pgstat_assoc_relation(Relation rel)
132 : {
133 : Assert(rel->pgstat_enabled);
134 : Assert(rel->pgstat_info == NULL);
135 :
136 : /* Else find or make the PgStat_TableStatus entry, and update link */
137 3362396 : rel->pgstat_info = pgstat_prep_relation_pending(RelationGetRelid(rel),
138 1681198 : rel->rd_rel->relisshared);
139 :
140 : /* don't allow link a stats to multiple relcache entries */
141 : Assert(rel->pgstat_info->relation == NULL);
142 :
143 : /* mark this relation as the owner */
144 1681198 : rel->pgstat_info->relation = rel;
145 1681198 : }
146 :
147 : /*
148 : * Break the mutual link between a relcache entry and pending stats entry.
149 : * This must be called whenever one end of the link is removed.
150 : */
151 : void
152 2640256 : pgstat_unlink_relation(Relation rel)
153 : {
154 : /* remove the link to stats info if any */
155 2640256 : if (rel->pgstat_info == NULL)
156 959058 : return;
157 :
158 : /* link sanity check */
159 : Assert(rel->pgstat_info->relation == rel);
160 1681198 : rel->pgstat_info->relation = NULL;
161 1681198 : rel->pgstat_info = NULL;
162 : }
163 :
164 : /*
165 : * Ensure that stats are dropped if transaction aborts.
166 : */
167 : void
168 129098 : pgstat_create_relation(Relation rel)
169 : {
170 129098 : pgstat_create_transactional(PGSTAT_KIND_RELATION,
171 129098 : rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId,
172 129098 : RelationGetRelid(rel));
173 129098 : }
174 :
175 : /*
176 : * Ensure that stats are dropped if transaction commits.
177 : */
178 : void
179 70356 : pgstat_drop_relation(Relation rel)
180 : {
181 70356 : int nest_level = GetCurrentTransactionNestLevel();
182 : PgStat_TableStatus *pgstat_info;
183 :
184 70356 : pgstat_drop_transactional(PGSTAT_KIND_RELATION,
185 70356 : rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId,
186 70356 : RelationGetRelid(rel));
187 :
188 70356 : if (!pgstat_should_count_relation(rel))
189 8184 : return;
190 :
191 : /*
192 : * Transactionally set counters to 0. That ensures that accesses to
193 : * pg_stat_xact_all_tables inside the transaction show 0.
194 : */
195 62172 : pgstat_info = rel->pgstat_info;
196 62172 : if (pgstat_info->trans &&
197 1212 : pgstat_info->trans->nest_level == nest_level)
198 : {
199 1206 : save_truncdrop_counters(pgstat_info->trans, true);
200 1206 : pgstat_info->trans->tuples_inserted = 0;
201 1206 : pgstat_info->trans->tuples_updated = 0;
202 1206 : pgstat_info->trans->tuples_deleted = 0;
203 : }
204 : }
205 :
206 : /*
207 : * Report that the table was just vacuumed and flush IO statistics.
208 : */
209 : void
210 117624 : pgstat_report_vacuum(Oid tableoid, bool shared,
211 : PgStat_Counter livetuples, PgStat_Counter deadtuples,
212 : TimestampTz starttime)
213 : {
214 : PgStat_EntryRef *entry_ref;
215 : PgStatShared_Relation *shtabentry;
216 : PgStat_StatTabEntry *tabentry;
217 117624 : Oid dboid = (shared ? InvalidOid : MyDatabaseId);
218 : TimestampTz ts;
219 : PgStat_Counter elapsedtime;
220 :
221 117624 : if (!pgstat_track_counts)
222 0 : return;
223 :
224 : /* Store the data in the table's hash table entry. */
225 117624 : ts = GetCurrentTimestamp();
226 117624 : elapsedtime = TimestampDifferenceMilliseconds(starttime, ts);
227 :
228 : /* block acquiring lock for the same reason as pgstat_report_autovac() */
229 117624 : entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION,
230 : dboid, tableoid, false);
231 :
232 117624 : shtabentry = (PgStatShared_Relation *) entry_ref->shared_stats;
233 117624 : tabentry = &shtabentry->stats;
234 :
235 117624 : tabentry->live_tuples = livetuples;
236 117624 : tabentry->dead_tuples = deadtuples;
237 :
238 : /*
239 : * It is quite possible that a non-aggressive VACUUM ended up skipping
240 : * various pages, however, we'll zero the insert counter here regardless.
241 : * It's currently used only to track when we need to perform an "insert"
242 : * autovacuum, which are mainly intended to freeze newly inserted tuples.
243 : * Zeroing this may just mean we'll not try to vacuum the table again
244 : * until enough tuples have been inserted to trigger another insert
245 : * autovacuum. An anti-wraparound autovacuum will catch any persistent
246 : * stragglers.
247 : */
248 117624 : tabentry->ins_since_vacuum = 0;
249 :
250 117624 : if (AmAutoVacuumWorkerProcess())
251 : {
252 95990 : tabentry->last_autovacuum_time = ts;
253 95990 : tabentry->autovacuum_count++;
254 95990 : tabentry->total_autovacuum_time += elapsedtime;
255 : }
256 : else
257 : {
258 21634 : tabentry->last_vacuum_time = ts;
259 21634 : tabentry->vacuum_count++;
260 21634 : tabentry->total_vacuum_time += elapsedtime;
261 : }
262 :
263 117624 : pgstat_unlock_entry(entry_ref);
264 :
265 : /*
266 : * Flush IO statistics now. pgstat_report_stat() will flush IO stats,
267 : * however this will not be called until after an entire autovacuum cycle
268 : * is done -- which will likely vacuum many relations -- or until the
269 : * VACUUM command has processed all tables and committed.
270 : */
271 117624 : pgstat_flush_io(false);
272 117624 : (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO);
273 : }
274 :
275 : /*
276 : * Report that the table was just analyzed and flush IO statistics.
277 : *
278 : * Caller must provide new live- and dead-tuples estimates, as well as a
279 : * flag indicating whether to reset the mod_since_analyze counter.
280 : */
281 : void
282 13744 : pgstat_report_analyze(Relation rel,
283 : PgStat_Counter livetuples, PgStat_Counter deadtuples,
284 : bool resetcounter, TimestampTz starttime)
285 : {
286 : PgStat_EntryRef *entry_ref;
287 : PgStatShared_Relation *shtabentry;
288 : PgStat_StatTabEntry *tabentry;
289 13744 : Oid dboid = (rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId);
290 : TimestampTz ts;
291 : PgStat_Counter elapsedtime;
292 :
293 13744 : if (!pgstat_track_counts)
294 0 : return;
295 :
296 : /*
297 : * Unlike VACUUM, ANALYZE might be running inside a transaction that has
298 : * already inserted and/or deleted rows in the target table. ANALYZE will
299 : * have counted such rows as live or dead respectively. Because we will
300 : * report our counts of such rows at transaction end, we should subtract
301 : * off these counts from the update we're making now, else they'll be
302 : * double-counted after commit. (This approach also ensures that the
303 : * shared stats entry ends up with the right numbers if we abort instead
304 : * of committing.)
305 : *
306 : * Waste no time on partitioned tables, though.
307 : */
308 13744 : if (pgstat_should_count_relation(rel) &&
309 13690 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
310 : {
311 : PgStat_TableXactStatus *trans;
312 :
313 13156 : for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
314 : {
315 158 : livetuples -= trans->tuples_inserted - trans->tuples_deleted;
316 158 : deadtuples -= trans->tuples_updated + trans->tuples_deleted;
317 : }
318 : /* count stuff inserted by already-aborted subxacts, too */
319 12998 : deadtuples -= rel->pgstat_info->counts.delta_dead_tuples;
320 : /* Since ANALYZE's counts are estimates, we could have underflowed */
321 12998 : livetuples = Max(livetuples, 0);
322 12998 : deadtuples = Max(deadtuples, 0);
323 : }
324 :
325 : /* Store the data in the table's hash table entry. */
326 13744 : ts = GetCurrentTimestamp();
327 13744 : elapsedtime = TimestampDifferenceMilliseconds(starttime, ts);
328 :
329 : /* block acquiring lock for the same reason as pgstat_report_autovac() */
330 13744 : entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION, dboid,
331 13744 : RelationGetRelid(rel),
332 : false);
333 : /* can't get dropped while accessed */
334 : Assert(entry_ref != NULL && entry_ref->shared_stats != NULL);
335 :
336 13744 : shtabentry = (PgStatShared_Relation *) entry_ref->shared_stats;
337 13744 : tabentry = &shtabentry->stats;
338 :
339 13744 : tabentry->live_tuples = livetuples;
340 13744 : tabentry->dead_tuples = deadtuples;
341 :
342 : /*
343 : * If commanded, reset mod_since_analyze to zero. This forgets any
344 : * changes that were committed while the ANALYZE was in progress, but we
345 : * have no good way to estimate how many of those there were.
346 : */
347 13744 : if (resetcounter)
348 13694 : tabentry->mod_since_analyze = 0;
349 :
350 13744 : if (AmAutoVacuumWorkerProcess())
351 : {
352 268 : tabentry->last_autoanalyze_time = ts;
353 268 : tabentry->autoanalyze_count++;
354 268 : tabentry->total_autoanalyze_time += elapsedtime;
355 : }
356 : else
357 : {
358 13476 : tabentry->last_analyze_time = ts;
359 13476 : tabentry->analyze_count++;
360 13476 : tabentry->total_analyze_time += elapsedtime;
361 : }
362 :
363 13744 : pgstat_unlock_entry(entry_ref);
364 :
365 : /* see pgstat_report_vacuum() */
366 13744 : pgstat_flush_io(false);
367 13744 : (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO);
368 : }
369 :
370 : /*
371 : * count a tuple insertion of n tuples
372 : */
373 : void
374 16298302 : pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
375 : {
376 16298302 : if (pgstat_should_count_relation(rel))
377 : {
378 15964128 : PgStat_TableStatus *pgstat_info = rel->pgstat_info;
379 :
380 15964128 : ensure_tabstat_xact_level(pgstat_info);
381 15964128 : pgstat_info->trans->tuples_inserted += n;
382 : }
383 16298302 : }
384 :
385 : /*
386 : * count a tuple update
387 : */
388 : void
389 579194 : pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
390 : {
391 : Assert(!(hot && newpage));
392 :
393 579194 : if (pgstat_should_count_relation(rel))
394 : {
395 579190 : PgStat_TableStatus *pgstat_info = rel->pgstat_info;
396 :
397 579190 : ensure_tabstat_xact_level(pgstat_info);
398 579190 : pgstat_info->trans->tuples_updated++;
399 :
400 : /*
401 : * tuples_hot_updated and tuples_newpage_updated counters are
402 : * nontransactional, so just advance them
403 : */
404 579190 : if (hot)
405 270782 : pgstat_info->counts.tuples_hot_updated++;
406 308408 : else if (newpage)
407 284382 : pgstat_info->counts.tuples_newpage_updated++;
408 : }
409 579194 : }
410 :
411 : /*
412 : * count a tuple deletion
413 : */
414 : void
415 2929462 : pgstat_count_heap_delete(Relation rel)
416 : {
417 2929462 : if (pgstat_should_count_relation(rel))
418 : {
419 2929462 : PgStat_TableStatus *pgstat_info = rel->pgstat_info;
420 :
421 2929462 : ensure_tabstat_xact_level(pgstat_info);
422 2929462 : pgstat_info->trans->tuples_deleted++;
423 : }
424 2929462 : }
425 :
426 : /*
427 : * update tuple counters due to truncate
428 : */
429 : void
430 3478 : pgstat_count_truncate(Relation rel)
431 : {
432 3478 : if (pgstat_should_count_relation(rel))
433 : {
434 3478 : PgStat_TableStatus *pgstat_info = rel->pgstat_info;
435 :
436 3478 : ensure_tabstat_xact_level(pgstat_info);
437 3478 : save_truncdrop_counters(pgstat_info->trans, false);
438 3478 : pgstat_info->trans->tuples_inserted = 0;
439 3478 : pgstat_info->trans->tuples_updated = 0;
440 3478 : pgstat_info->trans->tuples_deleted = 0;
441 : }
442 3478 : }
443 :
444 : /*
445 : * update dead-tuples count
446 : *
447 : * The semantics of this are that we are reporting the nontransactional
448 : * recovery of "delta" dead tuples; so delta_dead_tuples decreases
449 : * rather than increasing, and the change goes straight into the per-table
450 : * counter, not into transactional state.
451 : */
452 : void
453 34110 : pgstat_update_heap_dead_tuples(Relation rel, int delta)
454 : {
455 34110 : if (pgstat_should_count_relation(rel))
456 : {
457 34108 : PgStat_TableStatus *pgstat_info = rel->pgstat_info;
458 :
459 34108 : pgstat_info->counts.delta_dead_tuples -= delta;
460 : }
461 34110 : }
462 :
463 : /*
464 : * Support function for the SQL-callable pgstat* functions. Returns
465 : * the collected statistics for one table or NULL. NULL doesn't mean
466 : * that the table doesn't exist, just that there are no statistics, so the
467 : * caller is better off to report ZERO instead.
468 : */
469 : PgStat_StatTabEntry *
470 8808 : pgstat_fetch_stat_tabentry(Oid relid)
471 : {
472 8808 : return pgstat_fetch_stat_tabentry_ext(IsSharedRelation(relid), relid);
473 : }
474 :
475 : /*
476 : * More efficient version of pgstat_fetch_stat_tabentry(), allowing to specify
477 : * whether the to-be-accessed table is a shared relation or not.
478 : */
479 : PgStat_StatTabEntry *
480 361754 : pgstat_fetch_stat_tabentry_ext(bool shared, Oid reloid)
481 : {
482 361754 : Oid dboid = (shared ? InvalidOid : MyDatabaseId);
483 :
484 361754 : return (PgStat_StatTabEntry *)
485 361754 : pgstat_fetch_entry(PGSTAT_KIND_RELATION, dboid, reloid);
486 : }
487 :
488 : /*
489 : * find any existing PgStat_TableStatus entry for rel
490 : *
491 : * Find any existing PgStat_TableStatus entry for rel_id in the current
492 : * database. If not found, try finding from shared tables.
493 : *
494 : * If an entry is found, copy it and increment the copy's counters with their
495 : * subtransaction counterparts, then return the copy. The caller may need to
496 : * pfree() the copy.
497 : *
498 : * If no entry found, return NULL, don't create a new one.
499 : */
500 : PgStat_TableStatus *
501 48 : find_tabstat_entry(Oid rel_id)
502 : {
503 : PgStat_EntryRef *entry_ref;
504 : PgStat_TableXactStatus *trans;
505 48 : PgStat_TableStatus *tabentry = NULL;
506 48 : PgStat_TableStatus *tablestatus = NULL;
507 :
508 48 : entry_ref = pgstat_fetch_pending_entry(PGSTAT_KIND_RELATION, MyDatabaseId, rel_id);
509 48 : if (!entry_ref)
510 : {
511 12 : entry_ref = pgstat_fetch_pending_entry(PGSTAT_KIND_RELATION, InvalidOid, rel_id);
512 12 : if (!entry_ref)
513 12 : return tablestatus;
514 : }
515 :
516 36 : tabentry = (PgStat_TableStatus *) entry_ref->pending;
517 36 : tablestatus = palloc(sizeof(PgStat_TableStatus));
518 36 : *tablestatus = *tabentry;
519 :
520 : /*
521 : * Reset tablestatus->trans in the copy of PgStat_TableStatus as it may
522 : * point to a shared memory area. Its data is saved below, so removing it
523 : * does not matter.
524 : */
525 36 : tablestatus->trans = NULL;
526 :
527 : /*
528 : * Live subtransaction counts are not included yet. This is not a hot
529 : * code path so reconcile tuples_inserted, tuples_updated and
530 : * tuples_deleted even if the caller may not be interested in this data.
531 : */
532 84 : for (trans = tabentry->trans; trans != NULL; trans = trans->upper)
533 : {
534 48 : tablestatus->counts.tuples_inserted += trans->tuples_inserted;
535 48 : tablestatus->counts.tuples_updated += trans->tuples_updated;
536 48 : tablestatus->counts.tuples_deleted += trans->tuples_deleted;
537 : }
538 :
539 36 : return tablestatus;
540 : }
541 :
542 : /*
543 : * Perform relation stats specific end-of-transaction work. Helper for
544 : * AtEOXact_PgStat.
545 : *
546 : * Transfer transactional insert/update counts into the base tabstat entries.
547 : * We don't bother to free any of the transactional state, since it's all in
548 : * TopTransactionContext and will go away anyway.
549 : */
550 : void
551 238548 : AtEOXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit)
552 : {
553 : PgStat_TableXactStatus *trans;
554 :
555 888902 : for (trans = xact_state->first; trans != NULL; trans = trans->next)
556 : {
557 : PgStat_TableStatus *tabstat;
558 :
559 : Assert(trans->nest_level == 1);
560 : Assert(trans->upper == NULL);
561 650354 : tabstat = trans->parent;
562 : Assert(tabstat->trans == trans);
563 : /* restore pre-truncate/drop stats (if any) in case of aborted xact */
564 650354 : if (!isCommit)
565 21656 : restore_truncdrop_counters(trans);
566 : /* count attempted actions regardless of commit/abort */
567 650354 : tabstat->counts.tuples_inserted += trans->tuples_inserted;
568 650354 : tabstat->counts.tuples_updated += trans->tuples_updated;
569 650354 : tabstat->counts.tuples_deleted += trans->tuples_deleted;
570 650354 : if (isCommit)
571 : {
572 628698 : tabstat->counts.truncdropped = trans->truncdropped;
573 628698 : if (trans->truncdropped)
574 : {
575 : /* forget live/dead stats seen by backend thus far */
576 4292 : tabstat->counts.delta_live_tuples = 0;
577 4292 : tabstat->counts.delta_dead_tuples = 0;
578 : }
579 : /* insert adds a live tuple, delete removes one */
580 628698 : tabstat->counts.delta_live_tuples +=
581 628698 : trans->tuples_inserted - trans->tuples_deleted;
582 : /* update and delete each create a dead tuple */
583 628698 : tabstat->counts.delta_dead_tuples +=
584 628698 : trans->tuples_updated + trans->tuples_deleted;
585 : /* insert, update, delete each count as one change event */
586 628698 : tabstat->counts.changed_tuples +=
587 628698 : trans->tuples_inserted + trans->tuples_updated +
588 628698 : trans->tuples_deleted;
589 : }
590 : else
591 : {
592 : /* inserted tuples are dead, deleted tuples are unaffected */
593 21656 : tabstat->counts.delta_dead_tuples +=
594 21656 : trans->tuples_inserted + trans->tuples_updated;
595 : /* an aborted xact generates no changed_tuple events */
596 : }
597 650354 : tabstat->trans = NULL;
598 : }
599 238548 : }
600 :
601 : /*
602 : * Perform relation stats specific end-of-sub-transaction work. Helper for
603 : * AtEOSubXact_PgStat.
604 : *
605 : * Transfer transactional insert/update counts into the next higher
606 : * subtransaction state.
607 : */
608 : void
609 8464 : AtEOSubXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit, int nestDepth)
610 : {
611 : PgStat_TableXactStatus *trans;
612 : PgStat_TableXactStatus *next_trans;
613 :
614 17546 : for (trans = xact_state->first; trans != NULL; trans = next_trans)
615 : {
616 : PgStat_TableStatus *tabstat;
617 :
618 9082 : next_trans = trans->next;
619 : Assert(trans->nest_level == nestDepth);
620 9082 : tabstat = trans->parent;
621 : Assert(tabstat->trans == trans);
622 :
623 9082 : if (isCommit)
624 : {
625 7556 : if (trans->upper && trans->upper->nest_level == nestDepth - 1)
626 : {
627 5280 : if (trans->truncdropped)
628 : {
629 : /* propagate the truncate/drop status one level up */
630 24 : save_truncdrop_counters(trans->upper, false);
631 : /* replace upper xact stats with ours */
632 24 : trans->upper->tuples_inserted = trans->tuples_inserted;
633 24 : trans->upper->tuples_updated = trans->tuples_updated;
634 24 : trans->upper->tuples_deleted = trans->tuples_deleted;
635 : }
636 : else
637 : {
638 5256 : trans->upper->tuples_inserted += trans->tuples_inserted;
639 5256 : trans->upper->tuples_updated += trans->tuples_updated;
640 5256 : trans->upper->tuples_deleted += trans->tuples_deleted;
641 : }
642 5280 : tabstat->trans = trans->upper;
643 5280 : pfree(trans);
644 : }
645 : else
646 : {
647 : /*
648 : * When there isn't an immediate parent state, we can just
649 : * reuse the record instead of going through a palloc/pfree
650 : * pushup (this works since it's all in TopTransactionContext
651 : * anyway). We have to re-link it into the parent level,
652 : * though, and that might mean pushing a new entry into the
653 : * pgStatXactStack.
654 : */
655 : PgStat_SubXactStatus *upper_xact_state;
656 :
657 2276 : upper_xact_state = pgstat_get_xact_stack_level(nestDepth - 1);
658 2276 : trans->next = upper_xact_state->first;
659 2276 : upper_xact_state->first = trans;
660 2276 : trans->nest_level = nestDepth - 1;
661 : }
662 : }
663 : else
664 : {
665 : /*
666 : * On abort, update top-level tabstat counts, then forget the
667 : * subtransaction
668 : */
669 :
670 : /* first restore values obliterated by truncate/drop */
671 1526 : restore_truncdrop_counters(trans);
672 : /* count attempted actions regardless of commit/abort */
673 1526 : tabstat->counts.tuples_inserted += trans->tuples_inserted;
674 1526 : tabstat->counts.tuples_updated += trans->tuples_updated;
675 1526 : tabstat->counts.tuples_deleted += trans->tuples_deleted;
676 : /* inserted tuples are dead, deleted tuples are unaffected */
677 1526 : tabstat->counts.delta_dead_tuples +=
678 1526 : trans->tuples_inserted + trans->tuples_updated;
679 1526 : tabstat->trans = trans->upper;
680 1526 : pfree(trans);
681 : }
682 : }
683 8464 : }
684 :
685 : /*
686 : * Generate 2PC records for all the pending transaction-dependent relation
687 : * stats.
688 : */
689 : void
690 716 : AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state)
691 : {
692 : PgStat_TableXactStatus *trans;
693 :
694 1580 : for (trans = xact_state->first; trans != NULL; trans = trans->next)
695 : {
696 : PgStat_TableStatus *tabstat PG_USED_FOR_ASSERTS_ONLY;
697 : TwoPhasePgStatRecord record;
698 :
699 : Assert(trans->nest_level == 1);
700 : Assert(trans->upper == NULL);
701 864 : tabstat = trans->parent;
702 : Assert(tabstat->trans == trans);
703 :
704 864 : record.tuples_inserted = trans->tuples_inserted;
705 864 : record.tuples_updated = trans->tuples_updated;
706 864 : record.tuples_deleted = trans->tuples_deleted;
707 864 : record.inserted_pre_truncdrop = trans->inserted_pre_truncdrop;
708 864 : record.updated_pre_truncdrop = trans->updated_pre_truncdrop;
709 864 : record.deleted_pre_truncdrop = trans->deleted_pre_truncdrop;
710 864 : record.id = tabstat->id;
711 864 : record.shared = tabstat->shared;
712 864 : record.truncdropped = trans->truncdropped;
713 :
714 864 : RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0,
715 : &record, sizeof(TwoPhasePgStatRecord));
716 : }
717 716 : }
718 :
719 : /*
720 : * All we need do here is unlink the transaction stats state from the
721 : * nontransactional state. The nontransactional action counts will be
722 : * reported to the stats system immediately, while the effects on live and
723 : * dead tuple counts are preserved in the 2PC state file.
724 : *
725 : * Note: AtEOXact_PgStat_Relations is not called during PREPARE.
726 : */
727 : void
728 716 : PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state)
729 : {
730 : PgStat_TableXactStatus *trans;
731 :
732 1580 : for (trans = xact_state->first; trans != NULL; trans = trans->next)
733 : {
734 : PgStat_TableStatus *tabstat;
735 :
736 864 : tabstat = trans->parent;
737 864 : tabstat->trans = NULL;
738 : }
739 716 : }
740 :
741 : /*
742 : * 2PC processing routine for COMMIT PREPARED case.
743 : *
744 : * Load the saved counts into our local pgstats state.
745 : */
746 : void
747 760 : pgstat_twophase_postcommit(TransactionId xid, uint16 info,
748 : void *recdata, uint32 len)
749 : {
750 760 : TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
751 : PgStat_TableStatus *pgstat_info;
752 :
753 : /* Find or create a tabstat entry for the rel */
754 760 : pgstat_info = pgstat_prep_relation_pending(rec->id, rec->shared);
755 :
756 : /* Same math as in AtEOXact_PgStat, commit case */
757 760 : pgstat_info->counts.tuples_inserted += rec->tuples_inserted;
758 760 : pgstat_info->counts.tuples_updated += rec->tuples_updated;
759 760 : pgstat_info->counts.tuples_deleted += rec->tuples_deleted;
760 760 : pgstat_info->counts.truncdropped = rec->truncdropped;
761 760 : if (rec->truncdropped)
762 : {
763 : /* forget live/dead stats seen by backend thus far */
764 4 : pgstat_info->counts.delta_live_tuples = 0;
765 4 : pgstat_info->counts.delta_dead_tuples = 0;
766 : }
767 760 : pgstat_info->counts.delta_live_tuples +=
768 760 : rec->tuples_inserted - rec->tuples_deleted;
769 760 : pgstat_info->counts.delta_dead_tuples +=
770 760 : rec->tuples_updated + rec->tuples_deleted;
771 760 : pgstat_info->counts.changed_tuples +=
772 760 : rec->tuples_inserted + rec->tuples_updated +
773 760 : rec->tuples_deleted;
774 760 : }
775 :
776 : /*
777 : * 2PC processing routine for ROLLBACK PREPARED case.
778 : *
779 : * Load the saved counts into our local pgstats state, but treat them
780 : * as aborted.
781 : */
782 : void
783 126 : pgstat_twophase_postabort(TransactionId xid, uint16 info,
784 : void *recdata, uint32 len)
785 : {
786 126 : TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
787 : PgStat_TableStatus *pgstat_info;
788 :
789 : /* Find or create a tabstat entry for the rel */
790 126 : pgstat_info = pgstat_prep_relation_pending(rec->id, rec->shared);
791 :
792 : /* Same math as in AtEOXact_PgStat, abort case */
793 126 : if (rec->truncdropped)
794 : {
795 8 : rec->tuples_inserted = rec->inserted_pre_truncdrop;
796 8 : rec->tuples_updated = rec->updated_pre_truncdrop;
797 8 : rec->tuples_deleted = rec->deleted_pre_truncdrop;
798 : }
799 126 : pgstat_info->counts.tuples_inserted += rec->tuples_inserted;
800 126 : pgstat_info->counts.tuples_updated += rec->tuples_updated;
801 126 : pgstat_info->counts.tuples_deleted += rec->tuples_deleted;
802 126 : pgstat_info->counts.delta_dead_tuples +=
803 126 : rec->tuples_inserted + rec->tuples_updated;
804 126 : }
805 :
806 : /*
807 : * Flush out pending stats for the entry
808 : *
809 : * If nowait is true, this function returns false if lock could not
810 : * immediately acquired, otherwise true is returned.
811 : *
812 : * Some of the stats are copied to the corresponding pending database stats
813 : * entry when successfully flushing.
814 : */
815 : bool
816 1530120 : pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait)
817 : {
818 : Oid dboid;
819 : PgStat_TableStatus *lstats; /* pending stats entry */
820 : PgStatShared_Relation *shtabstats;
821 : PgStat_StatTabEntry *tabentry; /* table entry of shared stats */
822 : PgStat_StatDBEntry *dbentry; /* pending database entry */
823 :
824 1530120 : dboid = entry_ref->shared_entry->key.dboid;
825 1530120 : lstats = (PgStat_TableStatus *) entry_ref->pending;
826 1530120 : shtabstats = (PgStatShared_Relation *) entry_ref->shared_stats;
827 :
828 : /*
829 : * Ignore entries that didn't accumulate any actual counts, such as
830 : * indexes that were opened by the planner but not used.
831 : */
832 1530120 : if (pg_memory_is_all_zeros(&lstats->counts,
833 : sizeof(struct PgStat_TableCounts)))
834 4542 : return true;
835 :
836 1525578 : if (!pgstat_lock_entry(entry_ref, nowait))
837 8 : return false;
838 :
839 : /* add the values to the shared entry. */
840 1525570 : tabentry = &shtabstats->stats;
841 :
842 1525570 : tabentry->numscans += lstats->counts.numscans;
843 1525570 : if (lstats->counts.numscans)
844 : {
845 855770 : TimestampTz t = GetCurrentTransactionStopTimestamp();
846 :
847 855770 : if (t > tabentry->lastscan)
848 842012 : tabentry->lastscan = t;
849 : }
850 1525570 : tabentry->tuples_returned += lstats->counts.tuples_returned;
851 1525570 : tabentry->tuples_fetched += lstats->counts.tuples_fetched;
852 1525570 : tabentry->tuples_inserted += lstats->counts.tuples_inserted;
853 1525570 : tabentry->tuples_updated += lstats->counts.tuples_updated;
854 1525570 : tabentry->tuples_deleted += lstats->counts.tuples_deleted;
855 1525570 : tabentry->tuples_hot_updated += lstats->counts.tuples_hot_updated;
856 1525570 : tabentry->tuples_newpage_updated += lstats->counts.tuples_newpage_updated;
857 :
858 : /*
859 : * If table was truncated/dropped, first reset the live/dead counters.
860 : */
861 1525570 : if (lstats->counts.truncdropped)
862 : {
863 638 : tabentry->live_tuples = 0;
864 638 : tabentry->dead_tuples = 0;
865 638 : tabentry->ins_since_vacuum = 0;
866 : }
867 :
868 1525570 : tabentry->live_tuples += lstats->counts.delta_live_tuples;
869 1525570 : tabentry->dead_tuples += lstats->counts.delta_dead_tuples;
870 1525570 : tabentry->mod_since_analyze += lstats->counts.changed_tuples;
871 1525570 : tabentry->ins_since_vacuum += lstats->counts.tuples_inserted;
872 1525570 : tabentry->blocks_fetched += lstats->counts.blocks_fetched;
873 1525570 : tabentry->blocks_hit += lstats->counts.blocks_hit;
874 :
875 : /* Clamp live_tuples in case of negative delta_live_tuples */
876 1525570 : tabentry->live_tuples = Max(tabentry->live_tuples, 0);
877 : /* Likewise for dead_tuples */
878 1525570 : tabentry->dead_tuples = Max(tabentry->dead_tuples, 0);
879 :
880 1525570 : pgstat_unlock_entry(entry_ref);
881 :
882 : /* The entry was successfully flushed, add the same to database stats */
883 1525570 : dbentry = pgstat_prep_database_pending(dboid);
884 1525570 : dbentry->tuples_returned += lstats->counts.tuples_returned;
885 1525570 : dbentry->tuples_fetched += lstats->counts.tuples_fetched;
886 1525570 : dbentry->tuples_inserted += lstats->counts.tuples_inserted;
887 1525570 : dbentry->tuples_updated += lstats->counts.tuples_updated;
888 1525570 : dbentry->tuples_deleted += lstats->counts.tuples_deleted;
889 1525570 : dbentry->blocks_fetched += lstats->counts.blocks_fetched;
890 1525570 : dbentry->blocks_hit += lstats->counts.blocks_hit;
891 :
892 1525570 : return true;
893 : }
894 :
895 : void
896 1593188 : pgstat_relation_delete_pending_cb(PgStat_EntryRef *entry_ref)
897 : {
898 1593188 : PgStat_TableStatus *pending = (PgStat_TableStatus *) entry_ref->pending;
899 :
900 1593188 : if (pending->relation)
901 1471206 : pgstat_unlink_relation(pending->relation);
902 1593188 : }
903 :
904 : /*
905 : * Find or create a PgStat_TableStatus entry for rel. New entry is created and
906 : * initialized if not exists.
907 : */
908 : static PgStat_TableStatus *
909 1682084 : pgstat_prep_relation_pending(Oid rel_id, bool isshared)
910 : {
911 : PgStat_EntryRef *entry_ref;
912 : PgStat_TableStatus *pending;
913 :
914 1682084 : entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_RELATION,
915 : isshared ? InvalidOid : MyDatabaseId,
916 : rel_id, NULL);
917 1682084 : pending = entry_ref->pending;
918 1682084 : pending->id = rel_id;
919 1682084 : pending->shared = isshared;
920 :
921 1682084 : return pending;
922 : }
923 :
924 : /*
925 : * add a new (sub)transaction state record
926 : */
927 : static void
928 658024 : add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level)
929 : {
930 : PgStat_SubXactStatus *xact_state;
931 : PgStat_TableXactStatus *trans;
932 :
933 : /*
934 : * If this is the first rel to be modified at the current nest level, we
935 : * first have to push a transaction stack entry.
936 : */
937 658024 : xact_state = pgstat_get_xact_stack_level(nest_level);
938 :
939 : /* Now make a per-table stack entry */
940 : trans = (PgStat_TableXactStatus *)
941 658024 : MemoryContextAllocZero(TopTransactionContext,
942 : sizeof(PgStat_TableXactStatus));
943 658024 : trans->nest_level = nest_level;
944 658024 : trans->upper = pgstat_info->trans;
945 658024 : trans->parent = pgstat_info;
946 658024 : trans->next = xact_state->first;
947 658024 : xact_state->first = trans;
948 658024 : pgstat_info->trans = trans;
949 658024 : }
950 :
951 : /*
952 : * Add a new (sub)transaction record if needed.
953 : */
954 : static void
955 19476258 : ensure_tabstat_xact_level(PgStat_TableStatus *pgstat_info)
956 : {
957 19476258 : int nest_level = GetCurrentTransactionNestLevel();
958 :
959 19476258 : if (pgstat_info->trans == NULL ||
960 18824632 : pgstat_info->trans->nest_level != nest_level)
961 658024 : add_tabstat_xact_level(pgstat_info, nest_level);
962 19476258 : }
963 :
964 : /*
965 : * Whenever a table is truncated/dropped, we save its i/u/d counters so that
966 : * they can be cleared, and if the (sub)xact that executed the truncate/drop
967 : * later aborts, the counters can be restored to the saved (pre-truncate/drop)
968 : * values.
969 : *
970 : * Note that for truncate we do this on the first truncate in any particular
971 : * subxact level only.
972 : */
973 : static void
974 4708 : save_truncdrop_counters(PgStat_TableXactStatus *trans, bool is_drop)
975 : {
976 4708 : if (!trans->truncdropped || is_drop)
977 : {
978 4610 : trans->inserted_pre_truncdrop = trans->tuples_inserted;
979 4610 : trans->updated_pre_truncdrop = trans->tuples_updated;
980 4610 : trans->deleted_pre_truncdrop = trans->tuples_deleted;
981 4610 : trans->truncdropped = true;
982 : }
983 4708 : }
984 :
985 : /*
986 : * restore counters when a truncate aborts
987 : */
988 : static void
989 23182 : restore_truncdrop_counters(PgStat_TableXactStatus *trans)
990 : {
991 23182 : if (trans->truncdropped)
992 : {
993 262 : trans->tuples_inserted = trans->inserted_pre_truncdrop;
994 262 : trans->tuples_updated = trans->updated_pre_truncdrop;
995 262 : trans->tuples_deleted = trans->deleted_pre_truncdrop;
996 : }
997 23182 : }
|