LCOV - code coverage report
Current view: top level - contrib/pg_stat_statements - pg_stat_statements.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 72.8 % 931 678
Test Date: 2026-04-17 15:16:26 Functions: 86.0 % 50 43
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * pg_stat_statements.c
       4              :  *      Track statement planning and execution times as well as resource
       5              :  *      usage across a whole database cluster.
       6              :  *
       7              :  * Execution costs are totaled for each distinct source query, and kept in
       8              :  * a shared hashtable.  (We track only as many distinct queries as will fit
       9              :  * in the designated amount of shared memory.)
      10              :  *
      11              :  * Starting in Postgres 9.2, this module normalized query entries.  As of
      12              :  * Postgres 14, the normalization is done by the core if compute_query_id is
      13              :  * enabled, or optionally by third-party modules.
      14              :  *
      15              :  * To facilitate presenting entries to users, we create "representative" query
      16              :  * strings in which constants are replaced with parameter symbols ($n), to
      17              :  * make it clearer what a normalized entry can represent.  To save on shared
      18              :  * memory, and to avoid having to truncate oversized query strings, we store
      19              :  * these strings in a temporary external query-texts file.  Offsets into this
      20              :  * file are kept in shared memory.
      21              :  *
      22              :  * Note about locking issues: to create or delete an entry in the shared
      23              :  * hashtable, one must hold pgss->lock exclusively.  Modifying any field
      24              :  * in an entry except the counters requires the same.  To look up an entry,
      25              :  * one must hold the lock shared.  To read or update the counters within
      26              :  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
      27              :  * disappear!) and also take the entry's mutex spinlock.
      28              :  * The shared state variable pgss->extent (the next free spot in the external
      29              :  * query-text file) should be accessed only while holding either the
      30              :  * pgss->mutex spinlock, or exclusive lock on pgss->lock.  We use the mutex to
      31              :  * allow reserving file space while holding only shared lock on pgss->lock.
      32              :  * Rewriting the entire external query-text file, eg for garbage collection,
      33              :  * requires holding pgss->lock exclusively; this allows individual entries
      34              :  * in the file to be read or written while holding only shared lock.
      35              :  *
      36              :  *
      37              :  * Copyright (c) 2008-2026, PostgreSQL Global Development Group
      38              :  *
      39              :  * IDENTIFICATION
      40              :  *    contrib/pg_stat_statements/pg_stat_statements.c
      41              :  *
      42              :  *-------------------------------------------------------------------------
      43              :  */
      44              : #include "postgres.h"
      45              : 
      46              : #include <math.h>
      47              : #include <sys/stat.h>
      48              : #include <unistd.h>
      49              : 
      50              : #include "access/htup_details.h"
      51              : #include "access/parallel.h"
      52              : #include "catalog/pg_authid.h"
      53              : #include "executor/instrument.h"
      54              : #include "funcapi.h"
      55              : #include "jit/jit.h"
      56              : #include "mb/pg_wchar.h"
      57              : #include "miscadmin.h"
      58              : #include "nodes/queryjumble.h"
      59              : #include "optimizer/planner.h"
      60              : #include "parser/analyze.h"
      61              : #include "pgstat.h"
      62              : #include "storage/fd.h"
      63              : #include "storage/ipc.h"
      64              : #include "storage/lwlock.h"
      65              : #include "storage/shmem.h"
      66              : #include "storage/spin.h"
      67              : #include "tcop/utility.h"
      68              : #include "utils/acl.h"
      69              : #include "utils/builtins.h"
      70              : #include "utils/memutils.h"
      71              : #include "utils/timestamp.h"
      72              : #include "utils/tuplestore.h"
      73              : 
      74           10 : PG_MODULE_MAGIC_EXT(
      75              :                     .name = "pg_stat_statements",
      76              :                     .version = PG_VERSION
      77              : );
      78              : 
      79              : /* Location of permanent stats file (valid when database is shut down) */
      80              : #define PGSS_DUMP_FILE  PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
      81              : 
      82              : /*
      83              :  * Location of external query text file.
      84              :  */
      85              : #define PGSS_TEXT_FILE  PG_STAT_TMP_DIR "/pgss_query_texts.stat"
      86              : 
      87              : /* Magic number identifying the stats file format */
      88              : static const uint32 PGSS_FILE_HEADER = 0x20250731;
      89              : 
      90              : /* PostgreSQL major version number, changes in which invalidate all entries */
      91              : static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
      92              : 
      93              : /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
      94              : #define USAGE_EXEC(duration)    (1.0)
      95              : #define USAGE_INIT              (1.0)   /* including initial planning */
      96              : #define ASSUMED_MEDIAN_INIT     (10.0)  /* initial assumed median usage */
      97              : #define ASSUMED_LENGTH_INIT     1024    /* initial assumed mean query length */
      98              : #define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
      99              : #define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
     100              : #define USAGE_DEALLOC_PERCENT   5   /* free this % of entries at once */
     101              : #define IS_STICKY(c)    ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
     102              : 
     103              : /*
     104              :  * Extension version number, for supporting older extension versions' objects
     105              :  */
     106              : typedef enum pgssVersion
     107              : {
     108              :     PGSS_V1_0 = 0,
     109              :     PGSS_V1_1,
     110              :     PGSS_V1_2,
     111              :     PGSS_V1_3,
     112              :     PGSS_V1_8,
     113              :     PGSS_V1_9,
     114              :     PGSS_V1_10,
     115              :     PGSS_V1_11,
     116              :     PGSS_V1_12,
     117              :     PGSS_V1_13,
     118              : } pgssVersion;
     119              : 
     120              : typedef enum pgssStoreKind
     121              : {
     122              :     PGSS_INVALID = -1,
     123              : 
     124              :     /*
     125              :      * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
     126              :      * reference the underlying values in the arrays in the Counters struct,
     127              :      * and this order is required in pg_stat_statements_internal().
     128              :      */
     129              :     PGSS_PLAN = 0,
     130              :     PGSS_EXEC,
     131              : } pgssStoreKind;
     132              : 
     133              : #define PGSS_NUMKIND (PGSS_EXEC + 1)
     134              : 
     135              : /*
     136              :  * Hashtable key that defines the identity of a hashtable entry.  We separate
     137              :  * queries by user and by database even if they are otherwise identical.
     138              :  *
     139              :  * If you add a new key to this struct, make sure to teach pgss_store() to
     140              :  * zero the padding bytes.  Otherwise, things will break, because pgss_hash is
     141              :  * created using HASH_BLOBS, and thus tag_hash is used to hash this.
     142              :  */
     143              : typedef struct pgssHashKey
     144              : {
     145              :     Oid         userid;         /* user OID */
     146              :     Oid         dbid;           /* database OID */
     147              :     int64       queryid;        /* query identifier */
     148              :     bool        toplevel;       /* query executed at top level */
     149              : } pgssHashKey;
     150              : 
     151              : /*
     152              :  * The actual stats counters kept within pgssEntry.
     153              :  */
     154              : typedef struct Counters
     155              : {
     156              :     int64       calls[PGSS_NUMKIND];    /* # of times planned/executed */
     157              :     double      total_time[PGSS_NUMKIND];   /* total planning/execution time,
     158              :                                              * in msec */
     159              :     double      min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
     160              :                                          * msec since min/max reset */
     161              :     double      max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
     162              :                                          * msec since min/max reset */
     163              :     double      mean_time[PGSS_NUMKIND];    /* mean planning/execution time in
     164              :                                              * msec */
     165              :     double      sum_var_time[PGSS_NUMKIND]; /* sum of variances in
     166              :                                              * planning/execution time in msec */
     167              :     int64       rows;           /* total # of retrieved or affected rows */
     168              :     int64       shared_blks_hit;    /* # of shared buffer hits */
     169              :     int64       shared_blks_read;   /* # of shared disk blocks read */
     170              :     int64       shared_blks_dirtied;    /* # of shared disk blocks dirtied */
     171              :     int64       shared_blks_written;    /* # of shared disk blocks written */
     172              :     int64       local_blks_hit; /* # of local buffer hits */
     173              :     int64       local_blks_read;    /* # of local disk blocks read */
     174              :     int64       local_blks_dirtied; /* # of local disk blocks dirtied */
     175              :     int64       local_blks_written; /* # of local disk blocks written */
     176              :     int64       temp_blks_read; /* # of temp blocks read */
     177              :     int64       temp_blks_written;  /* # of temp blocks written */
     178              :     double      shared_blk_read_time;   /* time spent reading shared blocks,
     179              :                                          * in msec */
     180              :     double      shared_blk_write_time;  /* time spent writing shared blocks,
     181              :                                          * in msec */
     182              :     double      local_blk_read_time;    /* time spent reading local blocks, in
     183              :                                          * msec */
     184              :     double      local_blk_write_time;   /* time spent writing local blocks, in
     185              :                                          * msec */
     186              :     double      temp_blk_read_time; /* time spent reading temp blocks, in msec */
     187              :     double      temp_blk_write_time;    /* time spent writing temp blocks, in
     188              :                                          * msec */
     189              :     double      usage;          /* usage factor */
     190              :     int64       wal_records;    /* # of WAL records generated */
     191              :     int64       wal_fpi;        /* # of WAL full page images generated */
     192              :     uint64      wal_bytes;      /* total amount of WAL generated in bytes */
     193              :     int64       wal_buffers_full;   /* # of times the WAL buffers became full */
     194              :     int64       jit_functions;  /* total number of JIT functions emitted */
     195              :     double      jit_generation_time;    /* total time to generate jit code */
     196              :     int64       jit_inlining_count; /* number of times inlining time has been
     197              :                                      * > 0 */
     198              :     double      jit_deform_time;    /* total time to deform tuples in jit code */
     199              :     int64       jit_deform_count;   /* number of times deform time has been >
     200              :                                      * 0 */
     201              : 
     202              :     double      jit_inlining_time;  /* total time to inline jit code */
     203              :     int64       jit_optimization_count; /* number of times optimization time
     204              :                                          * has been > 0 */
     205              :     double      jit_optimization_time;  /* total time to optimize jit code */
     206              :     int64       jit_emission_count; /* number of times emission time has been
     207              :                                      * > 0 */
     208              :     double      jit_emission_time;  /* total time to emit jit code */
     209              :     int64       parallel_workers_to_launch; /* # of parallel workers planned
     210              :                                              * to be launched */
     211              :     int64       parallel_workers_launched;  /* # of parallel workers actually
     212              :                                              * launched */
     213              :     int64       generic_plan_calls; /* number of calls using a generic plan */
     214              :     int64       custom_plan_calls;  /* number of calls using a custom plan */
     215              : } Counters;
     216              : 
     217              : /*
     218              :  * Global statistics for pg_stat_statements
     219              :  */
     220              : typedef struct pgssGlobalStats
     221              : {
     222              :     int64       dealloc;        /* # of times entries were deallocated */
     223              :     TimestampTz stats_reset;    /* timestamp of the last reset of all stats */
     224              : } pgssGlobalStats;
     225              : 
     226              : /*
     227              :  * Statistics per statement
     228              :  *
     229              :  * Note: in event of a failure in garbage collection of the query text file,
     230              :  * we reset query_offset to zero and query_len to -1.  This will be seen as
     231              :  * an invalid state by qtext_fetch().
     232              :  */
     233              : typedef struct pgssEntry
     234              : {
     235              :     pgssHashKey key;            /* hash key of entry - MUST BE FIRST */
     236              :     Counters    counters;       /* the statistics for this query */
     237              :     Size        query_offset;   /* query text offset in external file */
     238              :     int         query_len;      /* # of valid bytes in query string, or -1 */
     239              :     int         encoding;       /* query text encoding */
     240              :     TimestampTz stats_since;    /* timestamp of entry allocation */
     241              :     TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
     242              :     slock_t     mutex;          /* protects the counters only */
     243              : } pgssEntry;
     244              : 
     245              : /*
     246              :  * Global shared state
     247              :  */
     248              : typedef struct pgssSharedState
     249              : {
     250              :     LWLockPadded lock;          /* protects hashtable search/modification */
     251              :     double      cur_median_usage;   /* current median usage in hashtable */
     252              :     Size        mean_query_len; /* current mean entry text length */
     253              :     slock_t     mutex;          /* protects following fields only: */
     254              :     Size        extent;         /* current extent of query file */
     255              :     int         n_writers;      /* number of active writers to query file */
     256              :     int         gc_count;       /* query file garbage collection cycle count */
     257              :     pgssGlobalStats stats;      /* global statistics for pgss */
     258              : } pgssSharedState;
     259              : 
     260              : /* Links to shared memory state */
     261              : static pgssSharedState *pgss;
     262              : static HTAB *pgss_hash;
     263              : 
     264              : static void pgss_shmem_request(void *arg);
     265              : static void pgss_shmem_init(void *arg);
     266              : 
     267              : static const ShmemCallbacks pgss_shmem_callbacks = {
     268              :     .request_fn = pgss_shmem_request,
     269              :     .init_fn = pgss_shmem_init,
     270              : };
     271              : 
     272              : /*---- Local variables ----*/
     273              : 
     274              : /* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
     275              : static int  nesting_level = 0;
     276              : 
     277              : /* Saved hook values */
     278              : static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
     279              : static planner_hook_type prev_planner_hook = NULL;
     280              : static ExecutorStart_hook_type prev_ExecutorStart = NULL;
     281              : static ExecutorRun_hook_type prev_ExecutorRun = NULL;
     282              : static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
     283              : static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
     284              : static ProcessUtility_hook_type prev_ProcessUtility = NULL;
     285              : 
     286              : /*---- GUC variables ----*/
     287              : 
     288              : typedef enum
     289              : {
     290              :     PGSS_TRACK_NONE,            /* track no statements */
     291              :     PGSS_TRACK_TOP,             /* only top level statements */
     292              :     PGSS_TRACK_ALL,             /* all statements, including nested ones */
     293              : }           PGSSTrackLevel;
     294              : 
     295              : static const struct config_enum_entry track_options[] =
     296              : {
     297              :     {"none", PGSS_TRACK_NONE, false},
     298              :     {"top", PGSS_TRACK_TOP, false},
     299              :     {"all", PGSS_TRACK_ALL, false},
     300              :     {NULL, 0, false}
     301              : };
     302              : 
     303              : static int  pgss_max = 5000;    /* max # statements to track */
     304              : static int  pgss_track = PGSS_TRACK_TOP;    /* tracking level */
     305              : static bool pgss_track_utility = true;  /* whether to track utility commands */
     306              : static bool pgss_track_planning = false;    /* whether to track planning
     307              :                                              * duration */
     308              : static bool pgss_save = true;   /* whether to save stats across shutdown */
     309              : 
     310              : #define pgss_enabled(level) \
     311              :     (!IsParallelWorker() && \
     312              :     (pgss_track == PGSS_TRACK_ALL || \
     313              :     (pgss_track == PGSS_TRACK_TOP && (level) == 0)))
     314              : 
     315              : #define record_gc_qtexts() \
     316              :     do { \
     317              :         SpinLockAcquire(&pgss->mutex); \
     318              :         pgss->gc_count++; \
     319              :         SpinLockRelease(&pgss->mutex); \
     320              :     } while(0)
     321              : 
     322              : /*---- Function declarations ----*/
     323              : 
     324            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
     325            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_7);
     326           20 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_11);
     327            0 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
     328            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_3);
     329            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_8);
     330            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_9);
     331            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_10);
     332            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_11);
     333            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_12);
     334           24 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_13);
     335            0 : PG_FUNCTION_INFO_V1(pg_stat_statements);
     336            8 : PG_FUNCTION_INFO_V1(pg_stat_statements_info);
     337              : 
     338              : static void pgss_shmem_shutdown(int code, Datum arg);
     339              : static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
     340              :                                     const JumbleState *jstate);
     341              : static PlannedStmt *pgss_planner(Query *parse,
     342              :                                  const char *query_string,
     343              :                                  int cursorOptions,
     344              :                                  ParamListInfo boundParams,
     345              :                                  ExplainState *es);
     346              : static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
     347              : static void pgss_ExecutorRun(QueryDesc *queryDesc,
     348              :                              ScanDirection direction,
     349              :                              uint64 count);
     350              : static void pgss_ExecutorFinish(QueryDesc *queryDesc);
     351              : static void pgss_ExecutorEnd(QueryDesc *queryDesc);
     352              : static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
     353              :                                 bool readOnlyTree,
     354              :                                 ProcessUtilityContext context, ParamListInfo params,
     355              :                                 QueryEnvironment *queryEnv,
     356              :                                 DestReceiver *dest, QueryCompletion *qc);
     357              : static void pgss_store(const char *query, int64 queryId,
     358              :                        int query_location, int query_len,
     359              :                        pgssStoreKind kind,
     360              :                        double total_time, uint64 rows,
     361              :                        const BufferUsage *bufusage,
     362              :                        const WalUsage *walusage,
     363              :                        const struct JitInstrumentation *jitusage,
     364              :                        const JumbleState *jstate,
     365              :                        int parallel_workers_to_launch,
     366              :                        int parallel_workers_launched,
     367              :                        PlannedStmtOrigin planOrigin);
     368              : static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
     369              :                                         pgssVersion api_version,
     370              :                                         bool showtext);
     371              : static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
     372              :                               int encoding, bool sticky);
     373              : static void entry_dealloc(void);
     374              : static bool qtext_store(const char *query, int query_len,
     375              :                         Size *query_offset, int *gc_count);
     376              : static char *qtext_load_file(Size *buffer_size);
     377              : static char *qtext_fetch(Size query_offset, int query_len,
     378              :                          char *buffer, Size buffer_size);
     379              : static bool need_gc_qtexts(void);
     380              : static void gc_qtexts(void);
     381              : static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
     382              : static char *generate_normalized_query(const JumbleState *jstate,
     383              :                                        const char *query,
     384              :                                        int query_loc, int *query_len_p);
     385              : 
     386              : /*
     387              :  * Module load callback
     388              :  */
     389              : void
     390           10 : _PG_init(void)
     391              : {
     392              :     /*
     393              :      * In order to create our shared memory area, we have to be loaded via
     394              :      * shared_preload_libraries.  If not, fall out without hooking into any of
     395              :      * the main system.  (We don't throw error here because it seems useful to
     396              :      * allow the pg_stat_statements functions to be created even when the
     397              :      * module isn't active.  The functions must protect themselves against
     398              :      * being called then, however.)
     399              :      */
     400           10 :     if (!process_shared_preload_libraries_in_progress)
     401            1 :         return;
     402              : 
     403              :     /*
     404              :      * Inform the postmaster that we want to enable query_id calculation if
     405              :      * compute_query_id is set to auto.
     406              :      */
     407            9 :     EnableQueryId();
     408              : 
     409              :     /*
     410              :      * Define (or redefine) custom GUC variables.
     411              :      */
     412            9 :     DefineCustomIntVariable("pg_stat_statements.max",
     413              :                             "Sets the maximum number of statements tracked by pg_stat_statements.",
     414              :                             NULL,
     415              :                             &pgss_max,
     416              :                             5000,
     417              :                             100,
     418              :                             INT_MAX / 2,
     419              :                             PGC_POSTMASTER,
     420              :                             0,
     421              :                             NULL,
     422              :                             NULL,
     423              :                             NULL);
     424              : 
     425            9 :     DefineCustomEnumVariable("pg_stat_statements.track",
     426              :                              "Selects which statements are tracked by pg_stat_statements.",
     427              :                              NULL,
     428              :                              &pgss_track,
     429              :                              PGSS_TRACK_TOP,
     430              :                              track_options,
     431              :                              PGC_SUSET,
     432              :                              0,
     433              :                              NULL,
     434              :                              NULL,
     435              :                              NULL);
     436              : 
     437            9 :     DefineCustomBoolVariable("pg_stat_statements.track_utility",
     438              :                              "Selects whether utility commands are tracked by pg_stat_statements.",
     439              :                              NULL,
     440              :                              &pgss_track_utility,
     441              :                              true,
     442              :                              PGC_SUSET,
     443              :                              0,
     444              :                              NULL,
     445              :                              NULL,
     446              :                              NULL);
     447              : 
     448            9 :     DefineCustomBoolVariable("pg_stat_statements.track_planning",
     449              :                              "Selects whether planning duration is tracked by pg_stat_statements.",
     450              :                              NULL,
     451              :                              &pgss_track_planning,
     452              :                              false,
     453              :                              PGC_SUSET,
     454              :                              0,
     455              :                              NULL,
     456              :                              NULL,
     457              :                              NULL);
     458              : 
     459            9 :     DefineCustomBoolVariable("pg_stat_statements.save",
     460              :                              "Save pg_stat_statements statistics across server shutdowns.",
     461              :                              NULL,
     462              :                              &pgss_save,
     463              :                              true,
     464              :                              PGC_SIGHUP,
     465              :                              0,
     466              :                              NULL,
     467              :                              NULL,
     468              :                              NULL);
     469              : 
     470            9 :     MarkGUCPrefixReserved("pg_stat_statements");
     471              : 
     472              :     /*
     473              :      * Register our shared memory needs.
     474              :      */
     475            9 :     RegisterShmemCallbacks(&pgss_shmem_callbacks);
     476              : 
     477              :     /*
     478              :      * Install hooks.
     479              :      */
     480            9 :     prev_post_parse_analyze_hook = post_parse_analyze_hook;
     481            9 :     post_parse_analyze_hook = pgss_post_parse_analyze;
     482            9 :     prev_planner_hook = planner_hook;
     483            9 :     planner_hook = pgss_planner;
     484            9 :     prev_ExecutorStart = ExecutorStart_hook;
     485            9 :     ExecutorStart_hook = pgss_ExecutorStart;
     486            9 :     prev_ExecutorRun = ExecutorRun_hook;
     487            9 :     ExecutorRun_hook = pgss_ExecutorRun;
     488            9 :     prev_ExecutorFinish = ExecutorFinish_hook;
     489            9 :     ExecutorFinish_hook = pgss_ExecutorFinish;
     490            9 :     prev_ExecutorEnd = ExecutorEnd_hook;
     491            9 :     ExecutorEnd_hook = pgss_ExecutorEnd;
     492            9 :     prev_ProcessUtility = ProcessUtility_hook;
     493            9 :     ProcessUtility_hook = pgss_ProcessUtility;
     494              : }
     495              : 
     496              : /*
     497              :  * shmem request callback: Request shared memory resources.
     498              :  *
     499              :  * This is called at postmaster startup.  Note that the shared memory isn't
     500              :  * allocated here yet; this merely registers our needs.
     501              :  *
     502              :  * In EXEC_BACKEND mode, this is also called in each backend, to re-attach to
     503              :  * the shared memory area that was already initialized.
                      :  *
                      :  * Two areas are requested:
                      :  *  - a hash table named "pg_stat_statements hash", sized for pgss_max
                      :  *    elements, keyed by pgssHashKey with pgssEntry entries, and with
                      :  *    .ptr pointing at pgss_hash;
                      :  *  - a pgssSharedState-sized struct named "pg_stat_statements", with
                      :  *    .ptr pointing at pgss.
                      :  *
                      :  * The 'arg' parameter is not used.
     504              :  */
     505              : static void
     506           11 : pgss_shmem_request(void *arg)
     507              : {
     508           11 :     ShmemRequestHash(.name = "pg_stat_statements hash",
     509              :                      .nelems = pgss_max,
     510              :                      .hash_info.keysize = sizeof(pgssHashKey),
     511              :                      .hash_info.entrysize = sizeof(pgssEntry),
     512              :                      .hash_flags = HASH_ELEM | HASH_BLOBS,
     513              :                      .ptr = &pgss_hash,
     514              :         );
     515           11 :     ShmemRequestStruct(.name = "pg_stat_statements",
     516              :                        .size = sizeof(pgssSharedState),
     517              :                        .ptr = (void **) &pgss,
     518              :         );
     519           11 : }
     520              : 
     521              : /*
     522              :  * shmem init callback: Initialize our shared memory data structures at
     523              :  * postmaster startup.
     524              :  *
     525              :  * Load any pre-existing statistics from file.  Also create and load the
     526              :  * query-texts file, which is expected to exist (even if empty) while the
     527              :  * module is enabled.
                      :  *
                      :  * The dump file is read in this order: a uint32 header (checked against
                      :  * PGSS_FILE_HEADER), a uint32-sized version (checked against
                      :  * PGSS_PG_MAJOR_VERSION), an int32 entry count, then for each entry a
                      :  * pgssEntry image followed by query_len + 1 bytes of query text, and
                      :  * finally a pgssGlobalStats image.  Entries whose counters satisfy
                      :  * IS_STICKY() are read but not loaded.
                      :  *
                      :  * Failures are not fatal: the read_error, data_error and write_error
                      :  * paths each emit a LOG message and then fall into "fail", which frees
                      :  * the buffer, closes any open files, and unlinks PGSS_DUMP_FILE before
                      :  * returning.
                      :  *
                      :  * NOTE(review): apart from the encoding check, the contents of each
                      :  * entry read from the file (including query_len) are used without
                      :  * validation.
                      :  *
                      :  * The 'arg' parameter is not used.
     528              :  */
     529              : static void
     530           11 : pgss_shmem_init(void *arg)
     531              : {
     532              :     int         tranche_id;
     533           11 :     FILE       *file = NULL;
     534           11 :     FILE       *qfile = NULL;
     535              :     uint32      header;
     536              :     int32       num;
     537              :     int32       pgver;
     538              :     int32       i;
     539              :     int         buffer_size;
     540           11 :     char       *buffer = NULL;
     541              : 
     542              :     /*
     543              :      * We already checked that we're loaded from shared_preload_libraries in
     544              :      * _PG_init(), so we should not get here after postmaster startup.
     545              :      */
     546              :     Assert(!IsUnderPostmaster);
     547              : 
     548              :     /*
     549              :      * Initialize the shmem area with no statistics.
     550              :      */
     551           11 :     tranche_id = LWLockNewTrancheId("pg_stat_statements");
     552           11 :     LWLockInitialize(&pgss->lock.lock, tranche_id);
     553           11 :     pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
     554           11 :     pgss->mean_query_len = ASSUMED_LENGTH_INIT;
     555           11 :     SpinLockInit(&pgss->mutex);
     556           11 :     pgss->extent = 0;
     557           11 :     pgss->n_writers = 0;
     558           11 :     pgss->gc_count = 0;
     559           11 :     pgss->stats.dealloc = 0;
     560           11 :     pgss->stats.stats_reset = GetCurrentTimestamp();
     561              : 
     562              :     /* The hash table must've also been initialized by now */
     563              :     Assert(pgss_hash != NULL);
     564              : 
     565              :     /*
     566              :      * Set up a shmem exit hook to dump the statistics to disk on postmaster
     567              :      * (or standalone backend) exit.
     568              :      */
     569           11 :     on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
     570              : 
     571              :     /*
     572              :      * Load any pre-existing statistics from file.
     573              :      *
     574              :      * Note: we don't bother with locks here, because there should be no other
     575              :      * processes running when this code is reached.
     576              :      */
     577              : 
     578              :     /* Unlink query text file possibly left over from crash */
     579           11 :     unlink(PGSS_TEXT_FILE);
     580              : 
     581              :     /* Allocate new query text temp file */
     582           11 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
     583           11 :     if (qfile == NULL)
     584            0 :         goto write_error;
     585              : 
     586              :     /*
     587              :      * If we were told not to load old statistics, we're done.  (Note we do
     588              :      * not try to unlink any old dump file in this case.  This seems a bit
     589              :      * questionable but it's the historical behavior.)
     590              :      */
     591           11 :     if (!pgss_save)
     592              :     {
     593            1 :         FreeFile(qfile);
     594           11 :         return;
     595              :     }
     596              : 
     597              :     /*
     598              :      * Attempt to load old statistics from the dump file.
     599              :      */
     600           10 :     file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
     601           10 :     if (file == NULL)
     602              :     {
     603            7 :         if (errno != ENOENT)
     604            0 :             goto read_error;
     605              :         /* No existing persisted stats file, so we're done */
     606            7 :         FreeFile(qfile);
     607            7 :         return;
     608              :     }
     609              : 
     610            3 :     buffer_size = 2048;
     611            3 :     buffer = (char *) palloc(buffer_size);
     612              : 
     613            6 :     if (fread(&header, sizeof(uint32), 1, file) != 1 ||
     614            6 :         fread(&pgver, sizeof(uint32), 1, file) != 1 ||
     615            3 :         fread(&num, sizeof(int32), 1, file) != 1)
     616            0 :         goto read_error;
     617              : 
     618            3 :     if (header != PGSS_FILE_HEADER ||
     619            3 :         pgver != PGSS_PG_MAJOR_VERSION)
     620            0 :         goto data_error;
     621              : 
     622        28779 :     for (i = 0; i < num; i++)
     623              :     {
     624              :         pgssEntry   temp;
     625              :         pgssEntry  *entry;
     626              :         Size        query_offset;
     627              : 
     628        28776 :         if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
     629            0 :             goto read_error;
     630              : 
     631              :         /* Encoding is the only field we can easily sanity-check */
     632        28776 :         if (!PG_VALID_BE_ENCODING(temp.encoding))
     633            0 :             goto data_error;
     634              : 
     635              :         /* Grow the buffer so it can hold query_len bytes plus a terminator */
     636        28776 :         if (temp.query_len >= buffer_size)
     637              :         {
     638            4 :             buffer_size = Max(buffer_size * 2, temp.query_len + 1);
     639            4 :             buffer = repalloc(buffer, buffer_size);
     640              :         }
     641              : 
     642        28776 :         if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
     643            0 :             goto read_error;
     644              : 
     645              :         /* Should have a trailing null, but let's make sure */
     646        28776 :         buffer[temp.query_len] = '\0';
     647              : 
     648              :         /* Skip loading "sticky" entries */
     649        28776 :         if (IS_STICKY(temp.counters))
     650          784 :             continue;
     651              : 
     652              :         /* Append the query text to the text file, remembering its offset */
     653        27992 :         query_offset = pgss->extent;
     654        27992 :         if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
     655            0 :             goto write_error;
     656        27992 :         pgss->extent += temp.query_len + 1;
     657              : 
     658              :         /* make the hashtable entry (discards old entries if too many) */
     659        27992 :         entry = entry_alloc(&temp.key, query_offset, temp.query_len,
     660              :                             temp.encoding,
     661              :                             false);
     662              : 
     663              :         /* copy in the actual stats */
     664        27992 :         entry->counters = temp.counters;
     665        27992 :         entry->stats_since = temp.stats_since;
     666        27992 :         entry->minmax_stats_since = temp.minmax_stats_since;
     667              :     }
     668              : 
     669              :     /* Read global statistics for pg_stat_statements */
     670            3 :     if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
     671            0 :         goto read_error;
     672              : 
     673            3 :     pfree(buffer);
     674            3 :     FreeFile(file);
     675            3 :     FreeFile(qfile);
     676              : 
     677              :     /*
     678              :      * Remove the persisted stats file so it's not included in
     679              :      * backups/replication standbys, etc.  A new file will be written on next
     680              :      * shutdown.
     681              :      *
     682              :      * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
     683              :      * because we remove that file on startup; it acts inversely to
     684              :      * PGSS_DUMP_FILE, in that it is only supposed to be around when the
     685              :      * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
     686              :      * when the server is not running.  Leaving the file creates no danger of
     687              :      * a newly restored database having a spurious record of execution costs,
     688              :      * which is what we're really concerned about here.
     689              :      */
     690            3 :     unlink(PGSS_DUMP_FILE);
     691              : 
     692            3 :     return;
     693              : 
     694            0 : read_error:
     695            0 :     ereport(LOG,
     696              :             (errcode_for_file_access(),
     697              :              errmsg("could not read file \"%s\": %m",
     698              :                     PGSS_DUMP_FILE)));
     699            0 :     goto fail;
     700            0 : data_error:
     701            0 :     ereport(LOG,
     702              :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     703              :              errmsg("ignoring invalid data in file \"%s\"",
     704              :                     PGSS_DUMP_FILE)));
     705            0 :     goto fail;
     706            0 : write_error:
     707            0 :     ereport(LOG,
     708              :             (errcode_for_file_access(),
     709              :              errmsg("could not write file \"%s\": %m",
     710              :                     PGSS_TEXT_FILE)));
     711            0 : fail:
     712            0 :     if (buffer)
     713            0 :         pfree(buffer);
     714            0 :     if (file)
     715            0 :         FreeFile(file);
     716            0 :     if (qfile)
     717            0 :         FreeFile(qfile);
     718              :     /* If possible, throw away the bogus file; ignore any error */
     719            0 :     unlink(PGSS_DUMP_FILE);
     720              : 
     721              :     /*
     722              :      * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
     723              :      * server is running with pg_stat_statements enabled
     724              :      */
     725              : }
     726              : 
     727              : /*
     728              :  * shmem_shutdown hook: Dump statistics into file.
     729              :  *
     730              :  * Note: we don't bother with acquiring lock, because there should be no
     731              :  * other processes running when this is called.
                      :  *
                      :  * 'code' is the exit code: when it is nonzero nothing is dumped.  'arg' is
                      :  * not used.  Nothing is dumped either when the shared state is missing or
                      :  * when pgss_save is off.
                      :  *
                      :  * The dump is written to PGSS_DUMP_FILE ".tmp" as: file header, major
                      :  * version, entry count, then each hash entry followed by its query text
                      :  * (len + 1 bytes), and finally the global statistics.  The temporary file
                      :  * is then renamed to PGSS_DUMP_FILE and the query-texts file is unlinked.
                      :  *
                      :  * On a write failure a LOG message is emitted and both the temporary dump
                      :  * file and the query-texts file are unlinked.
     732              :  */
     733              : static void
     734           11 : pgss_shmem_shutdown(int code, Datum arg)
     735              : {
     736              :     FILE       *file;
     737           11 :     char       *qbuffer = NULL;
     738           11 :     Size        qbuffer_size = 0;
     739              :     HASH_SEQ_STATUS hash_seq;
     740              :     int32       num_entries;
     741              :     pgssEntry  *entry;
     742              : 
     743              :     /* Don't try to dump during a crash. */
     744           11 :     if (code)
     745           11 :         return;
     746              : 
     747              :     /* Safety check ... shouldn't get here unless shmem is set up. */
     748            9 :     if (!pgss || !pgss_hash)
     749            0 :         return;
     750              : 
     751              :     /* Don't dump if told not to. */
     752            9 :     if (!pgss_save)
     753            2 :         return;
     754              : 
     755            7 :     file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
     756            7 :     if (file == NULL)
     757            0 :         goto error;
     758              : 
     759            7 :     if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
     760            0 :         goto error;
     761            7 :     if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
     762            0 :         goto error;
     763            7 :     num_entries = hash_get_num_entries(pgss_hash);
     764            7 :     if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
     765            0 :         goto error;
     766              : 
     767            7 :     qbuffer = qtext_load_file(&qbuffer_size);
     768            7 :     if (qbuffer == NULL)
     769            0 :         goto error;
     770              : 
     771              :     /*
     772              :      * When serializing to disk, we store query texts immediately after their
     773              :      * entry data.  Any orphaned query texts are thereby excluded.
     774              :      */
     775            7 :     hash_seq_init(&hash_seq, pgss_hash);
     776        57852 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
     777              :     {
     778        57845 :         int         len = entry->query_len;
     779        57845 :         char       *qstr = qtext_fetch(entry->query_offset, len,
     780              :                                        qbuffer, qbuffer_size);
     781              : 
     782        57845 :         if (qstr == NULL)
     783            0 :             continue;           /* Ignore any entries with bogus texts */
     784              : 
     785        57845 :         if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
     786        57845 :             fwrite(qstr, 1, len + 1, file) != len + 1)
     787              :         {
     788              :             /* note: we assume hash_seq_term won't change errno */
     789            0 :             hash_seq_term(&hash_seq);
     790            0 :             goto error;
     791              :         }
     792              :     }
     793              : 
     794              :     /* Dump global statistics for pg_stat_statements */
     795            7 :     if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
     796            0 :         goto error;
     797              : 
     798            7 :     pfree(qbuffer);
     799            7 :     qbuffer = NULL;
     800              : 
     801            7 :     if (FreeFile(file))
     802              :     {
     803            0 :         file = NULL;
     804            0 :         goto error;
     805              :     }
     806              : 
     807              :     /*
     808              :      * Rename file into place, so we atomically replace any old one.
     809              :      */
     810            7 :     (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
     811              : 
     812              :     /* Unlink query-texts file; it's not needed once we are shutting down */
     813            7 :     unlink(PGSS_TEXT_FILE);
     814              : 
     815            7 :     return;
     816              : 
     817            0 : error:
     818            0 :     ereport(LOG,
     819              :             (errcode_for_file_access(),
     820              :              errmsg("could not write file \"%s\": %m",
     821              :                     PGSS_DUMP_FILE ".tmp")));
     822            0 :     if (qbuffer)
     823            0 :         pfree(qbuffer);
     824            0 :     if (file)
     825            0 :         FreeFile(file);
     826            0 :     unlink(PGSS_DUMP_FILE ".tmp");
     827            0 :     unlink(PGSS_TEXT_FILE);
     828              : }
     829              : 
     830              : /*
     831              :  * Post-parse-analysis hook: clear the queryId of EXECUTE statements and,
                      :  * when the jumble state recorded constant locations, create an early
                      :  * entry so the normalized query text can be recorded.
                      :  *
                      :  * Any previously installed post_parse_analyze hook is called first.
                      :  * Nothing else is done unless the shared state exists and tracking is
                      :  * enabled at the current nesting level.
     832              :  */
     833              : static void
     834        83834 : pgss_post_parse_analyze(ParseState *pstate, Query *query, const JumbleState *jstate)
     835              : {
     836        83834 :     if (prev_post_parse_analyze_hook)
     837            0 :         prev_post_parse_analyze_hook(pstate, query, jstate);
     838              : 
     839              :     /* Safety check: shared state must exist and tracking must be enabled */
     840        83834 :     if (!pgss || !pgss_hash || !pgss_enabled(nesting_level))
     841        12962 :         return;
     842              : 
     843              :     /*
     844              :      * If it's EXECUTE, clear the queryId so that stats will accumulate for
     845              :      * the underlying PREPARE.  But don't do this if we're not tracking
     846              :      * utility statements, to avoid messing up another extension that might be
     847              :      * tracking them.
     848              :      */
     849        70872 :     if (query->utilityStmt)
     850              :     {
     851        31801 :         if (pgss_track_utility && IsA(query->utilityStmt, ExecuteStmt))
     852              :         {
     853         3374 :             query->queryId = INT64CONST(0);
     854         3374 :             return;
     855              :         }
     856              :     }
     857              : 
     858              :     /*
     859              :      * If query jumbling was able to identify any ignorable constants, we
     860              :      * immediately create a hash table entry for the query, so that we can
     861              :      * record the normalized form of the query string.  If there were no such
     862              :      * constants, the normalized string would be the same as the query text
     863              :      * anyway, so there's no need for an early entry.
     864              :      */
     865        67498 :     if (jstate && jstate->clocations_count > 0)
     866        38915 :         pgss_store(pstate->p_sourcetext,
     867              :                    query->queryId,
     868              :                    query->stmt_location,
     869              :                    query->stmt_len,
     870              :                    PGSS_INVALID,
     871              :                    0,
     872              :                    0,
     873              :                    NULL,
     874              :                    NULL,
     875              :                    NULL,
     876              :                    jstate,
     877              :                    0,
     878              :                    0,
     879              :                    PLAN_STMT_UNKNOWN);
     880              : }
     881              : 
     882              : /*
     883              :  * Planner hook: forward to regular planner, but measure planning time
     884              :  * if needed.
                      :  *
                      :  * All arguments are passed through unchanged to the previously installed
                      :  * planner hook if there is one, otherwise to standard_planner(); the
                      :  * PlannedStmt it produces is returned.
                      :  *
                      :  * In both branches nesting_level is incremented around the planner call
                      :  * and decremented again in a PG_FINALLY block.  When planning is tracked,
                      :  * the elapsed time and the buffer and WAL usage deltas measured around
                      :  * the call are passed to pgss_store() with kind PGSS_PLAN.
     885              :  */
     886              : static PlannedStmt *
     887        50329 : pgss_planner(Query *parse,
     888              :              const char *query_string,
     889              :              int cursorOptions,
     890              :              ParamListInfo boundParams,
     891              :              ExplainState *es)
     892              : {
     893              :     PlannedStmt *result;
     894              : 
     895              :     /*
     896              :      * We can't process the query if no query_string is provided, as
     897              :      * pgss_store needs it.  We also ignore queries without a queryid, as they
     898              :      * would be treated as utility statements, which may not be the case.
     899              :      */
     900        50329 :     if (pgss_enabled(nesting_level)
     901        39223 :         && pgss_track_planning && query_string
     902          150 :         && parse->queryId != INT64CONST(0))
     903          150 :     {
     904              :         instr_time  start;
     905              :         instr_time  duration;
     906              :         BufferUsage bufusage_start,
     907              :                     bufusage;
     908              :         WalUsage    walusage_start,
     909              :                     walusage;
     910              : 
     911              :         /* We need to track buffer usage as the planner can access them. */
     912          150 :         bufusage_start = pgBufferUsage;
     913              : 
     914              :         /*
     915              :          * Similarly the planner could write some WAL records in some cases
     916              :          * (e.g. setting a hint bit with those being WAL-logged)
     917              :          */
     918          150 :         walusage_start = pgWalUsage;
     919          150 :         INSTR_TIME_SET_CURRENT(start);
     920              : 
     921          150 :         nesting_level++;
     922          150 :         PG_TRY();
     923              :         {
     924          150 :             if (prev_planner_hook)
     925            0 :                 result = prev_planner_hook(parse, query_string, cursorOptions,
     926              :                                            boundParams, es);
     927              :             else
     928          150 :                 result = standard_planner(parse, query_string, cursorOptions,
     929              :                                           boundParams, es);
     930              :         }
     931            0 :         PG_FINALLY();
     932              :         {
     933          150 :             nesting_level--;
     934              :         }
     935          150 :         PG_END_TRY();
     936              : 
     937          150 :         INSTR_TIME_SET_CURRENT(duration);
     938          150 :         INSTR_TIME_SUBTRACT(duration, start);
     939              : 
     940              :         /* calc differences of buffer counters. */
     941          150 :         memset(&bufusage, 0, sizeof(BufferUsage));
     942          150 :         BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
     943              : 
     944              :         /* calc differences of WAL counters. */
     945          150 :         memset(&walusage, 0, sizeof(WalUsage));
     946          150 :         WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
     947              : 
     948          300 :         pgss_store(query_string,
     949              :                    parse->queryId,
     950              :                    parse->stmt_location,
     951              :                    parse->stmt_len,
     952              :                    PGSS_PLAN,
     953          150 :                    INSTR_TIME_GET_MILLISEC(duration),
     954              :                    0,
     955              :                    &bufusage,
     956              :                    &walusage,
     957              :                    NULL,
     958              :                    NULL,
     959              :                    0,
     960              :                    0,
     961              :                    result->planOrigin);
     962              :     }
     963              :     else
     964              :     {
     965              :         /*
     966              :          * Even though we're not tracking plan time for this statement, we
     967              :          * must still increment the nesting level, to ensure that functions
     968              :          * evaluated during planning are not seen as top-level calls.
     969              :          */
     970        50179 :         nesting_level++;
     971        50179 :         PG_TRY();
     972              :         {
     973        50179 :             if (prev_planner_hook)
     974            0 :                 result = prev_planner_hook(parse, query_string, cursorOptions,
     975              :                                            boundParams, es);
     976              :             else
     977        50179 :                 result = standard_planner(parse, query_string, cursorOptions,
     978              :                                           boundParams, es);
     979              :         }
     980          771 :         PG_FINALLY();
     981              :         {
     982        50179 :             nesting_level--;
     983              :         }
     984        50179 :         PG_END_TRY();
     985              :     }
     986              : 
     987        49558 :     return result;
     988              : }
     989              : 
     990              : /*
     991              :  * ExecutorStart hook: start up tracking if needed
                      :  *
                      :  * When tracking is enabled at the current nesting level and the planned
                      :  * statement has a nonzero queryId, INSTRUMENT_ALL is OR'ed into
                      :  * queryDesc->query_instr_options.  The call is then forwarded to the
                      :  * previously installed hook if there is one, otherwise to
                      :  * standard_ExecutorStart().
     992              :  */
     993              : static void
     994        60438 : pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
     995              : {
     996              :     /*
     997              :      * If query has queryId zero, don't track it.  This prevents double
     998              :      * counting of optimizable statements that are directly contained in
     999              :      * utility statements.
    1000              :      */
    1001        60438 :     if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
    1002              :     {
    1003              :         /* Request all summary instrumentation, i.e. timing, buffers and WAL */
    1004        41417 :         queryDesc->query_instr_options |= INSTRUMENT_ALL;
    1005              :     }
    1006              : 
    1007        60438 :     if (prev_ExecutorStart)
    1008            0 :         prev_ExecutorStart(queryDesc, eflags);
    1009              :     else
    1010        60438 :         standard_ExecutorStart(queryDesc, eflags);
    1011        60156 : }
    1012              : 
    1013              : /*
    1014              :  * ExecutorRun hook: all we need do is track nesting depth
                      :  *
                      :  * nesting_level is incremented before forwarding to the previously
                      :  * installed hook (or standard_ExecutorRun() if there is none) and
                      :  * decremented again in a PG_FINALLY block.
    1015              :  */
    1016              : static void
    1017        58821 : pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
    1018              : {
    1019        58821 :     nesting_level++;
    1020        58821 :     PG_TRY();
    1021              :     {
    1022        58821 :         if (prev_ExecutorRun)
    1023            0 :             prev_ExecutorRun(queryDesc, direction, count);
    1024              :         else
    1025        58821 :             standard_ExecutorRun(queryDesc, direction, count);
    1026              :     }
    1027         3414 :     PG_FINALLY();
    1028              :     {
    1029        58821 :         nesting_level--;
    1030              :     }
    1031        58821 :     PG_END_TRY();
    1032        55407 : }
    1033              : 
    1034              : /*
    1035              :  * ExecutorFinish hook: all we need do is track nesting depth
                      :  *
                      :  * nesting_level is incremented before forwarding to the previously
                      :  * installed hook (or standard_ExecutorFinish() if there is none) and
                      :  * decremented again in a PG_FINALLY block.
    1036              :  */
    1037              : static void
    1038        53346 : pgss_ExecutorFinish(QueryDesc *queryDesc)
    1039              : {
    1040        53346 :     nesting_level++;
    1041        53346 :     PG_TRY();
    1042              :     {
    1043        53346 :         if (prev_ExecutorFinish)
    1044            0 :             prev_ExecutorFinish(queryDesc);
    1045              :         else
    1046        53346 :             standard_ExecutorFinish(queryDesc);
    1047              :     }
    1048          179 :     PG_FINALLY();
    1049              :     {
    1050        53346 :         nesting_level--;
    1051              :     }
    1052        53346 :     PG_END_TRY();
    1053        53167 : }
    1054              : 
    1055              : /*
    1056              :  * ExecutorEnd hook: store results if needed
                      :  *
                      :  * Results are stored only when the planned statement has a nonzero
                      :  * queryId, queryDesc->query_instr is set, and tracking is enabled at the
                      :  * current nesting level.  In that case pgss_store() is called with kind
                      :  * PGSS_EXEC, passing the total time, processed-row count, buffer and WAL
                      :  * usage, JIT instrumentation (if es_jit is set) and parallel worker counts
                      :  * taken from queryDesc.
                      :  *
                      :  * This is done before forwarding to the previously installed hook (or
                      :  * standard_ExecutorEnd() if there is none).
    1057              :  */
    1058              : static void
    1059        56237 : pgss_ExecutorEnd(QueryDesc *queryDesc)
    1060              : {
    1061        56237 :     int64       queryId = queryDesc->plannedstmt->queryId;
    1062              : 
    1063        56237 :     if (queryId != INT64CONST(0) && queryDesc->query_instr &&
    1064        39469 :         pgss_enabled(nesting_level))
    1065              :     {
    1066        39469 :         pgss_store(queryDesc->sourceText,
    1067              :                    queryId,
    1068        39469 :                    queryDesc->plannedstmt->stmt_location,
    1069        39469 :                    queryDesc->plannedstmt->stmt_len,
    1070              :                    PGSS_EXEC,
    1071        39469 :                    INSTR_TIME_GET_MILLISEC(queryDesc->query_instr->total),
    1072        39469 :                    queryDesc->estate->es_total_processed,
    1073        39469 :                    &queryDesc->query_instr->bufusage,
    1074        39469 :                    &queryDesc->query_instr->walusage,
    1075            0 :                    queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
    1076              :                    NULL,
    1077        39469 :                    queryDesc->estate->es_parallel_workers_to_launch,
    1078        39469 :                    queryDesc->estate->es_parallel_workers_launched,
    1079        39469 :                    queryDesc->plannedstmt->planOrigin);
    1080              :     }
    1081              : 
    1082        56237 :     if (prev_ExecutorEnd)
    1083            0 :         prev_ExecutorEnd(queryDesc);
    1084              :     else
    1085        56237 :         standard_ExecutorEnd(queryDesc);
    1086        56237 : }
    1087              : 
    1088              : /*
    1089              :  * ProcessUtility hook
                      :  *
                      :  * When utility tracking is active (pgss_track_utility and pgss_enabled()),
                      :  * the statement's queryId is forced to zero and, unless it is EXECUTE or
                      :  * PREPARE, the call is timed and its buffer/WAL usage deltas are passed to
                      :  * pgss_store() under the queryId saved on entry.  Otherwise the call is
                      :  * simply chained, still bumping nesting_level except for EXECUTE and
                      :  * PREPARE.
                      :  *
                      :  * Every pstmt field needed after the chained call, planOrigin included, is
                      :  * copied into a local first, because *pstmt may have been freed by the
                      :  * time that call returns.
    1090              :  */
    1091              : static void
    1092        37433 : pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
    1093              :                     bool readOnlyTree,
    1094              :                     ProcessUtilityContext context,
    1095              :                     ParamListInfo params, QueryEnvironment *queryEnv,
    1096              :                     DestReceiver *dest, QueryCompletion *qc)
    1097              : {
    1098        37433 :     Node       *parsetree = pstmt->utilityStmt;
    1099        37433 :     int64       saved_queryId = pstmt->queryId;
    1100        37433 :     int         saved_stmt_location = pstmt->stmt_location;
    1101        37433 :     int         saved_stmt_len = pstmt->stmt_len;
                      :     PlannedStmtOrigin saved_planOrigin = pstmt->planOrigin;
    1102        37433 :     bool        enabled = pgss_track_utility && pgss_enabled(nesting_level);
    1103              : 
    1104              :     /*
    1105              :      * Force utility statements to get queryId zero.  We do this even in cases
    1106              :      * where the statement contains an optimizable statement for which a
    1107              :      * queryId could be derived (such as EXPLAIN or DECLARE CURSOR).  For such
    1108              :      * cases, runtime control will first go through ProcessUtility and then
    1109              :      * the executor, and we don't want the executor hooks to do anything,
    1110              :      * since we are already measuring the statement's costs at the utility
    1111              :      * level.
    1112              :      *
    1113              :      * Note that this is only done if pg_stat_statements is enabled and
    1114              :      * configured to track utility statements, in the unlikely possibility
    1115              :      * that user configured another extension to handle utility statements
    1116              :      * only.
    1117              :      */
    1118        37433 :     if (enabled)
    1119        31686 :         pstmt->queryId = INT64CONST(0);
    1120              : 
    1121              :     /*
    1122              :      * If it's an EXECUTE statement, we don't track it and don't increment the
    1123              :      * nesting level.  This allows the cycles to be charged to the underlying
    1124              :      * PREPARE instead (by the Executor hooks), which is much more useful.
    1125              :      *
    1126              :      * We also don't track execution of PREPARE.  If we did, we would get one
    1127              :      * hash table entry for the PREPARE (with hash calculated from the query
    1128              :      * string), and then a different one with the same query string (but hash
    1129              :      * calculated from the query tree) would be used to accumulate costs of
    1130              :      * ensuing EXECUTEs.  This would be confusing.  Since PREPARE doesn't
    1131              :      * actually run the planner (only parse+rewrite), its costs are generally
    1132              :      * pretty negligible and it seems okay to just ignore it.
    1133              :      */
    1134        37433 :     if (enabled &&
    1135        31686 :         !IsA(parsetree, ExecuteStmt) &&
    1136        28318 :         !IsA(parsetree, PrepareStmt))
    1137        25532 :     {
    1138              :         instr_time  start;
    1139              :         instr_time  duration;
    1140              :         uint64      rows;
    1141              :         BufferUsage bufusage_start,
    1142              :                     bufusage;
    1143              :         WalUsage    walusage_start,
    1144              :                     walusage;
    1145              : 
    1146        28190 :         bufusage_start = pgBufferUsage;
    1147        28190 :         walusage_start = pgWalUsage;
    1148        28190 :         INSTR_TIME_SET_CURRENT(start);
    1149              : 
    1150        28190 :         nesting_level++;
    1151        28190 :         PG_TRY();
    1152              :         {
    1153        28190 :             if (prev_ProcessUtility)
    1154            0 :                 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
    1155              :                                     context, params, queryEnv,
    1156              :                                     dest, qc);
    1157              :             else
    1158        28190 :                 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
    1159              :                                         context, params, queryEnv,
    1160              :                                         dest, qc);
    1161              :         }
    1162         2658 :         PG_FINALLY();
    1163              :         {
    1164        28190 :             nesting_level--;
    1165              :         }
    1166        28190 :         PG_END_TRY();
    1167              : 
    1168              :         /*
    1169              :          * CAUTION: do not access the *pstmt data structure again below here.
    1170              :          * If it was a ROLLBACK or similar, that data structure may have been
    1171              :          * freed.  We must copy everything we still need into local variables,
    1172              :          * which we did above.
    1173              :          *
    1174              :          * For the same reason, we can't risk restoring pstmt->queryId to its
    1175              :          * former value, which'd otherwise be a good idea.
    1176              :          */
    1177              : 
    1178        25532 :         INSTR_TIME_SET_CURRENT(duration);
    1179        25532 :         INSTR_TIME_SUBTRACT(duration, start);
    1180              : 
    1181              :         /*
    1182              :          * Track the total number of rows retrieved or affected by the utility
    1183              :          * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
    1184              :          * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
    1185              :          */
    1186        25529 :         rows = (qc && (qc->commandTag == CMDTAG_COPY ||
    1187        23726 :                        qc->commandTag == CMDTAG_FETCH ||
    1188        23466 :                        qc->commandTag == CMDTAG_SELECT ||
    1189        23273 :                        qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
    1190        51061 :             qc->nprocessed : 0;
    1191              : 
    1192              :         /* calc differences of buffer counters. */
    1193        25532 :         memset(&bufusage, 0, sizeof(BufferUsage));
    1194        25532 :         BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
    1195              : 
    1196              :         /* calc differences of WAL counters. */
    1197        25532 :         memset(&walusage, 0, sizeof(WalUsage));
    1198        25532 :         WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
    1199              : 
    1200        51064 :         pgss_store(queryString,
    1201              :                    saved_queryId,
    1202              :                    saved_stmt_location,
    1203              :                    saved_stmt_len,
    1204              :                    PGSS_EXEC,
    1205        25532 :                    INSTR_TIME_GET_MILLISEC(duration),
    1206              :                    rows,
    1207              :                    &bufusage,
    1208              :                    &walusage,
    1209              :                    NULL,
    1210              :                    NULL,
    1211              :                    0,
    1212              :                    0,
    1213              :                    saved_planOrigin); /* local copy; *pstmt may be gone by now */
    1214              :     }
    1215              :     else
    1216              :     {
    1217              :         /*
    1218              :          * Even though we're not tracking execution time for this statement,
    1219              :          * we must still increment the nesting level, to ensure that functions
    1220              :          * evaluated within it are not seen as top-level calls.  But don't do
    1221              :          * so for EXECUTE; that way, when control reaches pgss_planner or
    1222              :          * pgss_ExecutorStart, we will treat the costs as top-level if
    1223              :          * appropriate.  Likewise, don't bump for PREPARE, so that parse
    1224              :          * analysis will treat the statement as top-level if appropriate.
    1225              :          *
    1226              :          * To be absolutely certain we don't mess up the nesting level,
    1227              :          * evaluate the bump_level condition just once.
    1228              :          */
    1229         9243 :         bool        bump_level =
    1230        15117 :             !IsA(parsetree, ExecuteStmt) &&
    1231         5874 :             !IsA(parsetree, PrepareStmt);
    1232              : 
    1233         9243 :         if (bump_level)
    1234         5745 :             nesting_level++;
    1235         9243 :         PG_TRY();
    1236              :         {
    1237         9243 :             if (prev_ProcessUtility)
    1238            0 :                 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
    1239              :                                     context, params, queryEnv,
    1240              :                                     dest, qc);
    1241              :             else
    1242         9243 :                 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
    1243              :                                         context, params, queryEnv,
    1244              :                                         dest, qc);
    1245              :         }
    1246          138 :         PG_FINALLY();
    1247              :         {
    1248         9243 :             if (bump_level)
    1249         5745 :                 nesting_level--;
    1250              :         }
    1251         9243 :         PG_END_TRY();
    1252              :     }
    1253        34637 : }
    1254              : 
    1255              : /*
    1256              :  * Store some statistics for a statement.
    1257              :  *
    1258              :  * If jstate is not NULL then we're trying to create an entry for which
    1259              :  * we have no statistics as yet; we just want to record the normalized
    1260              :  * query string.  total_time, rows, bufusage and walusage are ignored in this
    1261              :  * case.
    1262              :  *
    1263              :  * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
    1264              :  * for the arrays in the Counters field.
                      :  *
                      :  * Nothing is recorded when the shared state (pgss, pgss_hash) is not set
                      :  * up or when queryId is zero.
                      :  *
                      :  * Locking: the entry is looked up under a shared hold on pgss's lock.  If
                      :  * it has to be created, the query text is appended to the texts file
                      :  * while still holding the shared lock, and the lock is then released and
                      :  * retaken exclusively for entry_alloc() (and for gc_qtexts() when
                      :  * need_gc_qtexts() asked for it).  Counter updates are made under the
                      :  * entry's spinlock.  The normalized query string, if one was built, is
                      :  * freed only after the lock has been released.
                      :  *
                      :  * jitusage may be NULL, in which case the JIT counters are left alone.
                      :  * planOrigin bumps generic_plan_calls for PLAN_STMT_CACHE_GENERIC and
                      :  * custom_plan_calls for PLAN_STMT_CACHE_CUSTOM; any other value changes
                      :  * neither counter.
    1265              :  */
    1266              : static void
    1267       104066 : pgss_store(const char *query, int64 queryId,
    1268              :            int query_location, int query_len,
    1269              :            pgssStoreKind kind,
    1270              :            double total_time, uint64 rows,
    1271              :            const BufferUsage *bufusage,
    1272              :            const WalUsage *walusage,
    1273              :            const struct JitInstrumentation *jitusage,
    1274              :            const JumbleState *jstate,
    1275              :            int parallel_workers_to_launch,
    1276              :            int parallel_workers_launched,
    1277              :            PlannedStmtOrigin planOrigin)
    1278              : {
    1279              :     pgssHashKey key;
    1280              :     pgssEntry  *entry;
    1281       104066 :     char       *norm_query = NULL;
    1282       104066 :     int         encoding = GetDatabaseEncoding();
    1283              : 
    1284              :     Assert(query != NULL);
    1285              : 
    1286              :     /* Safety check... */
    1287       104066 :     if (!pgss || !pgss_hash)
    1288            0 :         return;
    1289              : 
    1290              :     /*
    1291              :      * Nothing to do if compute_query_id isn't enabled and no other module
    1292              :      * computed a query identifier.
    1293              :      */
    1294       104066 :     if (queryId == INT64CONST(0))
    1295            0 :         return;
    1296              : 
    1297              :     /*
    1298              :      * Confine our attention to the relevant part of the string, if the query
    1299              :      * is a portion of a multi-statement source string, and update query
    1300              :      * location and length if needed.
    1301              :      */
    1302       104066 :     query = CleanQuerytext(query, &query_location, &query_len);
    1303              : 
    1304              :     /* Set up key for hashtable search */
    1305              : 
    1306              :     /* clear padding */
    1307       104066 :     memset(&key, 0, sizeof(pgssHashKey));
    1308              : 
    1309       104066 :     key.userid = GetUserId();
    1310       104066 :     key.dbid = MyDatabaseId;
    1311       104066 :     key.queryid = queryId;
    1312       104066 :     key.toplevel = (nesting_level == 0);
    1313              : 
    1314              :     /* Lookup the hash table entry with shared lock. */
    1315       104066 :     LWLockAcquire(&pgss->lock.lock, LW_SHARED);
    1316              : 
    1317       104066 :     entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
    1318              : 
    1319              :     /* Create new entry, if not present */
    1320       104066 :     if (!entry)
    1321              :     {
    1322              :         Size        query_offset;
    1323              :         int         gc_count;
    1324              :         bool        stored;
    1325              :         bool        do_gc;
    1326              : 
    1327              :         /*
    1328              :          * Create a new, normalized query string if caller asked.  We don't
    1329              :          * need to hold the lock while doing this work.  (Note: in any case,
    1330              :          * it's possible that someone else creates a duplicate hashtable entry
    1331              :          * in the interval where we don't hold the lock below.  That case is
    1332              :          * handled by entry_alloc.)
    1333              :          */
    1334        30775 :         if (jstate)
    1335              :         {
    1336        11403 :             LWLockRelease(&pgss->lock.lock);
    1337        11403 :             norm_query = generate_normalized_query(jstate, query,
    1338              :                                                    query_location,
    1339              :                                                    &query_len);
    1340        11403 :             LWLockAcquire(&pgss->lock.lock, LW_SHARED);
    1341              :         }
    1342              : 
    1343              :         /* Append new query text to file with only shared lock held */
    1344        30775 :         stored = qtext_store(norm_query ? norm_query : query, query_len,
    1345              :                              &query_offset, &gc_count);
    1346              : 
    1347              :         /*
    1348              :          * Determine whether we need to garbage collect external query texts
    1349              :          * while the shared lock is still held.  This micro-optimization
    1350              :          * avoids taking the time to decide this while holding exclusive lock.
    1351              :          */
    1352        30775 :         do_gc = need_gc_qtexts();
    1353              : 
    1354              :         /* Need exclusive lock to make a new hashtable entry - promote */
    1355        30775 :         LWLockRelease(&pgss->lock.lock);
    1356        30775 :         LWLockAcquire(&pgss->lock.lock, LW_EXCLUSIVE);
    1357              : 
    1358              :         /*
    1359              :          * A garbage collection may have occurred while we weren't holding the
    1360              :          * lock.  In the unlikely event that this happens, the query text we
    1361              :          * stored above will have been garbage collected, so write it again.
    1362              :          * This should be infrequent enough that doing it while holding
    1363              :          * exclusive lock isn't a performance problem.
    1364              :          */
    1365        30775 :         if (!stored || pgss->gc_count != gc_count)
    1366            0 :             stored = qtext_store(norm_query ? norm_query : query, query_len,
    1367              :                                  &query_offset, NULL);
    1368              : 
    1369              :         /* If we failed to write to the text file, give up */
    1370        30775 :         if (!stored)
    1371            0 :             goto done;
    1372              : 
    1373              :         /* OK to create a new hashtable entry */
    1374        30775 :         entry = entry_alloc(&key, query_offset, query_len, encoding,
    1375              :                             jstate != NULL);
    1376              : 
    1377              :         /* If needed, perform garbage collection while exclusive lock held */
    1378        30775 :         if (do_gc)
    1379            0 :             gc_qtexts();
    1380              :     }
    1381              : 
    1382              :     /* Increment the counts, except when jstate is not NULL */
    1383       104066 :     if (!jstate)
    1384              :     {
    1385              :         Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
    1386              : 
    1387              :         /*
    1388              :          * Grab the spinlock while updating the counters (see comment about
    1389              :          * locking rules at the head of the file)
    1390              :          */
    1391        65151 :         SpinLockAcquire(&entry->mutex);
    1392              : 
    1393              :         /* "Unstick" entry if it was previously sticky */
    1394        65151 :         if (IS_STICKY(entry->counters))
    1395        29958 :             entry->counters.usage = USAGE_INIT;
    1396              : 
    1397        65151 :         entry->counters.calls[kind] += 1;
    1398        65151 :         entry->counters.total_time[kind] += total_time;
    1399              : 
    1400        65151 :         if (entry->counters.calls[kind] == 1)
    1401              :         {
    1402        30052 :             entry->counters.min_time[kind] = total_time;
    1403        30052 :             entry->counters.max_time[kind] = total_time;
    1404        30052 :             entry->counters.mean_time[kind] = total_time;
    1405              :         }
    1406              :         else
    1407              :         {
    1408              :             /*
    1409              :              * Welford's method for accurately computing variance. See
    1410              :              * <http://www.johndcook.com/blog/standard_deviation/>
    1411              :              */
    1412        35099 :             double      old_mean = entry->counters.mean_time[kind];
    1413              : 
    1414        35099 :             entry->counters.mean_time[kind] +=
    1415        35099 :                 (total_time - old_mean) / entry->counters.calls[kind];
    1416        35099 :             entry->counters.sum_var_time[kind] +=
    1417        35099 :                 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
    1418              : 
    1419              :             /*
    1420              :              * Calculate min and max time. min = 0 and max = 0 means that the
    1421              :              * min/max statistics were reset
    1422              :              */
    1423        35099 :             if (entry->counters.min_time[kind] == 0
    1424            6 :                 && entry->counters.max_time[kind] == 0)
    1425              :             {
    1426            3 :                 entry->counters.min_time[kind] = total_time;
    1427            3 :                 entry->counters.max_time[kind] = total_time;
    1428              :             }
    1429              :             else
    1430              :             {
    1431        35096 :                 if (entry->counters.min_time[kind] > total_time)
    1432         6618 :                     entry->counters.min_time[kind] = total_time;
    1433        35096 :                 if (entry->counters.max_time[kind] < total_time)
    1434         3588 :                     entry->counters.max_time[kind] = total_time;
    1435              :             }
    1436              :         }
    1437        65151 :         entry->counters.rows += rows;
    1438        65151 :         entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
    1439        65151 :         entry->counters.shared_blks_read += bufusage->shared_blks_read;
    1440        65151 :         entry->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
    1441        65151 :         entry->counters.shared_blks_written += bufusage->shared_blks_written;
    1442        65151 :         entry->counters.local_blks_hit += bufusage->local_blks_hit;
    1443        65151 :         entry->counters.local_blks_read += bufusage->local_blks_read;
    1444        65151 :         entry->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
    1445        65151 :         entry->counters.local_blks_written += bufusage->local_blks_written;
    1446        65151 :         entry->counters.temp_blks_read += bufusage->temp_blks_read;
    1447        65151 :         entry->counters.temp_blks_written += bufusage->temp_blks_written;
    1448        65151 :         entry->counters.shared_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_read_time);
    1449        65151 :         entry->counters.shared_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_write_time);
    1450        65151 :         entry->counters.local_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_read_time);
    1451        65151 :         entry->counters.local_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_write_time);
    1452        65151 :         entry->counters.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time);
    1453        65151 :         entry->counters.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time);
    1454        65151 :         entry->counters.usage += USAGE_EXEC(total_time);
    1455        65151 :         entry->counters.wal_records += walusage->wal_records;
    1456        65151 :         entry->counters.wal_fpi += walusage->wal_fpi;
    1457        65151 :         entry->counters.wal_bytes += walusage->wal_bytes;
    1458        65151 :         entry->counters.wal_buffers_full += walusage->wal_buffers_full;
    1459        65151 :         if (jitusage)
    1460              :         {
    1461            0 :             entry->counters.jit_functions += jitusage->created_functions;
    1462            0 :             entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
    1463              : 
    1464            0 :             if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
    1465            0 :                 entry->counters.jit_deform_count++;
    1466            0 :             entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
    1467              : 
    1468            0 :             if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
    1469            0 :                 entry->counters.jit_inlining_count++;
    1470            0 :             entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
    1471              : 
    1472            0 :             if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
    1473            0 :                 entry->counters.jit_optimization_count++;
    1474            0 :             entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
    1475              : 
    1476            0 :             if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
    1477            0 :                 entry->counters.jit_emission_count++;
    1478            0 :             entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
    1479              :         }
    1480              : 
    1481              :         /* parallel worker counters */
    1482        65151 :         entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
    1483        65151 :         entry->counters.parallel_workers_launched += parallel_workers_launched;
    1484              : 
    1485              :         /* plan cache counters */
    1486        65151 :         if (planOrigin == PLAN_STMT_CACHE_GENERIC)
    1487         3151 :             entry->counters.generic_plan_calls++;
    1488        62000 :         else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
    1489          375 :             entry->counters.custom_plan_calls++;
    1490              : 
    1491        65151 :         SpinLockRelease(&entry->mutex);
    1492              :     }
    1493              : 
    1494        38915 : done:
    1495       104066 :     LWLockRelease(&pgss->lock.lock);
    1496              : 
    1497              :     /* We postpone this clean-up until we're out of the lock */
    1498       104066 :     if (norm_query)
    1499        11403 :         pfree(norm_query);
    1500              : }
    1501              : 
    1502              : /*
    1503              :  * Reset statement statistics corresponding to userid, dbid, and queryid.
                      :  *
                      :  * This is the three-argument entry point (going by its name, the 1.7
                      :  * API): it always passes false as entry_reset()'s fourth argument,
                      :  * discards entry_reset()'s result, and returns void.
    1504              :  */
    1505              : Datum
    1506            1 : pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
    1507              : {
    1508              :     Oid         userid;
    1509              :     Oid         dbid;
    1510              :     int64       queryid;
    1511              : 
    1512            1 :     userid = PG_GETARG_OID(0);
    1513            1 :     dbid = PG_GETARG_OID(1);
    1514            1 :     queryid = PG_GETARG_INT64(2);
    1515              : 
    1516            1 :     entry_reset(userid, dbid, queryid, false);
    1517              : 
    1518            1 :     PG_RETURN_VOID();
    1519              : }
    1520              : 
    1521              : Datum
    1522          119 : pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
    1523              : {
    1524              :     Oid         userid;
    1525              :     Oid         dbid;
    1526              :     int64       queryid;
    1527              :     bool        minmax_only; /* extra argument compared with the 1.7 variant */
    1528              : 
    1529          119 :     userid = PG_GETARG_OID(0);
    1530          119 :     dbid = PG_GETARG_OID(1);
    1531          119 :     queryid = PG_GETARG_INT64(2);
    1532          119 :     minmax_only = PG_GETARG_BOOL(3); /* forwarded to entry_reset() unchanged */
    1533              : 
    1534          119 :     PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only)); /* hand back entry_reset()'s result as timestamptz */
    1535              : }
    1536              : 
    1537              : /*
    1538              :  * Reset statement statistics.
                      :  *
                      :  * Reads no arguments: entry_reset() is called with zero for userid, dbid
                      :  * and queryid and false for its fourth argument, and the function
                      :  * returns void.  NOTE(review): zero presumably means "no filter" for
                      :  * each of the three identifiers -- confirm against entry_reset().
    1539              :  */
    1540              : Datum
    1541            1 : pg_stat_statements_reset(PG_FUNCTION_ARGS)
    1542              : {
    1543            1 :     entry_reset(0, 0, 0, false);
    1544              : 
    1545            1 :     PG_RETURN_VOID();
    1546              : }
    1547              : 
    1548              : /*
                      :  * Number of output arguments (columns) for various API versions
                      :  *
                      :  * PG_STAT_STATEMENTS_COLS has to be kept equal to the largest of the
                      :  * per-version values when a new version is added.
                      :  */
    1549              : #define PG_STAT_STATEMENTS_COLS_V1_0    14
    1550              : #define PG_STAT_STATEMENTS_COLS_V1_1    18
    1551              : #define PG_STAT_STATEMENTS_COLS_V1_2    19
    1552              : #define PG_STAT_STATEMENTS_COLS_V1_3    23
    1553              : #define PG_STAT_STATEMENTS_COLS_V1_8    32
    1554              : #define PG_STAT_STATEMENTS_COLS_V1_9    33
    1555              : #define PG_STAT_STATEMENTS_COLS_V1_10   43
    1556              : #define PG_STAT_STATEMENTS_COLS_V1_11   49
    1557              : #define PG_STAT_STATEMENTS_COLS_V1_12   52
    1558              : #define PG_STAT_STATEMENTS_COLS_V1_13   54
    1559              : #define PG_STAT_STATEMENTS_COLS         54  /* maximum of above */
    1560              : 
    1561              : /*
    1562              :  * Retrieve statement statistics.
    1563              :  *
    1564              :  * The SQL API of this function has changed multiple times, and will likely
    1565              :  * do so again in future.  To support the case where a newer version of this
    1566              :  * loadable module is being used with an old SQL declaration of the function,
    1567              :  * we continue to support the older API versions.  For 1.2 and later, the
    1568              :  * expected API version is identified by embedding it in the C name of the
    1569              :  * function.  Unfortunately we weren't bright enough to do that for 1.1.
                      :  *
                      :  * This 1.13 wrapper fetches the showtext flag from argument 0 and passes
                      :  * it, together with PGSS_V1_13, to pg_stat_statements_internal(); the
                      :  * wrapper's own return value is always a zero Datum.
    1570              :  */
    1571              : Datum
    1572          129 : pg_stat_statements_1_13(PG_FUNCTION_ARGS)
    1573              : {
    1574          129 :     bool        showtext = PG_GETARG_BOOL(0);
    1575              : 
    1576          129 :     pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext);
    1577              : 
    1578          129 :     return (Datum) 0;
    1579              : }
    1580              : 
    1581              : Datum
    1582            1 : pg_stat_statements_1_12(PG_FUNCTION_ARGS)
    1583              : {
    1584            1 :     bool        showtext = PG_GETARG_BOOL(0); /* first SQL argument */
    1585              : 
    1586            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext); /* request the 1.12 API version */
    1587              : 
    1588            1 :     return (Datum) 0; /* always a zero Datum */
    1589              : }
    1590              : 
    1591              : Datum
    1592            1 : pg_stat_statements_1_11(PG_FUNCTION_ARGS)
    1593              : {
    1594            1 :     bool        showtext = PG_GETARG_BOOL(0); /* first SQL argument */
    1595              : 
    1596            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext); /* request the 1.11 API version */
    1597              : 
    1598            1 :     return (Datum) 0; /* always a zero Datum */
    1599              : }
    1600              : 
    1601              : Datum /* entry point for the 1.10 API: forwards to the common code with PGSS_V1_10 */
    1602            1 : pg_stat_statements_1_10(PG_FUNCTION_ARGS)
    1603              : {
    1604            1 :     bool        showtext = PG_GETARG_BOOL(0); /* argument 0: whether query text should be returned */
    1605              : 
    1606            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
    1607              : 
    1608            1 :     return (Datum) 0; /* rows are delivered through the tuplestore filled by pg_stat_statements_internal(), not through this Datum */
    1609              : }
    1610              : 
    1611              : Datum /* entry point for the 1.9 API: forwards to the common code with PGSS_V1_9 */
    1612            1 : pg_stat_statements_1_9(PG_FUNCTION_ARGS)
    1613              : {
    1614            1 :     bool        showtext = PG_GETARG_BOOL(0); /* argument 0: whether query text should be returned */
    1615              : 
    1616            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
    1617              : 
    1618            1 :     return (Datum) 0; /* rows are delivered through the tuplestore filled by pg_stat_statements_internal(), not through this Datum */
    1619              : }
    1620              : 
    1621              : Datum /* entry point for the 1.8 API: forwards to the common code with PGSS_V1_8 */
    1622            1 : pg_stat_statements_1_8(PG_FUNCTION_ARGS)
    1623              : {
    1624            1 :     bool        showtext = PG_GETARG_BOOL(0); /* argument 0: whether query text should be returned */
    1625              : 
    1626            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
    1627              : 
    1628            1 :     return (Datum) 0; /* rows are delivered through the tuplestore filled by pg_stat_statements_internal(), not through this Datum */
    1629              : }
    1630              : 
    1631              : Datum /* entry point for the 1.3 API: forwards to the common code with PGSS_V1_3 */
    1632            1 : pg_stat_statements_1_3(PG_FUNCTION_ARGS)
    1633              : {
    1634            1 :     bool        showtext = PG_GETARG_BOOL(0); /* argument 0: whether query text should be returned */
    1635              : 
    1636            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
    1637              : 
    1638            1 :     return (Datum) 0; /* rows are delivered through the tuplestore filled by pg_stat_statements_internal(), not through this Datum */
    1639              : }
    1640              : 
    1641              : Datum /* entry point for the 1.2 API: forwards to the common code with PGSS_V1_2 */
    1642            0 : pg_stat_statements_1_2(PG_FUNCTION_ARGS)
    1643              : {
    1644            0 :     bool        showtext = PG_GETARG_BOOL(0); /* argument 0: whether query text should be returned */
    1645              : 
    1646            0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
    1647              : 
    1648            0 :     return (Datum) 0; /* rows are delivered through the tuplestore filled by pg_stat_statements_internal(), not through this Datum */
    1649              : }
    1650              : 
    1651              : /*
    1652              :  * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
    1653              :  * This can be removed someday, perhaps.
    1654              :  */
    1655              : Datum
    1656            0 : pg_stat_statements(PG_FUNCTION_ARGS)
    1657              : {
    1658              :     /* If it's really API 1.1, we'll figure that out below */
    1659            0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_0, true); /* no showtext argument is read here; query text is always requested */
    1660              : 
    1661            0 :     return (Datum) 0; /* rows are delivered through the tuplestore filled by pg_stat_statements_internal(), not through this Datum */
    1662              : }
    1663              : 
    1664              : /* Common code for all versions of pg_stat_statements(): validates the result column count against api_version, then emits one row per non-sticky hash table entry */
    1665              : static void
    1666          135 : pg_stat_statements_internal(FunctionCallInfo fcinfo,
    1667              :                             pgssVersion api_version,
    1668              :                             bool showtext)
    1669              : {
    1670          135 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    1671          135 :     Oid         userid = GetUserId(); /* compared with each entry's key.userid below */
    1672          135 :     bool        is_allowed_role = false;
    1673          135 :     char       *qbuffer = NULL; /* in-memory copy of the query text file, or NULL if not loaded */
    1674          135 :     Size        qbuffer_size = 0;
    1675          135 :     Size        extent = 0; /* pgss->extent as seen before the speculative load */
    1676          135 :     int         gc_count = 0; /* pgss->gc_count as seen before the speculative load */
    1677              :     HASH_SEQ_STATUS hash_seq;
    1678              :     pgssEntry  *entry;
    1679              : 
    1680              :     /*
    1681              :      * Superusers or roles with the privileges of pg_read_all_stats members
    1682              :      * are allowed
    1683              :      */
    1684          135 :     is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
    1685              : 
    1686              :     /* hash table must exist already */
    1687          135 :     if (!pgss || !pgss_hash)
    1688            0 :         ereport(ERROR,
    1689              :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1690              :                  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
    1691              : 
    1692          135 :     InitMaterializedSRF(fcinfo, 0); /* presumably sets up rsinfo->setDesc and rsinfo->setResult, both used below -- confirm */
    1693              : 
    1694              :     /*
    1695              :      * Check we have the expected number of output arguments.  Aside from
    1696              :      * being a good safety check, we need a kluge here to detect API version
    1697              :      * 1.1, which was wedged into the code in an ill-considered way.
    1698              :      */
    1699          135 :     switch (rsinfo->setDesc->natts)
    1700              :     {
    1701            0 :         case PG_STAT_STATEMENTS_COLS_V1_0:
    1702            0 :             if (api_version != PGSS_V1_0)
    1703            0 :                 elog(ERROR, "incorrect number of output arguments");
    1704            0 :             break;
    1705            0 :         case PG_STAT_STATEMENTS_COLS_V1_1:
    1706              :             /* pg_stat_statements() should have told us 1.0 */
    1707            0 :             if (api_version != PGSS_V1_0)
    1708            0 :                 elog(ERROR, "incorrect number of output arguments");
    1709            0 :             api_version = PGSS_V1_1; /* the column count shows the legacy entry point is really serving 1.1 */
    1710            0 :             break;
    1711            0 :         case PG_STAT_STATEMENTS_COLS_V1_2:
    1712            0 :             if (api_version != PGSS_V1_2)
    1713            0 :                 elog(ERROR, "incorrect number of output arguments");
    1714            0 :             break;
    1715            1 :         case PG_STAT_STATEMENTS_COLS_V1_3:
    1716            1 :             if (api_version != PGSS_V1_3)
    1717            0 :                 elog(ERROR, "incorrect number of output arguments");
    1718            1 :             break;
    1719            1 :         case PG_STAT_STATEMENTS_COLS_V1_8:
    1720            1 :             if (api_version != PGSS_V1_8)
    1721            0 :                 elog(ERROR, "incorrect number of output arguments");
    1722            1 :             break;
    1723            1 :         case PG_STAT_STATEMENTS_COLS_V1_9:
    1724            1 :             if (api_version != PGSS_V1_9)
    1725            0 :                 elog(ERROR, "incorrect number of output arguments");
    1726            1 :             break;
    1727            1 :         case PG_STAT_STATEMENTS_COLS_V1_10:
    1728            1 :             if (api_version != PGSS_V1_10)
    1729            0 :                 elog(ERROR, "incorrect number of output arguments");
    1730            1 :             break;
    1731            1 :         case PG_STAT_STATEMENTS_COLS_V1_11:
    1732            1 :             if (api_version != PGSS_V1_11)
    1733            0 :                 elog(ERROR, "incorrect number of output arguments");
    1734            1 :             break;
    1735            1 :         case PG_STAT_STATEMENTS_COLS_V1_12:
    1736            1 :             if (api_version != PGSS_V1_12)
    1737            0 :                 elog(ERROR, "incorrect number of output arguments");
    1738            1 :             break;
    1739          129 :         case PG_STAT_STATEMENTS_COLS_V1_13:
    1740          129 :             if (api_version != PGSS_V1_13)
    1741            0 :                 elog(ERROR, "incorrect number of output arguments");
    1742          129 :             break;
    1743            0 :         default: /* column count matches none of the API versions listed above */
    1744            0 :             elog(ERROR, "incorrect number of output arguments");
    1745              :     }
    1746              : 
    1747              :     /*
    1748              :      * We'd like to load the query text file (if needed) while not holding any
    1749              :      * lock on pgss->lock.  In the worst case we'll have to do this again
    1750              :      * after we have the lock, but it's unlikely enough to make this a win
    1751              :      * despite occasional duplicated work.  We need to reload if anybody
    1752              :      * writes to the file (either a retail qtext_store(), or a garbage
    1753              :      * collection) between this point and where we've gotten shared lock.  If
    1754              :      * a qtext_store is actually in progress when we look, we might as well
    1755              :      * skip the speculative load entirely.
    1756              :      */
    1757          135 :     if (showtext)
    1758              :     {
    1759              :         int         n_writers;
    1760              : 
    1761              :         /* Take the mutex so we can examine variables */
    1762          135 :         SpinLockAcquire(&pgss->mutex);
    1763          135 :         extent = pgss->extent;
    1764          135 :         n_writers = pgss->n_writers;
    1765          135 :         gc_count = pgss->gc_count;
    1766          135 :         SpinLockRelease(&pgss->mutex);
    1767              : 
    1768              :         /* No point in loading file now if there are active writers */
    1769          135 :         if (n_writers == 0)
    1770          135 :             qbuffer = qtext_load_file(&qbuffer_size);
    1771              :     }
    1772              : 
    1773              :     /*
    1774              :      * Get shared lock, load or reload the query text file if we must, and
    1775              :      * iterate over the hashtable entries.
    1776              :      *
    1777              :      * With a large hash table, we might be holding the lock rather longer
    1778              :      * than one could wish.  However, this only blocks creation of new hash
    1779              :      * table entries, and the larger the hash table the less likely that is to
    1780              :      * be needed.  So we can hope this is okay.  Perhaps someday we'll decide
    1781              :      * we need to partition the hash table to limit the time spent holding any
    1782              :      * one lock.
    1783              :      */
    1784          135 :     LWLockAcquire(&pgss->lock.lock, LW_SHARED);
    1785              : 
    1786          135 :     if (showtext)
    1787              :     {
    1788              :         /*
    1789              :          * Here it is safe to examine extent and gc_count without taking the
    1790              :          * mutex.  Note that although other processes might change
    1791              :          * pgss->extent just after we look at it, the strings they then write
    1792              :          * into the file cannot yet be referenced in the hashtable, so we
    1793              :          * don't care whether we see them or not.
    1794              :          *
    1795              :          * If qtext_load_file fails, we just press on; we'll return NULL for
    1796              :          * every query text.
    1797              :          */
    1798          135 :         if (qbuffer == NULL ||
    1799          135 :             pgss->extent != extent ||
    1800          135 :             pgss->gc_count != gc_count)
    1801              :         {
    1802            0 :             if (qbuffer)
    1803            0 :                 pfree(qbuffer); /* drop the stale speculative copy before reloading */
    1804            0 :             qbuffer = qtext_load_file(&qbuffer_size);
    1805              :         }
    1806              :     }
    1807              : 
    1808          135 :     hash_seq_init(&hash_seq, pgss_hash);
    1809        29146 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    1810              :     {
    1811              :         Datum       values[PG_STAT_STATEMENTS_COLS]; /* sized for the maximum column count; the Assert below checks how many are filled for this api_version */
    1812              :         bool        nulls[PG_STAT_STATEMENTS_COLS];
    1813        29011 :         int         i = 0; /* index of the next output column to fill */
    1814              :         Counters    tmp;
    1815              :         double      stddev;
    1816        29011 :         int64       queryid = entry->key.queryid;
    1817              :         TimestampTz stats_since;
    1818              :         TimestampTz minmax_stats_since;
    1819              : 
    1820        29011 :         memset(values, 0, sizeof(values));
    1821        29011 :         memset(nulls, 0, sizeof(nulls));
    1822              : 
    1823        29011 :         values[i++] = ObjectIdGetDatum(entry->key.userid);
    1824        29011 :         values[i++] = ObjectIdGetDatum(entry->key.dbid);
    1825        29011 :         if (api_version >= PGSS_V1_9)
    1826        28999 :             values[i++] = BoolGetDatum(entry->key.toplevel);
    1827              : 
    1828        29011 :         if (is_allowed_role || entry->key.userid == userid) /* queryid and query text are shown only to allowed roles or the entry's own user */
    1829              :         {
    1830        29007 :             if (api_version >= PGSS_V1_2)
    1831        29007 :                 values[i++] = Int64GetDatumFast(queryid);
    1832              : 
    1833        29007 :             if (showtext)
    1834              :             {
    1835        29007 :                 char       *qstr = qtext_fetch(entry->query_offset,
    1836              :                                                entry->query_len,
    1837              :                                                qbuffer,
    1838              :                                                qbuffer_size);
    1839              : 
    1840        29007 :                 if (qstr)
    1841              :                 {
    1842              :                     char       *enc;
    1843              : 
    1844        29007 :                     enc = pg_any_to_server(qstr,
    1845              :                                            entry->query_len,
    1846              :                                            entry->encoding);
    1847              : 
    1848        29007 :                     values[i++] = CStringGetTextDatum(enc);
    1849              : 
    1850        29007 :                     if (enc != qstr) /* free enc only when pg_any_to_server() returned a different pointer than its input */
    1851            0 :                         pfree(enc);
    1852              :                 }
    1853              :                 else
    1854              :                 {
    1855              :                     /* Just return a null if we fail to find the text */
    1856            0 :                     nulls[i++] = true;
    1857              :                 }
    1858              :             }
    1859              :             else
    1860              :             {
    1861              :                 /* Query text not requested */
    1862            0 :                 nulls[i++] = true;
    1863              :             }
    1864              :         }
    1865              :         else
    1866              :         {
    1867              :             /* Don't show queryid */
    1868            4 :             if (api_version >= PGSS_V1_2)
    1869            4 :                 nulls[i++] = true;
    1870              : 
    1871              :             /*
    1872              :              * Don't show query text, but hint as to the reason for not doing
    1873              :              * so if it was requested
    1874              :              */
    1875            4 :             if (showtext)
    1876            4 :                 values[i++] = CStringGetTextDatum("<insufficient privilege>");
    1877              :             else
    1878            0 :                 nulls[i++] = true;
    1879              :         }
    1880              : 
    1881              :         /* copy counters to a local variable to keep locking time short */
    1882        29011 :         SpinLockAcquire(&entry->mutex);
    1883        29011 :         tmp = entry->counters;
    1884        29011 :         SpinLockRelease(&entry->mutex);
    1885              : 
    1886              :         /*
    1887              :          * The spinlock is not required when reading these two as they are
    1888              :          * always updated when holding pgss->lock exclusively.
    1889              :          */
    1890        29011 :         stats_since = entry->stats_since;
    1891        29011 :         minmax_stats_since = entry->minmax_stats_since;
    1892              : 
    1893              :         /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
    1894        29011 :         if (IS_STICKY(tmp))
    1895           45 :             continue; /* no row is emitted; the columns filled so far are simply abandoned */
    1896              : 
    1897              :         /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
    1898        86898 :         for (int kind = 0; kind < PGSS_NUMKIND; kind++)
    1899              :         {
    1900        57932 :             if (kind == PGSS_EXEC || api_version >= PGSS_V1_8) /* planning columns are emitted only from API 1.8 on */
    1901              :             {
    1902        57928 :                 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
    1903        57928 :                 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
    1904              :             }
    1905              : 
    1906        57932 :             if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
    1907              :                 api_version >= PGSS_V1_8)
    1908              :             {
    1909        57928 :                 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
    1910        57928 :                 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
    1911        57928 :                 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
    1912              : 
    1913              :                 /*
    1914              :                  * Note we are calculating the population variance here, not
    1915              :                  * the sample variance, as we have data for the whole
    1916              :                  * population, so Bessel's correction is not used, and we
    1917              :                  * don't divide by tmp.calls - 1.
    1918              :                  */
    1919        57928 :                 if (tmp.calls[kind] > 1)
    1920         5394 :                     stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
    1921              :                 else
    1922        52534 :                     stddev = 0.0; /* zero or one call: report a deviation of 0 */
    1923        57928 :                 values[i++] = Float8GetDatumFast(stddev);
    1924              :             }
    1925              :         }
    1926        28966 :         values[i++] = Int64GetDatumFast(tmp.rows);
    1927        28966 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
    1928        28966 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
    1929        28966 :         if (api_version >= PGSS_V1_1)
    1930        28966 :             values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
    1931        28966 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
    1932        28966 :         values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
    1933        28966 :         values[i++] = Int64GetDatumFast(tmp.local_blks_read);
    1934        28966 :         if (api_version >= PGSS_V1_1)
    1935        28966 :             values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
    1936        28966 :         values[i++] = Int64GetDatumFast(tmp.local_blks_written);
    1937        28966 :         values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
    1938        28966 :         values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
    1939        28966 :         if (api_version >= PGSS_V1_1)
    1940              :         {
    1941        28966 :             values[i++] = Float8GetDatumFast(tmp.shared_blk_read_time);
    1942        28966 :             values[i++] = Float8GetDatumFast(tmp.shared_blk_write_time);
    1943              :         }
    1944        28966 :         if (api_version >= PGSS_V1_11)
    1945              :         {
    1946        28938 :             values[i++] = Float8GetDatumFast(tmp.local_blk_read_time);
    1947        28938 :             values[i++] = Float8GetDatumFast(tmp.local_blk_write_time);
    1948              :         }
    1949        28966 :         if (api_version >= PGSS_V1_10)
    1950              :         {
    1951        28947 :             values[i++] = Float8GetDatumFast(tmp.temp_blk_read_time);
    1952        28947 :             values[i++] = Float8GetDatumFast(tmp.temp_blk_write_time);
    1953              :         }
    1954        28966 :         if (api_version >= PGSS_V1_8)
    1955              :         {
    1956              :             char        buf[256];
    1957              :             Datum       wal_bytes;
    1958              : 
    1959        28962 :             values[i++] = Int64GetDatumFast(tmp.wal_records);
    1960        28962 :             values[i++] = Int64GetDatumFast(tmp.wal_fpi);
    1961              : 
    1962        28962 :             snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes); /* format as text first so numeric_in can parse it below */
    1963              : 
    1964              :             /* Convert to numeric. */
    1965        28962 :             wal_bytes = DirectFunctionCall3(numeric_in,
    1966              :                                             CStringGetDatum(buf),
    1967              :                                             ObjectIdGetDatum(0),
    1968              :                                             Int32GetDatum(-1));
    1969        28962 :             values[i++] = wal_bytes;
    1970              :         }
    1971        28966 :         if (api_version >= PGSS_V1_12)
    1972              :         {
    1973        28928 :             values[i++] = Int64GetDatumFast(tmp.wal_buffers_full);
    1974              :         }
    1975        28966 :         if (api_version >= PGSS_V1_10)
    1976              :         {
    1977        28947 :             values[i++] = Int64GetDatumFast(tmp.jit_functions);
    1978        28947 :             values[i++] = Float8GetDatumFast(tmp.jit_generation_time);
    1979        28947 :             values[i++] = Int64GetDatumFast(tmp.jit_inlining_count);
    1980        28947 :             values[i++] = Float8GetDatumFast(tmp.jit_inlining_time);
    1981        28947 :             values[i++] = Int64GetDatumFast(tmp.jit_optimization_count);
    1982        28947 :             values[i++] = Float8GetDatumFast(tmp.jit_optimization_time);
    1983        28947 :             values[i++] = Int64GetDatumFast(tmp.jit_emission_count);
    1984        28947 :             values[i++] = Float8GetDatumFast(tmp.jit_emission_time);
    1985              :         }
    1986        28966 :         if (api_version >= PGSS_V1_11)
    1987              :         {
    1988        28938 :             values[i++] = Int64GetDatumFast(tmp.jit_deform_count);
    1989        28938 :             values[i++] = Float8GetDatumFast(tmp.jit_deform_time);
    1990              :         }
    1991        28966 :         if (api_version >= PGSS_V1_12)
    1992              :         {
    1993        28928 :             values[i++] = Int64GetDatumFast(tmp.parallel_workers_to_launch);
    1994        28928 :             values[i++] = Int64GetDatumFast(tmp.parallel_workers_launched);
    1995              :         }
    1996        28966 :         if (api_version >= PGSS_V1_13)
    1997              :         {
    1998        28923 :             values[i++] = Int64GetDatumFast(tmp.generic_plan_calls);
    1999        28923 :             values[i++] = Int64GetDatumFast(tmp.custom_plan_calls);
    2000              :         }
    2001        28966 :         if (api_version >= PGSS_V1_11)
    2002              :         {
    2003        28938 :             values[i++] = TimestampTzGetDatum(stats_since);
    2004        28938 :             values[i++] = TimestampTzGetDatum(minmax_stats_since);
    2005              :         }
    2006              : 
    2007              :         Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
    2008              :                      api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
    2009              :                      api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
    2010              :                      api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
    2011              :                      api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
    2012              :                      api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
    2013              :                      api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
    2014              :                      api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
    2015              :                      api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
    2016              :                      api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
    2017              :                      -1 /* fail if you forget to update this assert */ ));
    2018              : 
    2019        28966 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); /* append the finished row to the result set */
    2020              :     }
    2021              : 
    2022          135 :     LWLockRelease(&pgss->lock.lock);
    2023              : 
    2024          135 :     if (qbuffer)
    2025          135 :         pfree(qbuffer);
    2026          135 : }
    2027              : 
    2028              : /* Number of output arguments (columns) for pg_stat_statements_info */
    2029              : #define PG_STAT_STATEMENTS_INFO_COLS    2
    2030              : 
    2031              : /*
    2032              :  * Return statistics of pg_stat_statements.
    2033              :  */
    2034              : Datum /* returns a single composite value holding stats.dealloc and stats.stats_reset */
    2035            3 : pg_stat_statements_info(PG_FUNCTION_ARGS)
    2036              : {
    2037              :     pgssGlobalStats stats;
    2038              :     TupleDesc   tupdesc;
    2039            3 :     Datum       values[PG_STAT_STATEMENTS_INFO_COLS] = {0};
    2040            3 :     bool        nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0}; /* left all-false: neither column is ever NULL */
    2041              : 
    2042            3 :     if (!pgss || !pgss_hash)
    2043            0 :         ereport(ERROR,
    2044              :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    2045              :                  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
    2046              : 
    2047              :     /* Build a tuple descriptor for our result type */
    2048            3 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    2049            0 :         elog(ERROR, "return type must be a row type");
    2050              : 
    2051              :     /* Read global statistics for pg_stat_statements */
    2052            3 :     SpinLockAcquire(&pgss->mutex);
    2053            3 :     stats = pgss->stats; /* whole-struct copy under the spinlock, so both fields come from the same snapshot */
    2054            3 :     SpinLockRelease(&pgss->mutex);
    2055              : 
    2056            3 :     values[0] = Int64GetDatum(stats.dealloc);
    2057            3 :     values[1] = TimestampTzGetDatum(stats.stats_reset);
    2058              : 
    2059            3 :     PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
    2060              : }
    2061              : 
    2062              : /*
    2063              :  * Allocate a new hashtable entry.
    2064              :  * caller must hold an exclusive lock on pgss->lock
    2065              :  *
    2066              :  * "query" need not be null-terminated; we rely on query_len instead
    2067              :  *
    2068              :  * If "sticky" is true, make the new entry artificially sticky so that it will
    2069              :  * probably still be there when the query finishes execution.  We do this by
    2070              :  * giving it a median usage value rather than the normal value.  (Strictly
    2071              :  * speaking, query strings are normalized on a best effort basis, though it
    2072              :  * would be difficult to demonstrate this even under artificial conditions.)
    2073              :  *
    2074              :  * Note: despite needing exclusive lock, it's not an error for the target
    2075              :  * entry to already exist.  This is because pgss_store releases and
    2076              :  * reacquires lock after failing to find a match; so someone else could
    2077              :  * have made the entry while we waited to get exclusive lock.
    2078              :  */
    2079              : static pgssEntry *
    2080        58767 : entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, /* query_offset, query_len and encoding are stored as-is in a newly created entry */
    2081              :             bool sticky)
    2082              : {
    2083              :     pgssEntry  *entry;
    2084              :     bool        found;
    2085              : 
    2086              :     /* Make space if needed */
    2087        58767 :     while (hash_get_num_entries(pgss_hash) >= pgss_max) /* repeats until the entry count is below pgss_max */
    2088            0 :         entry_dealloc();
    2089              : 
    2090              :     /* Find or create an entry with desired hash code */
    2091        58767 :     entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
    2092              : 
    2093        58767 :     if (!found)
    2094              :     {
    2095              :         /* New entry, initialize it */
    2096              : 
    2097              :         /* reset the statistics */
    2098        58767 :         memset(&entry->counters, 0, sizeof(Counters));
    2099              :         /* set the appropriate initial usage count */
    2100        58767 :         entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
    2101              :         /* re-initialize the mutex each time ... we assume no one using it */
    2102        58767 :         SpinLockInit(&entry->mutex);
    2103              :         /* ... and don't forget the query text metadata */
    2104              :         Assert(query_len >= 0);
    2105        58767 :         entry->query_offset = query_offset;
    2106        58767 :         entry->query_len = query_len;
    2107        58767 :         entry->encoding = encoding;
    2108        58767 :         entry->stats_since = GetCurrentTimestamp();
    2109        58767 :         entry->minmax_stats_since = entry->stats_since; /* both timestamps start out identical */
    2110              :     }
    2111              : 
    2112        58767 :     return entry; /* an entry that already existed is returned without being modified */
    2113              : }
    2114              : 
    2115              : /*
    2116              :  * qsort comparator for sorting into increasing usage order
    2117              :  */
    2118              : static int
    2119            0 : entry_cmp(const void *lhs, const void *rhs)
    2120              : {
    2121            0 :     double      l_usage = (*(pgssEntry *const *) lhs)->counters.usage; /* lhs points at an array element that is itself a pgssEntry pointer */
    2122            0 :     double      r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
    2123              : 
    2124            0 :     if (l_usage < r_usage)
    2125            0 :         return -1;
    2126            0 :     else if (l_usage > r_usage)
    2127            0 :         return +1;
    2128              :     else
    2129            0 :         return 0; /* equal usage values */
    2130              : }
    2131              : 
    2132              : /*
    2133              :  * Deallocate least-used entries.
    2134              :  *
    2135              :  * Caller must hold an exclusive lock on pgss->lock.
                      :  *
                      :  * One pass over pgss_hash collects a pointer to every entry, decays each
                      :  * entry's usage, and accumulates the total text length.  The pointers are
                      :  * then sorted by usage and the first nvictims of them are removed from
                      :  * the hash table.  Shared state updated along the way: cur_median_usage,
                      :  * mean_query_len, and (under pgss->mutex) stats.dealloc.
    2136              :  */
    2137              : static void
    2138            0 : entry_dealloc(void)
    2139              : {
    2140              :     HASH_SEQ_STATUS hash_seq;
    2141              :     pgssEntry **entries;
    2142              :     pgssEntry  *entry;
    2143              :     int         nvictims;
    2144              :     int         i;
    2145              :     Size        tottextlen;
    2146              :     int         nvalidtexts;
    2147              : 
    2148              :     /*
    2149              :      * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
    2150              :      * While we're scanning the table, apply the decay factor to the usage
    2151              :      * values, and update the mean query length.
    2152              :      *
    2153              :      * Note that the mean query length is almost immediately obsolete, since
    2154              :      * we compute it before not after discarding the least-used entries.
    2155              :      * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
    2156              :      * making two passes to get a more current result.  Likewise, the new
    2157              :      * cur_median_usage includes the entries we're about to zap.
    2158              :      */
    2159              : 
    2160            0 :     entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *)); /* one slot per current entry */
    2161              : 
    2162            0 :     i = 0;
    2163            0 :     tottextlen = 0;
    2164            0 :     nvalidtexts = 0;
    2165              : 
    2166            0 :     hash_seq_init(&hash_seq, pgss_hash);
    2167            0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2168              :     {
    2169            0 :         entries[i++] = entry;
    2170              :         /* "Sticky" entries get a different usage decay rate. */
    2171            0 :         if (IS_STICKY(entry->counters))
    2172            0 :             entry->counters.usage *= STICKY_DECREASE_FACTOR;
    2173              :         else
    2174            0 :             entry->counters.usage *= USAGE_DECREASE_FACTOR;
    2175              :         /* In the mean length computation, ignore dropped texts (query_len < 0). */
    2176            0 :         if (entry->query_len >= 0)
    2177              :         {
    2178            0 :             tottextlen += entry->query_len + 1; /* +1 for the terminator byte stored after each text */
    2179            0 :             nvalidtexts++;
    2180              :         }
    2181              :     }
    2182              : 
    2183              :     /* Sort into increasing order by usage; i is now the number of entries */
    2184            0 :     qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
    2185              : 
    2186              :     /* Record the (approximate) median usage: the middle of the sorted array */
    2187            0 :     if (i > 0)
    2188            0 :         pgss->cur_median_usage = entries[i / 2]->counters.usage;
    2189              :     /* Record the mean query length, falling back to the default if no texts */
    2190            0 :     if (nvalidtexts > 0)
    2191            0 :         pgss->mean_query_len = tottextlen / nvalidtexts;
    2192              :     else
    2193            0 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2194              : 
    2195              :     /* Now zap an appropriate fraction of lowest-usage entries: at least 10, but never more than exist */
    2196            0 :     nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
    2197            0 :     nvictims = Min(nvictims, i);
    2198              : 
    2199            0 :     for (i = 0; i < nvictims; i++)
    2200              :     {
    2201            0 :         hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
    2202              :     }
    2203              : 
    2204            0 :     pfree(entries);
    2205              : 
    2206              :     /* Increment the number of times entries are deallocated */
    2207            0 :     SpinLockAcquire(&pgss->mutex);
    2208            0 :     pgss->stats.dealloc += 1;
    2209            0 :     SpinLockRelease(&pgss->mutex);
    2210            0 : }
    2211              : 
    2212              : /*
    2213              :  * Given a query string (not necessarily null-terminated), allocate a new
    2214              :  * entry in the external query text file and store the string there.
    2215              :  *
    2216              :  * If successful, returns true, and stores the new entry's offset in the file
    2217              :  * into *query_offset.  Also, if gc_count isn't NULL, *gc_count is set to the
    2218              :  * number of garbage collections that have occurred so far.
    2219              :  *
    2220              :  * On failure, returns false.  Note that *query_offset (and *gc_count, if
                      :  * requested) are assigned before the size check and the write are
                      :  * attempted, so they are set in the failure case too.
                      :  *
                      :  * pgss->n_writers is incremented while the space is being reserved and is
                      :  * decremented again on both the success exit and the error exit.
    2221              :  *
    2222              :  * At least a shared lock on pgss->lock must be held by the caller, so as
    2223              :  * to prevent a concurrent garbage collection.  Share-lock-holding callers
    2224              :  * should pass a gc_count pointer to obtain the number of garbage collections,
    2225              :  * so that they can recheck the count after obtaining exclusive lock to
    2226              :  * detect whether a garbage collection occurred (and removed this entry).
    2227              :  */
    2228              : static bool
    2229        30775 : qtext_store(const char *query, int query_len,
    2230              :             Size *query_offset, int *gc_count)
    2231              : {
    2232              :     Size        off;
    2233              :     int         fd;
    2234              : 
    2235              :     /*
    2236              :      * We use a spinlock to protect extent/n_writers/gc_count, so that
    2237              :      * multiple processes may execute this function concurrently.
                      :      * The reservation covers query_len bytes plus one terminator byte.
    2238              :      */
    2239        30775 :     SpinLockAcquire(&pgss->mutex);
    2240        30775 :     off = pgss->extent;
    2241        30775 :     pgss->extent += query_len + 1;
    2242        30775 :     pgss->n_writers++;
    2243        30775 :     if (gc_count)
    2244        30775 :         *gc_count = pgss->gc_count;
    2245        30775 :     SpinLockRelease(&pgss->mutex);
    2246              : 
    2247        30775 :     *query_offset = off;
    2248              : 
    2249              :     /*
    2250              :      * Don't allow the file to grow larger than what qtext_load_file can
    2251              :      * (theoretically) handle.  This has been seen to be reachable on 32-bit
    2252              :      * platforms.
                      :      *
                      :      * NOTE(review): pgss->extent was already advanced above and is not rolled
                      :      * back when this check fails, so a later call could arrive here with off
                      :      * greater than MaxAllocHugeSize; the subtraction would then presumably
                      :      * wrap (Size looks unsigned) and the check would pass -- confirm whether
                      :      * that case is acceptable.
    2253              :      */
    2254        30775 :     if (unlikely(query_len >= MaxAllocHugeSize - off))
    2255              :     {
    2256            0 :         errno = EFBIG;          /* not quite right, but it'll do */
    2257            0 :         fd = -1;                /* nothing was opened, so the error path must not close */
    2258            0 :         goto error;
    2259              :     }
    2260              : 
    2261              :     /* Now write the data into the successfully-reserved part of the file */
    2262        30775 :     fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
    2263        30775 :     if (fd < 0)
    2264            0 :         goto error;
    2265              : 
    2266        30775 :     if (pg_pwrite(fd, query, query_len, off) != query_len)
    2267            0 :         goto error;
    2268        30775 :     if (pg_pwrite(fd, "\0", 1, off + query_len) != 1) /* terminator right after the text */
    2269            0 :         goto error;
    2270              : 
    2271        30775 :     CloseTransientFile(fd);
    2272              : 
    2273              :     /* Mark our write complete */
    2274        30775 :     SpinLockAcquire(&pgss->mutex);
    2275        30775 :     pgss->n_writers--;
    2276        30775 :     SpinLockRelease(&pgss->mutex);
    2277              : 
    2278        30775 :     return true;
    2279              : 
    2280            0 : error:
    2281            0 :     ereport(LOG,
    2282              :             (errcode_for_file_access(),
    2283              :              errmsg("could not write file \"%s\": %m",
    2284              :                     PGSS_TEXT_FILE)));
    2285              : 
    2286            0 :     if (fd >= 0)
    2287            0 :         CloseTransientFile(fd);
    2288              : 
    2289              :     /* Mark our write complete, even though it failed, to keep n_writers balanced */
    2290            0 :     SpinLockAcquire(&pgss->mutex);
    2291            0 :     pgss->n_writers--;
    2292            0 :     SpinLockRelease(&pgss->mutex);
    2293              : 
    2294            0 :     return false;
    2295              : }
    2296              : 
    2297              : /*
    2298              :  * Read the external query text file into a palloc'd buffer.
    2299              :  *
    2300              :  * Returns NULL (without throwing an error) if unable to read, eg
    2301              :  * file not there or insufficient memory.  A missing file (ENOENT) is not
                      :  * logged; a short read with errno unset is not logged either; the other
                      :  * failures are reported at LOG level.
    2302              :  *
    2303              :  * On success, the buffer size is also returned into *buffer_size.
                      :  * *buffer_size is left untouched on every NULL-returning path.  A failure
                      :  * to close the file after a complete read is logged, but the buffer is
                      :  * still returned.
    2304              :  *
    2305              :  * This can be called without any lock on pgss->lock, but in that case
    2306              :  * the caller is responsible for verifying that the result is sane.
    2307              :  */
    2308              : static char *
    2309          142 : qtext_load_file(Size *buffer_size)
    2310              : {
    2311              :     char       *buf;
    2312              :     int         fd;
    2313              :     struct stat stat;
    2314              :     Size        nread;
    2315              : 
    2316          142 :     fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
    2317          142 :     if (fd < 0)
    2318              :     {
    2319            0 :         if (errno != ENOENT)
    2320            0 :             ereport(LOG,
    2321              :                     (errcode_for_file_access(),
    2322              :                      errmsg("could not read file \"%s\": %m",
    2323              :                             PGSS_TEXT_FILE)));
    2324            0 :         return NULL;
    2325              :     }
    2326              : 
    2327              :     /* Get file length */
    2328          142 :     if (fstat(fd, &stat))
    2329              :     {
    2330            0 :         ereport(LOG,
    2331              :                 (errcode_for_file_access(),
    2332              :                  errmsg("could not stat file \"%s\": %m",
    2333              :                         PGSS_TEXT_FILE)));
    2334            0 :         CloseTransientFile(fd);
    2335            0 :         return NULL;
    2336              :     }
    2337              : 
    2338              :     /* Allocate buffer; beware that off_t might be wider than size_t.  A file too large to allocate is reported as out of memory below. */
    2339          142 :     if (stat.st_size <= MaxAllocHugeSize)
    2340          142 :         buf = (char *) palloc_extended(stat.st_size, MCXT_ALLOC_HUGE | MCXT_ALLOC_NO_OOM);
    2341              :     else
    2342            0 :         buf = NULL;
    2343          142 :     if (buf == NULL)
    2344              :     {
    2345            0 :         ereport(LOG,
    2346              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
    2347              :                  errmsg("out of memory"),
    2348              :                  errdetail("Could not allocate enough memory to read file \"%s\".",
    2349              :                            PGSS_TEXT_FILE)));
    2350            0 :         CloseTransientFile(fd);
    2351            0 :         return NULL;
    2352              :     }
    2353              : 
    2354              :     /*
    2355              :      * OK, slurp in the file.  Windows fails if we try to read more than
    2356              :      * INT_MAX bytes at once, and other platforms might not like that either,
    2357              :      * so read a very large file in 1GB segments.
    2358              :      */
    2359          142 :     nread = 0;
    2360          283 :     while (nread < stat.st_size)
    2361              :     {
    2362          141 :         int         toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
    2363              : 
    2364              :         /*
    2365              :          * If we get a short read and errno doesn't get set, the reason is
    2366              :          * probably that garbage collection truncated the file since we did
    2367              :          * the fstat(), so we don't log a complaint --- but we don't return
    2368              :          * the data, either, since it's most likely corrupt due to concurrent
    2369              :          * writes from garbage collection.
    2370              :          */
    2371          141 :         errno = 0;
    2372          141 :         if (read(fd, buf + nread, toread) != toread)
    2373              :         {
    2374            0 :             if (errno)
    2375            0 :                 ereport(LOG,
    2376              :                         (errcode_for_file_access(),
    2377              :                          errmsg("could not read file \"%s\": %m",
    2378              :                                 PGSS_TEXT_FILE)));
    2379            0 :             pfree(buf);
    2380            0 :             CloseTransientFile(fd);
    2381            0 :             return NULL;
    2382              :         }
    2383          141 :         nread += toread;
    2384              :     }
    2385              : 
    2386          142 :     if (CloseTransientFile(fd) != 0)
    2387            0 :         ereport(LOG,
    2388              :                 (errcode_for_file_access(),
    2389              :                  errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
    2390              : 
    2391          142 :     *buffer_size = nread;
    2392          142 :     return buf;
    2393              : }
    2394              : 
    2395              : /*
    2396              :  * Locate a query text in the file image previously read by qtext_load_file().
    2397              :  *
    2398              :  * We validate the given offset/length, and return NULL if bogus.  Otherwise,
    2399              :  * the result points to a null-terminated string within the buffer.
                      :  *
                      :  * The result is not a copy: it is buffer + query_offset, so it is only
                      :  * usable for as long as the buffer itself is.  A NULL buffer (failed file
                      :  * read) and a negative query_len both yield NULL.
    2400              :  */
    2401              : static char *
    2402        86852 : qtext_fetch(Size query_offset, int query_len,
    2403              :             char *buffer, Size buffer_size)
    2404              : {
    2405              :     /* File read failed? */
    2406        86852 :     if (buffer == NULL)
    2407            0 :         return NULL;
    2408              :     /* Bogus offset/length?  The terminator at offset + len must itself lie inside the buffer.  NOTE(review): the sum is computed in Size arithmetic; presumably offsets are never large enough to wrap -- confirm. */
    2409        86852 :     if (query_len < 0 ||
    2410        86852 :         query_offset + query_len >= buffer_size)
    2411            0 :         return NULL;
    2412              :     /* As a further sanity check, make sure there's a trailing null */
    2413        86852 :     if (buffer[query_offset + query_len] != '\0')
    2414            0 :         return NULL;
    2415              :     /* Looks OK */
    2416        86852 :     return buffer + query_offset;
    2417              : }
    2418              : 
    2419              : /*
    2420              :  * Do we need to garbage-collect the external query text file?
    2421              :  *
    2422              :  * Caller should hold at least a shared lock on pgss->lock.
                      :  *
                      :  * Returns true only when the shared extent is at least 512 bytes per
                      :  * possible entry (pgss_max) and also at least twice mean_query_len per
                      :  * possible entry; otherwise returns false.
    2423              :  */
    2424              : static bool
    2425        30775 : need_gc_qtexts(void)
    2426              : {
    2427              :     Size        extent;
    2428              : 
    2429              :     /* Read shared extent pointer */
    2430        30775 :     SpinLockAcquire(&pgss->mutex);
    2431        30775 :     extent = pgss->extent;
    2432        30775 :     SpinLockRelease(&pgss->mutex);
    2433              : 
    2434              :     /*
    2435              :      * Don't proceed if file does not exceed 512 bytes per possible entry.
    2436              :      *
    2437              :      * Here and in the next test, 32-bit machines have overflow hazards if
    2438              :      * pgss_max and/or mean_query_len are large.  Force the multiplications
    2439              :      * and comparisons to be done in uint64 arithmetic to forestall trouble.
    2440              :      */
    2441        30775 :     if ((uint64) extent < (uint64) 512 * pgss_max)
    2442        30775 :         return false;
    2443              : 
    2444              :     /*
    2445              :      * Don't proceed if file is less than about 50% bloat.  Nothing can or
    2446              :      * should be done in the event of unusually large query texts accounting
    2447              :      * for file's large size.  We go to the trouble of maintaining the mean
    2448              :      * query length in order to prevent garbage collection from thrashing
    2449              :      * uselessly.
                      :      *
                      :      * Note that mean_query_len is read here without taking pgss->mutex;
                      :      * presumably the caller's hold on pgss->lock is what protects it --
                      :      * TODO confirm.
    2450              :      */
    2451            0 :     if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
    2452            0 :         return false;
    2453              : 
    2454            0 :     return true;
    2455              : }
    2456              : 
    2457              : /*
    2458              :  * Garbage-collect orphaned query texts in external file.
    2459              :  *
    2460              :  * This won't be called often in the typical case, since it's likely that
    2461              :  * there won't be too much churn, and besides, a similar compaction process
    2462              :  * occurs when serializing to disk at shutdown or as part of resetting.
    2463              :  * Despite this, it seems prudent to plan for the edge case where the file
    2464              :  * becomes unreasonably large, with no other method of compaction likely to
    2465              :  * occur in the foreseeable future.
    2466              :  *
    2467              :  * The caller must hold an exclusive lock on pgss->lock.
    2468              :  *
    2469              :  * At the first sign of trouble we unlink the query text file to get a clean
    2470              :  * slate (although existing statistics are retained), rather than risk
    2471              :  * thrashing by allowing the same problem case to recur indefinitely.
                      :  *
                      :  * Outcomes:
                      :  * - need_gc_qtexts() says no: return at once, nothing is touched.
                      :  * - success: each entry whose text could be fetched gets its query_offset
                      :  *   rewritten to the text's new position; entries whose text could not be
                      :  *   fetched are marked with query_len = -1; pgss->extent and
                      :  *   pgss->mean_query_len are recomputed.
                      :  * - failure: every entry is marked as having no text, the file is replaced
                      :  *   by an empty one, and extent and mean_query_len are reset.
                      :  * record_gc_qtexts() is called in both the success and the failure case.
    2472              :  */
    2473              : static void
    2474            0 : gc_qtexts(void)
    2475              : {
    2476              :     char       *qbuffer;
    2477              :     Size        qbuffer_size;
    2478            0 :     FILE       *qfile = NULL;
    2479              :     HASH_SEQ_STATUS hash_seq;
    2480              :     pgssEntry  *entry;
    2481              :     Size        extent;
    2482              :     int         nentries;
    2483              : 
    2484              :     /*
    2485              :      * When called from pgss_store, some other session might have proceeded
    2486              :      * with garbage collection in the no-lock-held interim of lock strength
    2487              :      * escalation.  Check once more that this is actually necessary.
    2488              :      */
    2489            0 :     if (!need_gc_qtexts())
    2490            0 :         return;
    2491              : 
    2492              :     /*
    2493              :      * Load the old texts file.  If we fail (out of memory, for instance),
    2494              :      * invalidate query texts.  Hopefully this is rare.  It might seem better
    2495              :      * to leave things alone on an OOM failure, but the problem is that the
    2496              :      * file is only going to get bigger; hoping for a future non-OOM result is
    2497              :      * risky and can easily lead to complete denial of service.
    2498              :      */
    2499            0 :     qbuffer = qtext_load_file(&qbuffer_size);
    2500            0 :     if (qbuffer == NULL)
    2501            0 :         goto gc_fail;
    2502              : 
    2503              :     /*
    2504              :      * We overwrite the query texts file in place, so as to reduce the risk of
    2505              :      * an out-of-disk-space failure.  Since the file is guaranteed not to get
    2506              :      * larger, this should always work on traditional filesystems; though we
    2507              :      * could still lose on copy-on-write filesystems.
    2508              :      */
    2509            0 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2510            0 :     if (qfile == NULL)
    2511              :     {
    2512            0 :         ereport(LOG,
    2513              :                 (errcode_for_file_access(),
    2514              :                  errmsg("could not write file \"%s\": %m",
    2515              :                         PGSS_TEXT_FILE)));
    2516            0 :         goto gc_fail;
    2517              :     }
    2518              : 
    2519            0 :     extent = 0;
    2520            0 :     nentries = 0;
    2521              : 
    2522            0 :     hash_seq_init(&hash_seq, pgss_hash);
    2523            0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2524              :     {
    2525            0 :         int         query_len = entry->query_len;
    2526            0 :         char       *qry = qtext_fetch(entry->query_offset,
    2527              :                                       query_len,
    2528              :                                       qbuffer,
    2529              :                                       qbuffer_size);
    2530              : 
    2531            0 :         if (qry == NULL)
    2532              :         {
    2533              :             /* Trouble ... drop the text */
    2534            0 :             entry->query_offset = 0;
    2535            0 :             entry->query_len = -1;
    2536              :             /* entry will not be counted in mean query length computation */
    2537            0 :             continue;
    2538              :         }
    2539              : 
    2540            0 :         if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1) /* text plus its trailing null */
    2541              :         {
    2542            0 :             ereport(LOG,
    2543              :                     (errcode_for_file_access(),
    2544              :                      errmsg("could not write file \"%s\": %m",
    2545              :                             PGSS_TEXT_FILE)));
    2546            0 :             hash_seq_term(&hash_seq); /* we are leaving the scan before it ran to completion */
    2547            0 :             goto gc_fail;
    2548              :         }
    2549              : 
    2550            0 :         entry->query_offset = extent; /* where this text now starts in the rewritten file */
    2551            0 :         extent += query_len + 1;
    2552            0 :         nentries++;
    2553              :     }
    2554              : 
    2555              :     /*
    2556              :      * Truncate away any now-unused space.  If this fails for some odd reason,
    2557              :      * we log it, but there's no need to fail.
    2558              :      */
    2559            0 :     if (ftruncate(fileno(qfile), extent) != 0)
    2560            0 :         ereport(LOG,
    2561              :                 (errcode_for_file_access(),
    2562              :                  errmsg("could not truncate file \"%s\": %m",
    2563              :                         PGSS_TEXT_FILE)));
    2564              : 
    2565            0 :     if (FreeFile(qfile))
    2566              :     {
    2567            0 :         ereport(LOG,
    2568              :                 (errcode_for_file_access(),
    2569              :                  errmsg("could not write file \"%s\": %m",
    2570              :                         PGSS_TEXT_FILE)));
    2571            0 :         qfile = NULL; /* already passed to FreeFile(); keep gc_fail from doing so again */
    2572            0 :         goto gc_fail;
    2573              :     }
    2574              : 
    2575            0 :     elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
    2576              :          pgss->extent, extent);
    2577              : 
    2578              :     /* Reset the shared extent pointer */
    2579            0 :     pgss->extent = extent;
    2580              : 
    2581              :     /*
    2582              :      * Also update the mean query length, to be sure that need_gc_qtexts()
    2583              :      * won't still think we have a problem.
    2584              :      */
    2585            0 :     if (nentries > 0)
    2586            0 :         pgss->mean_query_len = extent / nentries;
    2587              :     else
    2588            0 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2589              : 
    2590            0 :     pfree(qbuffer);
    2591              : 
    2592              :     /*
    2593              :      * OK, count a garbage collection cycle.  (Note: even though we have
    2594              :      * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
    2595              :      * other processes may examine gc_count while holding only the mutex.
    2596              :      * Also, we have to advance the count *after* we've rewritten the file,
    2597              :      * else other processes might not realize they read a stale file.)
    2598              :      */
    2599            0 :     record_gc_qtexts();
    2600              : 
    2601            0 :     return;
    2602              : 
    2603            0 : gc_fail:
    2604              :     /* clean up resources; either may be NULL depending on where we failed */
    2605            0 :     if (qfile)
    2606            0 :         FreeFile(qfile);
    2607            0 :     if (qbuffer)
    2608            0 :         pfree(qbuffer);
    2609              : 
    2610              :     /*
    2611              :      * Since the contents of the external file are now uncertain, mark all
    2612              :      * hashtable entries as having invalid texts.
    2613              :      */
    2614            0 :     hash_seq_init(&hash_seq, pgss_hash);
    2615            0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2616              :     {
    2617            0 :         entry->query_offset = 0;
    2618            0 :         entry->query_len = -1;
    2619              :     }
    2620              : 
    2621              :     /*
    2622              :      * Destroy the query text file and create a new, empty one.  A failure
                      :      * to recreate it is logged, but the shared state is reset regardless.
    2623              :      */
    2624            0 :     (void) unlink(PGSS_TEXT_FILE);
    2625            0 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2626            0 :     if (qfile == NULL)
    2627            0 :         ereport(LOG,
    2628              :                 (errcode_for_file_access(),
    2629              :                  errmsg("could not recreate file \"%s\": %m",
    2630              :                         PGSS_TEXT_FILE)));
    2631              :     else
    2632            0 :         FreeFile(qfile);
    2633              : 
    2634              :     /* Reset the shared extent pointer */
    2635            0 :     pgss->extent = 0;
    2636              : 
    2637              :     /* Reset mean_query_len to match the new state */
    2638            0 :     pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2639              : 
    2640              :     /*
    2641              :      * Bump the GC count even though we failed.
    2642              :      *
    2643              :      * This is needed to make concurrent readers of file without any lock on
    2644              :      * pgss->lock notice existence of new version of file.  Once readers
    2645              :      * subsequently observe a change in GC count with pgss->lock held, that
    2646              :      * forces a safe reopen of file.  Writers also require that we bump here,
    2647              :      * of course.  (As required by locking protocol, readers and writers don't
    2648              :      * trust earlier file contents until gc_count is found unchanged after
    2649              :      * pgss->lock acquired in shared or exclusive mode respectively.)
    2650              :      */
    2651            0 :     record_gc_qtexts();
    2652              : }
    2653              : 
    2654              : #define SINGLE_ENTRY_RESET(e) /* no-op for NULL e; needs minmax_only, stats_reset, num_remove in scope */ \
    2655              : if (e) { \
    2656              :     if (minmax_only) { \
    2657              :         /* When requested, reset only the min/max statistics; the entry stays in the hash table */ \
    2658              :         for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
    2659              :         { \
    2660              :             e->counters.max_time[kind] = 0; \
    2661              :             e->counters.min_time[kind] = 0; \
    2662              :         } \
    2663              :         e->minmax_stats_since = stats_reset; \
    2664              :     } \
    2665              :     else \
    2666              :     { \
    2667              :         /* Otherwise remove the entry from pgss_hash and count the removal */ \
    2668              :         hash_search(pgss_hash, &e->key, HASH_REMOVE, NULL); \
    2669              :         num_remove++; \
    2670              :     } \
    2671              : } /* NOTE(review): expands to a bare if-block, not do { } while (0); only safe as a standalone statement */
    2672              : 
    2673              : /*
    2674              :  * Reset entries corresponding to parameters passed.
                      :  *
                      :  * A zero userid, dbid or queryid acts as a wildcard for that field:
                      :  *   - all three non-zero: the (at most two) matching entries are looked up
                      :  *     directly in the hash table, once with toplevel = false and once with
                      :  *     toplevel = true;
                      :  *   - some non-zero: the whole hash table is scanned and every entry that
                      :  *     matches the non-zero fields is reset;
                      :  *   - all zero: every entry in the hash table is reset.
                      :  *
                      :  * The per-entry work is done by SINGLE_ENTRY_RESET.  minmax_only is not
                      :  * referenced directly in this function body; presumably the macro consumes
                      :  * it (and bumps num_remove only when an entry is really removed) -- TODO
                      :  * confirm against the macro definition.
                      :  *
                      :  * If the number of removed entries equals the number of entries that were
                      :  * in the hash table when the lock was taken, the global statistics
                      :  * (dealloc counter and stats_reset time) are reset too, the query-texts
                      :  * file is rewritten as an empty file, pgss->extent is set to 0 and
                      :  * record_gc_qtexts() is called.
                      :  *
                      :  * pgss->lock is held exclusively for the whole operation.
                      :  *
                      :  * Returns the timestamp obtained from GetCurrentTimestamp() right after the
                      :  * lock was acquired; it is returned whether or not the global statistics
                      :  * were reset.  Raises an ERROR if the shared state (pgss / pgss_hash) has
                      :  * not been set up.
    2675              :  */
    2676              : static TimestampTz
    2677          121 : entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
    2678              : {
    2679              :     HASH_SEQ_STATUS hash_seq;
    2680              :     pgssEntry  *entry;
    2681              :     FILE       *qfile;
    2682              :     int64       num_entries;
    2683          121 :     int64       num_remove = 0;
    2684              :     pgssHashKey key;
    2685              :     TimestampTz stats_reset;
    2686              : 
    2687          121 :     if (!pgss || !pgss_hash)
    2688            0 :         ereport(ERROR,
    2689              :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    2690              :                  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
    2691              : 
    2692          121 :     LWLockAcquire(&pgss->lock.lock, LW_EXCLUSIVE);
    2693          121 :     num_entries = hash_get_num_entries(pgss_hash);
    2694              : 
    2695          121 :     stats_reset = GetCurrentTimestamp();
    2696              : 
    2697          121 :     if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
    2698              :     {
    2699              :         /* If all the parameters are available, use the fast path: two direct lookups instead of a full scan. */
    2700            1 :         memset(&key, 0, sizeof(pgssHashKey));
    2701            1 :         key.userid = userid;
    2702            1 :         key.dbid = dbid;
    2703            1 :         key.queryid = queryid;
    2704              : 
    2705              :         /*
    2706              :          * Reset the entry if it exists, starting with the non-top-level
    2707              :          * entry.
    2708              :          */
    2709            1 :         key.toplevel = false;
    2710            1 :         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
    2711              : 
    2712            1 :         SINGLE_ENTRY_RESET(entry);
    2713              : 
    2714              :         /* Also reset the top-level entry if it exists. */
    2715            1 :         key.toplevel = true;
    2716            1 :         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
    2717              : 
    2718            1 :         SINGLE_ENTRY_RESET(entry);
    2719              :     }
    2720          120 :     else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
    2721              :     {
    2722              :         /* Reset entries corresponding to valid (non-zero) parameters; zero parameters match anything. */
    2723            4 :         hash_seq_init(&hash_seq, pgss_hash);
    2724           51 :         while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2725              :         {
    2726           47 :             if ((!userid || entry->key.userid == userid) &&
    2727           36 :                 (!dbid || entry->key.dbid == dbid) &&
    2728           34 :                 (!queryid || entry->key.queryid == queryid))
    2729              :             {
    2730            7 :                 SINGLE_ENTRY_RESET(entry);
    2731              :             }
    2732              :         }
    2733              :     }
    2734              :     else
    2735              :     {
    2736              :         /* No filter given: reset all entries. */
    2737          116 :         hash_seq_init(&hash_seq, pgss_hash);
    2738         1151 :         while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2739              :         {
    2740          941 :             SINGLE_ENTRY_RESET(entry);
    2741              :         }
    2742              :     }
    2743              : 
    2744              :     /* Were all entries removed?  If not, leave the global stats and the query-texts file alone. */
    2745          121 :     if (num_entries != num_remove)
    2746            6 :         goto release_lock;
    2747              : 
    2748              :     /*
    2749              :      * Reset global statistics for pg_stat_statements since all entries are
    2750              :      * removed.
    2751              :      */
    2752          115 :     SpinLockAcquire(&pgss->mutex);
    2753          115 :     pgss->stats.dealloc = 0;
    2754          115 :     pgss->stats.stats_reset = stats_reset;
    2755          115 :     SpinLockRelease(&pgss->mutex);
    2756              : 
    2757              :     /*
    2758              :      * Write new empty query file, perhaps even creating a new one to recover
    2759              :      * if the file was missing.
                      :      *
                      :      * If the file cannot be created we only log the failure and still fall
                      :      * through to the "done" label, so the extent is reset and the reset is
                      :      * recorded as a garbage collection in that case too.
    2760              :      */
    2761          115 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2762          115 :     if (qfile == NULL)
    2763              :     {
    2764            0 :         ereport(LOG,
    2765              :                 (errcode_for_file_access(),
    2766              :                  errmsg("could not create file \"%s\": %m",
    2767              :                         PGSS_TEXT_FILE)));
    2768            0 :         goto done;
    2769              :     }
    2770              : 
    2771              :     /* If ftruncate fails, log it, but it's not a fatal problem */
    2772          115 :     if (ftruncate(fileno(qfile), 0) != 0)
    2773            0 :         ereport(LOG,
    2774              :                 (errcode_for_file_access(),
    2775              :                  errmsg("could not truncate file \"%s\": %m",
    2776              :                         PGSS_TEXT_FILE)));
    2777              : 
    2778          115 :     FreeFile(qfile);
    2779              : 
    2780          115 : done:
    2781          115 :     pgss->extent = 0;
    2782              :     /* This counts as a query text garbage collection for our purposes */
    2783          115 :     record_gc_qtexts();
    2784              : 
    2785          121 : release_lock:
    2786          121 :     LWLockRelease(&pgss->lock.lock);
    2787              : 
    2788          121 :     return stats_reset;
    2789              : }
    2790              : 
    2791              : /*
    2792              :  * Generate a normalized version of the query string that will be used to
    2793              :  * represent all similar queries.
    2794              :  *
    2795              :  * Note that the normalized representation may well vary depending on
    2796              :  * just which "equivalent" query is used to create the hashtable entry.
    2797              :  * We assume this is OK.
    2798              :  *
    2799              :  * If query_loc > 0, then "query" has been advanced by that much compared to
    2800              :  * the original string start, so we need to translate the provided locations
    2801              :  * to compensate.  (This lets us avoid re-scanning statements before the one
    2802              :  * of interest, so it's worth doing.)
    2803              :  *
    2804              :  * *query_len_p contains the input string length, and is updated with
    2805              :  * the result string length on exit.  The resulting string might be longer
    2806              :  * or shorter depending on what happens with replacement of constants.
    2807              :  *
                      :  * The constant locations come from ComputeConstantLengths(), which is
                      :  * given jstate, query and query_loc; the array it returns is pfree'd
                      :  * before this function returns.  Each location that is not skipped is
                      :  * replaced by a "$n" symbol, where n counts up from
                      :  * jstate->highest_extern_param_id + 1 in the order the replacements are
                      :  * made.  A location flagged as squashed additionally gets a placeholder
                      :  * comment appended after its symbol.  Two kinds of location are skipped:
                      :  * those with a negative length (duplicates), and external parameters when
                      :  * jstate->has_squashed_lists is false.
                      :  *
    2808              :  * Returns a palloc'd string.
    2809              :  */
    2810              : static char *
    2811        11403 : generate_normalized_query(const JumbleState *jstate, const char *query,
    2812              :                           int query_loc, int *query_len_p)
    2813              : {
    2814              :     char       *norm_query;
    2815        11403 :     int         query_len = *query_len_p;
    2816              :     int         norm_query_buflen,  /* Space allowed for norm_query */
    2817              :                 len_to_wrt,     /* Length (in bytes) to write */
    2818        11403 :                 quer_loc = 0,   /* Source query byte location */
    2819        11403 :                 n_quer_loc = 0, /* Normalized query byte location */
    2820        11403 :                 last_off = 0,   /* Offset from start for previous tok */
    2821        11403 :                 last_tok_len = 0;   /* Length (in bytes) of that tok */
    2822        11403 :     int         num_constants_replaced = 0;
    2823        11403 :     LocationLen *locs = NULL;
    2824              : 
    2825              :     /*
    2826              :      * Determine constants' lengths (core system only gives us locations), and
    2827              :      * return a sorted copy of jstate's LocationLen data with lengths filled
    2828              :      * in.
    2829              :      */
    2830        11403 :     locs = ComputeConstantLengths(jstate, query, query_loc);
    2831              : 
    2832              :     /*
    2833              :      * Allow for $n symbols to be longer than the constants they replace.
    2834              :      * Constants must take at least one byte in text form, while a $n symbol
    2835              :      * certainly isn't more than 11 bytes, even if n reaches INT_MAX.  We
    2836              :      * could refine that limit based on the max value of n for the current
    2837              :      * query, but it hardly seems worth any extra effort to do so.
                      :      *
                      :      * NOTE(review): the placeholder comment appended after the symbol of a
                      :      * squashed list (see the sprintf call in the loop below) is not part of
                      :      * this 10-bytes-per-location allowance; presumably the text of the
                      :      * squashed list being replaced is always long enough to absorb it --
                      :      * TODO confirm.
    2838              :      */
    2839        11403 :     norm_query_buflen = query_len + jstate->clocations_count * 10;
    2840              : 
    2841              :     /* Allocate result buffer (one extra byte for the terminating NUL) */
    2842        11403 :     norm_query = palloc(norm_query_buflen + 1);
    2843              : 
    2844        45757 :     for (int i = 0; i < jstate->clocations_count; i++)
    2845              :     {
    2846              :         int         off,        /* Offset from start for cur tok */
    2847              :                     tok_len;    /* Length (in bytes) of that tok */
    2848              : 
    2849              :         /*
    2850              :          * If we have an external param at this location, but no lists are
    2851              :          * being squashed across the query, then we skip here; this will make
    2852              :          * us print the characters found in the original query that represent
    2853              :          * the parameter in the next iteration (or after the loop is done),
    2854              :          * which is a bit odd but seems to work okay in most cases.
    2855              :          */
    2856        34354 :         if (locs[i].extern_param && !jstate->has_squashed_lists)
    2857          162 :             continue;
    2858              : 
    2859        34192 :         off = locs[i].location;
    2860              : 
    2861              :         /* Adjust recorded location if we're dealing with partial string */
    2862        34192 :         off -= query_loc;
    2863              : 
    2864        34192 :         tok_len = locs[i].length;
    2865              : 
    2866        34192 :         if (tok_len < 0)
    2867          506 :             continue;           /* ignore any duplicates */
    2868              : 
    2869              :         /* Copy next chunk (what precedes the next constant) */
    2870        33686 :         len_to_wrt = off - last_off;
    2871        33686 :         len_to_wrt -= last_tok_len;
    2872              :         Assert(len_to_wrt >= 0);
    2873        33686 :         memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
    2874        33686 :         n_quer_loc += len_to_wrt;
    2875              : 
    2876              :         /*
    2877              :          * And insert a param symbol in place of the constant token; and, if
    2878              :          * we have a squashable list, insert a placeholder comment starting
    2879              :          * from the list's second value.
                      :          *
                      :          * The symbol number is offset by jstate->highest_extern_param_id.
                      :          * sprintf() does no bounds checking of its own here; the write
                      :          * relies on the norm_query_buflen estimate computed before the
                      :          * loop, which is only verified by the Assert after the final copy.
    2880              :          */
    2881        33686 :         n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s",
    2882        33686 :                               num_constants_replaced + 1 + jstate->highest_extern_param_id,
    2883        33686 :                               locs[i].squashed ? " /*, ... */" : "");
    2884        33686 :         num_constants_replaced++;
    2885              : 
    2886              :         /* move forward: the next source chunk starts right after this token */
    2887        33686 :         quer_loc = off + tok_len;
    2888        33686 :         last_off = off;
    2889        33686 :         last_tok_len = tok_len;
    2890              :     }
    2891              : 
    2892              :     /* Clean up, if needed */
    2893        11403 :     if (locs)
    2894        11403 :         pfree(locs);
    2895              : 
    2896              :     /*
    2897              :      * We've copied up until the last ignorable constant.  Copy over the
    2898              :      * remaining bytes of the original query string.
    2899              :      */
    2900        11403 :     len_to_wrt = query_len - quer_loc;
    2901              : 
    2902              :     Assert(len_to_wrt >= 0);
    2903        11403 :     memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
    2904        11403 :     n_quer_loc += len_to_wrt;
    2905              : 
    2906              :     Assert(n_quer_loc <= norm_query_buflen);
    2907        11403 :     norm_query[n_quer_loc] = '\0';
    2908              : 
    2909        11403 :     *query_len_p = n_quer_loc;
    2910        11403 :     return norm_query;
    2911              : }
        

Generated by: LCOV version 2.0-1