LCOV - code coverage report
Current view: top level - contrib/pg_stat_statements - pg_stat_statements.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 75.3 % 984 741
Test Date: 2026-03-27 22:16:19 Functions: 86.8 % 53 46
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * pg_stat_statements.c
       4              :  *      Track statement planning and execution times as well as resource
       5              :  *      usage across a whole database cluster.
       6              :  *
       7              :  * Execution costs are totaled for each distinct source query, and kept in
       8              :  * a shared hashtable.  (We track only as many distinct queries as will fit
       9              :  * in the designated amount of shared memory.)
      10              :  *
      11              :  * Starting in Postgres 9.2, this module normalized query entries.  As of
      12              :  * Postgres 14, the normalization is done by the core if compute_query_id is
      13              :  * enabled, or optionally by third-party modules.
      14              :  *
      15              :  * To facilitate presenting entries to users, we create "representative" query
      16              :  * strings in which constants are replaced with parameter symbols ($n), to
      17              :  * make it clearer what a normalized entry can represent.  To save on shared
      18              :  * memory, and to avoid having to truncate oversized query strings, we store
      19              :  * these strings in a temporary external query-texts file.  Offsets into this
      20              :  * file are kept in shared memory.
      21              :  *
      22              :  * Note about locking issues: to create or delete an entry in the shared
      23              :  * hashtable, one must hold pgss->lock exclusively.  Modifying any field
      24              :  * in an entry except the counters requires the same.  To look up an entry,
      25              :  * one must hold the lock shared.  To read or update the counters within
      26              :  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
      27              :  * disappear!) and also take the entry's mutex spinlock.
      28              :  * The shared state variable pgss->extent (the next free spot in the external
      29              :  * query-text file) should be accessed only while holding either the
      30              :  * pgss->mutex spinlock, or exclusive lock on pgss->lock.  We use the mutex to
      31              :  * allow reserving file space while holding only shared lock on pgss->lock.
      32              :  * Rewriting the entire external query-text file, eg for garbage collection,
      33              :  * requires holding pgss->lock exclusively; this allows individual entries
      34              :  * in the file to be read or written while holding only shared lock.
      35              :  *
      36              :  *
      37              :  * Copyright (c) 2008-2026, PostgreSQL Global Development Group
      38              :  *
      39              :  * IDENTIFICATION
      40              :  *    contrib/pg_stat_statements/pg_stat_statements.c
      41              :  *
      42              :  *-------------------------------------------------------------------------
      43              :  */
      44              : #include "postgres.h"
      45              : 
      46              : #include <math.h>
      47              : #include <sys/stat.h>
      48              : #include <unistd.h>
      49              : 
      50              : #include "access/htup_details.h"
      51              : #include "access/parallel.h"
      52              : #include "catalog/pg_authid.h"
      53              : #include "common/int.h"
      54              : #include "executor/instrument.h"
      55              : #include "funcapi.h"
      56              : #include "jit/jit.h"
      57              : #include "mb/pg_wchar.h"
      58              : #include "miscadmin.h"
      59              : #include "nodes/queryjumble.h"
      60              : #include "optimizer/planner.h"
      61              : #include "parser/analyze.h"
      62              : #include "parser/scanner.h"
      63              : #include "pgstat.h"
      64              : #include "storage/fd.h"
      65              : #include "storage/ipc.h"
      66              : #include "storage/lwlock.h"
      67              : #include "storage/shmem.h"
      68              : #include "storage/spin.h"
      69              : #include "tcop/utility.h"
      70              : #include "utils/acl.h"
      71              : #include "utils/builtins.h"
      72              : #include "utils/memutils.h"
      73              : #include "utils/timestamp.h"
      74              : #include "utils/tuplestore.h"
      75              : /* Module magic block; records this library's name and version. */
      76            8 : PG_MODULE_MAGIC_EXT(
      77              :                     .name = "pg_stat_statements",
      78              :                     .version = PG_VERSION
      79              : );
      80              : 
      81              : /* Location of permanent stats file (valid when database is shut down) */
      82              : #define PGSS_DUMP_FILE  PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
      83              : 
      84              : /*
      85              :  * Location of external query text file.  Query strings live in this file rather than in shared memory; shared memory holds only offsets into it.
      86              :  */
      87              : #define PGSS_TEXT_FILE  PG_STAT_TMP_DIR "/pgss_query_texts.stat"
      88              : 
      89              : /* Magic number identifying the stats file format */
      90              : static const uint32 PGSS_FILE_HEADER = 0x20250731;
      91              : 
      92              : /* PostgreSQL major version number, changes in which invalidate all entries */
      93              : static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
      94              : 
      95              : /* Currently a constant that ignores its argument.  XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
      96              : #define USAGE_EXEC(duration)    (1.0)
      97              : #define USAGE_INIT              (1.0)   /* including initial planning */
      98              : #define ASSUMED_MEDIAN_INIT     (10.0)  /* initial assumed median usage */
      99              : #define ASSUMED_LENGTH_INIT     1024    /* initial assumed mean query length */
     100              : #define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
     101              : #define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
     102              : #define USAGE_DEALLOC_PERCENT   5   /* free this % of entries at once */
     103              : #define IS_STICKY(c)    ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)  /* true when the plan and exec call counts sum to zero; NOTE(review): the argument is not parenthesized in the expansion, so pass a simple struct expression */
     104              : 
     105              : /*
     106              :  * Extension version number, for supporting older extension versions' objects.  Values ascend with version; there is no entry between 1.3 and 1.8.
     107              :  */
     108              : typedef enum pgssVersion
     109              : {
     110              :     PGSS_V1_0 = 0,
     111              :     PGSS_V1_1,
     112              :     PGSS_V1_2,
     113              :     PGSS_V1_3,
     114              :     PGSS_V1_8,
     115              :     PGSS_V1_9,
     116              :     PGSS_V1_10,
     117              :     PGSS_V1_11,
     118              :     PGSS_V1_12,
     119              :     PGSS_V1_13,
     120              : } pgssVersion;
     121              : /* Kind of statistics being stored: planning or execution. */
     122              : typedef enum pgssStoreKind
     123              : {
     124              :     PGSS_INVALID = -1,
     125              : 
     126              :     /*
     127              :      * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
     128              :      * reference the underlying values in the arrays in the Counters struct,
     129              :      * and this order is required in pg_stat_statements_internal().
     130              :      */
     131              :     PGSS_PLAN = 0,
     132              :     PGSS_EXEC,
     133              : } pgssStoreKind;
     134              : 
     135              : #define PGSS_NUMKIND (PGSS_EXEC + 1)  /* number of valid kinds; dimension of the per-kind arrays in Counters */
     136              : 
     137              : /*
     138              :  * Hashtable key that defines the identity of a hashtable entry.  We separate
     139              :  * queries by user and by database even if they are otherwise identical.
     140              :  *
     141              :  * If you add a new field to this struct, make sure to teach pgss_store() to
     142              :  * zero the padding bytes.  Otherwise, things will break, because pgss_hash is
     143              :  * created using HASH_BLOBS, and thus tag_hash is used to hash this.
     144              :  */
     145              : typedef struct pgssHashKey
     146              : {
     147              :     Oid         userid;         /* user OID */
     148              :     Oid         dbid;           /* database OID */
     149              :     int64       queryid;        /* query identifier */
     150              :     bool        toplevel;       /* query executed at top level */
     151              : } pgssHashKey;
     152              : 
     153              : /*
     154              :  * The actual stats counters kept within pgssEntry.  Arrays dimensioned by PGSS_NUMKIND are indexed by pgssStoreKind (PGSS_PLAN, PGSS_EXEC).
     155              :  */
     156              : typedef struct Counters
     157              : {
     158              :     int64       calls[PGSS_NUMKIND];    /* # of times planned/executed */
     159              :     double      total_time[PGSS_NUMKIND];   /* total planning/execution time,
     160              :                                              * in msec */
     161              :     double      min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
     162              :                                          * msec since min/max reset */
     163              :     double      max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
     164              :                                          * msec since min/max reset */
     165              :     double      mean_time[PGSS_NUMKIND];    /* mean planning/execution time in
     166              :                                              * msec */
     167              :     double      sum_var_time[PGSS_NUMKIND]; /* sum of variances in
     168              :                                              * planning/execution time in msec */
     169              :     int64       rows;           /* total # of retrieved or affected rows */
     170              :     int64       shared_blks_hit;    /* # of shared buffer hits */
     171              :     int64       shared_blks_read;   /* # of shared disk blocks read */
     172              :     int64       shared_blks_dirtied;    /* # of shared disk blocks dirtied */
     173              :     int64       shared_blks_written;    /* # of shared disk blocks written */
     174              :     int64       local_blks_hit; /* # of local buffer hits */
     175              :     int64       local_blks_read;    /* # of local disk blocks read */
     176              :     int64       local_blks_dirtied; /* # of local disk blocks dirtied */
     177              :     int64       local_blks_written; /* # of local disk blocks written */
     178              :     int64       temp_blks_read; /* # of temp blocks read */
     179              :     int64       temp_blks_written;  /* # of temp blocks written */
     180              :     double      shared_blk_read_time;   /* time spent reading shared blocks,
     181              :                                          * in msec */
     182              :     double      shared_blk_write_time;  /* time spent writing shared blocks,
     183              :                                          * in msec */
     184              :     double      local_blk_read_time;    /* time spent reading local blocks, in
     185              :                                          * msec */
     186              :     double      local_blk_write_time;   /* time spent writing local blocks, in
     187              :                                          * msec */
     188              :     double      temp_blk_read_time; /* time spent reading temp blocks, in msec */
     189              :     double      temp_blk_write_time;    /* time spent writing temp blocks, in
     190              :                                          * msec */
     191              :     double      usage;          /* usage factor */
     192              :     int64       wal_records;    /* # of WAL records generated */
     193              :     int64       wal_fpi;        /* # of WAL full page images generated */
     194              :     uint64      wal_bytes;      /* total amount of WAL generated in bytes */
     195              :     int64       wal_buffers_full;   /* # of times the WAL buffers became full */
     196              :     int64       jit_functions;  /* total number of JIT functions emitted */
     197              :     double      jit_generation_time;    /* total time to generate jit code */
     198              :     int64       jit_inlining_count; /* number of times inlining time has been
     199              :                                      * > 0 */
     200              :     double      jit_deform_time;    /* total time to deform tuples in jit code */
     201              :     int64       jit_deform_count;   /* number of times deform time has been >
     202              :                                      * 0 */
     203              : 
     204              :     double      jit_inlining_time;  /* total time to inline jit code */
     205              :     int64       jit_optimization_count; /* number of times optimization time
     206              :                                          * has been > 0 */
     207              :     double      jit_optimization_time;  /* total time to optimize jit code */
     208              :     int64       jit_emission_count; /* number of times emission time has been
     209              :                                      * > 0 */
     210              :     double      jit_emission_time;  /* total time to emit jit code */
     211              :     int64       parallel_workers_to_launch; /* # of parallel workers planned
     212              :                                              * to be launched */
     213              :     int64       parallel_workers_launched;  /* # of parallel workers actually
     214              :                                              * launched */
     215              :     int64       generic_plan_calls; /* number of calls using a generic plan */
     216              :     int64       custom_plan_calls;  /* number of calls using a custom plan */
     217              : } Counters;
     218              : 
     219              : /*
     220              :  * Global statistics for pg_stat_statements (embedded in pgssSharedState as its "stats" member)
     221              :  */
     222              : typedef struct pgssGlobalStats
     223              : {
     224              :     int64       dealloc;        /* # of times entries were deallocated */
     225              :     TimestampTz stats_reset;    /* timestamp with all stats reset */
     226              : } pgssGlobalStats;
     227              : 
     228              : /*
     229              :  * Statistics per statement (one hashtable entry, identified by its pgssHashKey)
     230              :  *
     231              :  * Note: in event of a failure in garbage collection of the query text file,
     232              :  * we reset query_offset to zero and query_len to -1.  This will be seen as
     233              :  * an invalid state by qtext_fetch().
     234              :  */
     235              : typedef struct pgssEntry
     236              : {
     237              :     pgssHashKey key;            /* hash key of entry - MUST BE FIRST */
     238              :     Counters    counters;       /* the statistics for this query */
     239              :     Size        query_offset;   /* query text offset in external file */
     240              :     int         query_len;      /* # of valid bytes in query string, or -1 */
     241              :     int         encoding;       /* query text encoding */
     242              :     TimestampTz stats_since;    /* timestamp of entry allocation */
     243              :     TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
     244              :     slock_t     mutex;          /* protects the counters only */
     245              : } pgssEntry;
     246              : 
     247              : /*
     248              :  * Global shared state (reached through the file-local "pgss" pointer)
     249              :  */
     250              : typedef struct pgssSharedState
     251              : {
     252              :     LWLock     *lock;           /* protects hashtable search/modification */
     253              :     double      cur_median_usage;   /* current median usage in hashtable */
     254              :     Size        mean_query_len; /* current mean entry text length */
     255              :     slock_t     mutex;          /* protects following fields only: */
     256              :     Size        extent;         /* current extent of query file */
     257              :     int         n_writers;      /* number of active writers to query file */
     258              :     int         gc_count;       /* query file garbage collection cycle count */
     259              :     pgssGlobalStats stats;      /* global statistics for pgss */
     260              : } pgssSharedState;
     261              : 
     262              : /*---- Local variables ----*/
     263              : 
     264              : /* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
     265              : static int  nesting_level = 0;
     266              : 
     267              : /* Saved hook values (what each hook pointed to before _PG_init replaced it) */
     268              : static shmem_request_hook_type prev_shmem_request_hook = NULL;
     269              : static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
     270              : static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
     271              : static planner_hook_type prev_planner_hook = NULL;
     272              : static ExecutorStart_hook_type prev_ExecutorStart = NULL;
     273              : static ExecutorRun_hook_type prev_ExecutorRun = NULL;
     274              : static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
     275              : static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
     276              : static ProcessUtility_hook_type prev_ProcessUtility = NULL;
     277              : 
     278              : /* Links to shared memory state */
     279              : static pgssSharedState *pgss = NULL;
     280              : static HTAB *pgss_hash = NULL;
     281              : 
     282              : /*---- GUC variables ----*/
     283              : 
     284              : typedef enum
     285              : {
     286              :     PGSS_TRACK_NONE,            /* track no statements */
     287              :     PGSS_TRACK_TOP,             /* only top level statements */
     288              :     PGSS_TRACK_ALL,             /* all statements, including nested ones */
     289              : }           PGSSTrackLevel;
     290              : /* Accepted spellings of pg_stat_statements.track, mapped to PGSSTrackLevel */
     291              : static const struct config_enum_entry track_options[] =
     292              : {
     293              :     {"none", PGSS_TRACK_NONE, false},
     294              :     {"top", PGSS_TRACK_TOP, false},
     295              :     {"all", PGSS_TRACK_ALL, false},
     296              :     {NULL, 0, false}
     297              : };
     298              : 
     299              : static int  pgss_max = 5000;    /* max # statements to track */
     300              : static int  pgss_track = PGSS_TRACK_TOP;    /* tracking level */
     301              : static bool pgss_track_utility = true;  /* whether to track utility commands */
     302              : static bool pgss_track_planning = false;    /* whether to track planning
     303              :                                              * duration */
     304              : static bool pgss_save = true;   /* whether to save stats across shutdown */
     305              : /* True if statements at nesting depth "level" are tracked under the current pgss_track setting; always false in a parallel worker */
     306              : #define pgss_enabled(level) \
     307              :     (!IsParallelWorker() && \
     308              :     (pgss_track == PGSS_TRACK_ALL || \
     309              :     (pgss_track == PGSS_TRACK_TOP && (level) == 0)))
     310              : /* Increment pgss->gc_count while holding the pgss->mutex spinlock */
     311              : #define record_gc_qtexts() \
     312              :     do { \
     313              :         SpinLockAcquire(&pgss->mutex); \
     314              :         pgss->gc_count++; \
     315              :         SpinLockRelease(&pgss->mutex); \
     316              :     } while(0)
     317              : 
     318              : /*---- Function declarations ----*/
     319              : /* SQL-callable functions (V1 function-info declarations) */
     320            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
     321            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_7);
     322           20 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_11);
     323            0 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
     324            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_3);
     325            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_8);
     326            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_9);
     327            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_10);
     328            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_11);
     329            6 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_12);
     330           24 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_13);
     331            0 : PG_FUNCTION_INFO_V1(pg_stat_statements);
     332            7 : PG_FUNCTION_INFO_V1(pg_stat_statements_info);
     333              : /* File-local functions: hook implementations and internal helpers */
     334              : static void pgss_shmem_request(void);
     335              : static void pgss_shmem_startup(void);
     336              : static void pgss_shmem_shutdown(int code, Datum arg);
     337              : static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
     338              :                                     JumbleState *jstate);
     339              : static PlannedStmt *pgss_planner(Query *parse,
     340              :                                  const char *query_string,
     341              :                                  int cursorOptions,
     342              :                                  ParamListInfo boundParams,
     343              :                                  ExplainState *es);
     344              : static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
     345              : static void pgss_ExecutorRun(QueryDesc *queryDesc,
     346              :                              ScanDirection direction,
     347              :                              uint64 count);
     348              : static void pgss_ExecutorFinish(QueryDesc *queryDesc);
     349              : static void pgss_ExecutorEnd(QueryDesc *queryDesc);
     350              : static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
     351              :                                 bool readOnlyTree,
     352              :                                 ProcessUtilityContext context, ParamListInfo params,
     353              :                                 QueryEnvironment *queryEnv,
     354              :                                 DestReceiver *dest, QueryCompletion *qc);
     355              : static void pgss_store(const char *query, int64 queryId,
     356              :                        int query_location, int query_len,
     357              :                        pgssStoreKind kind,
     358              :                        double total_time, uint64 rows,
     359              :                        const BufferUsage *bufusage,
     360              :                        const WalUsage *walusage,
     361              :                        const struct JitInstrumentation *jitusage,
     362              :                        JumbleState *jstate,
     363              :                        int parallel_workers_to_launch,
     364              :                        int parallel_workers_launched,
     365              :                        PlannedStmtOrigin planOrigin);
     366              : static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
     367              :                                         pgssVersion api_version,
     368              :                                         bool showtext);
     369              : static Size pgss_memsize(void);
     370              : static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
     371              :                               int encoding, bool sticky);
     372              : static void entry_dealloc(void);
     373              : static bool qtext_store(const char *query, int query_len,
     374              :                         Size *query_offset, int *gc_count);
     375              : static char *qtext_load_file(Size *buffer_size);
     376              : static char *qtext_fetch(Size query_offset, int query_len,
     377              :                          char *buffer, Size buffer_size);
     378              : static bool need_gc_qtexts(void);
     379              : static void gc_qtexts(void);
     380              : static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
     381              : static char *generate_normalized_query(JumbleState *jstate, const char *query,
     382              :                                        int query_loc, int *query_len_p);
     383              : static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
     384              :                                      int query_loc);
     385              : static int  comp_location(const void *a, const void *b);
     386              : 
     387              : 
     388              : /*
     389              :  * Module load callback.  Returns immediately unless loaded via shared_preload_libraries; otherwise requests query ID calculation, defines the pg_stat_statements.* GUCs, reserves that GUC prefix, and installs this module's hooks.
     390              :  */
     391              : void
     392            8 : _PG_init(void)
     393              : {
     394              :     /*
     395              :      * In order to create our shared memory area, we have to be loaded via
     396              :      * shared_preload_libraries.  If not, fall out without hooking into any of
     397              :      * the main system.  (We don't throw error here because it seems useful to
     398              :      * allow the pg_stat_statements functions to be created even when the
     399              :      * module isn't active.  The functions must protect themselves against
     400              :      * being called then, however.)
     401              :      */
     402            8 :     if (!process_shared_preload_libraries_in_progress)
     403            1 :         return;
     404              : 
     405              :     /*
     406              :      * Inform the postmaster that we want to enable query_id calculation if
     407              :      * compute_query_id is set to auto.
     408              :      */
     409            7 :     EnableQueryId();
     410              : 
     411              :     /*
     412              :      * Define (or redefine) custom GUC variables.
     413              :      */
     414            7 :     DefineCustomIntVariable("pg_stat_statements.max",
     415              :                             "Sets the maximum number of statements tracked by pg_stat_statements.",
     416              :                             NULL,
     417              :                             &pgss_max,
     418              :                             5000,
     419              :                             100,
     420              :                             INT_MAX / 2,
     421              :                             PGC_POSTMASTER,
     422              :                             0,
     423              :                             NULL,
     424              :                             NULL,
     425              :                             NULL);
     426              : 
     427            7 :     DefineCustomEnumVariable("pg_stat_statements.track",
     428              :                              "Selects which statements are tracked by pg_stat_statements.",
     429              :                              NULL,
     430              :                              &pgss_track,
     431              :                              PGSS_TRACK_TOP,
     432              :                              track_options,
     433              :                              PGC_SUSET,
     434              :                              0,
     435              :                              NULL,
     436              :                              NULL,
     437              :                              NULL);
     438              : 
     439            7 :     DefineCustomBoolVariable("pg_stat_statements.track_utility",
     440              :                              "Selects whether utility commands are tracked by pg_stat_statements.",
     441              :                              NULL,
     442              :                              &pgss_track_utility,
     443              :                              true,
     444              :                              PGC_SUSET,
     445              :                              0,
     446              :                              NULL,
     447              :                              NULL,
     448              :                              NULL);
     449              : 
     450            7 :     DefineCustomBoolVariable("pg_stat_statements.track_planning",
     451              :                              "Selects whether planning duration is tracked by pg_stat_statements.",
     452              :                              NULL,
     453              :                              &pgss_track_planning,
     454              :                              false,
     455              :                              PGC_SUSET,
     456              :                              0,
     457              :                              NULL,
     458              :                              NULL,
     459              :                              NULL);
     460              : 
     461            7 :     DefineCustomBoolVariable("pg_stat_statements.save",
     462              :                              "Save pg_stat_statements statistics across server shutdowns.",
     463              :                              NULL,
     464              :                              &pgss_save,
     465              :                              true,
     466              :                              PGC_SIGHUP,
     467              :                              0,
     468              :                              NULL,
     469              :                              NULL,
     470              :                              NULL);
     471              : 
     472            7 :     MarkGUCPrefixReserved("pg_stat_statements");
     473              : 
     474              :     /*
     475              :      * Install hooks, saving the previous value of each in its prev_* variable.
     476              :      */
     477            7 :     prev_shmem_request_hook = shmem_request_hook;
     478            7 :     shmem_request_hook = pgss_shmem_request;
     479            7 :     prev_shmem_startup_hook = shmem_startup_hook;
     480            7 :     shmem_startup_hook = pgss_shmem_startup;
     481            7 :     prev_post_parse_analyze_hook = post_parse_analyze_hook;
     482            7 :     post_parse_analyze_hook = pgss_post_parse_analyze;
     483            7 :     prev_planner_hook = planner_hook;
     484            7 :     planner_hook = pgss_planner;
     485            7 :     prev_ExecutorStart = ExecutorStart_hook;
     486            7 :     ExecutorStart_hook = pgss_ExecutorStart;
     487            7 :     prev_ExecutorRun = ExecutorRun_hook;
     488            7 :     ExecutorRun_hook = pgss_ExecutorRun;
     489            7 :     prev_ExecutorFinish = ExecutorFinish_hook;
     490            7 :     ExecutorFinish_hook = pgss_ExecutorFinish;
     491            7 :     prev_ExecutorEnd = ExecutorEnd_hook;
     492            7 :     ExecutorEnd_hook = pgss_ExecutorEnd;
     493            7 :     prev_ProcessUtility = ProcessUtility_hook;
     494            7 :     ProcessUtility_hook = pgss_ProcessUtility;
     495              : }
     496              : 
     497              : /*
     498              :  * shmem_request hook: request additional shared resources (add-in shared
     499              :  * memory and one named LWLock); pgss_shmem_startup() allocates or attaches to them.
     500              :  */
     501              : static void
     502            7 : pgss_shmem_request(void)
     503              : {
     504            7 :     if (prev_shmem_request_hook)
     505            0 :         prev_shmem_request_hook();  /* chain to the hook saved when ours was installed */
     506              :     /* Size comes from pgss_memsize(); the tranche name is looked up again in pgss_shmem_startup(). */
     507            7 :     RequestAddinShmemSpace(pgss_memsize());
     508            7 :     RequestNamedLWLockTranche("pg_stat_statements", 1);
     509            7 : }
     510              : 
     511              : /*
     512              :  * shmem_startup hook: allocate or attach to the shared state and hash table,
     513              :  * then (when pgss_save is set) load pre-existing statistics from PGSS_DUMP_FILE.
     514              :  * Also create the query-texts file (PGSS_TEXT_FILE), which is expected to exist
     515              :  * (even if empty) while the module is enabled.  File problems are reported at LOG level.
     516              :  */
     517              : static void
     518            7 : pgss_shmem_startup(void)
     519              : {
     520              :     bool        found;
     521              :     HASHCTL     info;
     522            7 :     FILE       *file = NULL;    /* dump file being read, if any */
     523            7 :     FILE       *qfile = NULL;   /* query-texts file being written */
     524              :     uint32      header;
     525              :     int32       num;            /* entry count stored in the dump file */
     526              :     int32       pgver;
     527              :     int32       i;
     528              :     int         buffer_size;
     529            7 :     char       *buffer = NULL;  /* scratch space for one query text */
     530              : 
     531            7 :     if (prev_shmem_startup_hook)
     532            0 :         prev_shmem_startup_hook();
     533              : 
     534              :     /* reset in case this is a restart within the postmaster */
     535            7 :     pgss = NULL;
     536            7 :     pgss_hash = NULL;
     537              : 
     538              :     /*
     539              :      * Create or attach to the shared memory state, including hash table
     540              :      */
     541            7 :     LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
     542              : 
     543            7 :     pgss = ShmemInitStruct("pg_stat_statements",
     544              :                            sizeof(pgssSharedState),
     545              :                            &found);
     546              : 
     547            7 :     if (!found)
     548              :     {
     549              :         /* First time through ... */
     550            7 :         pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
     551            7 :         pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
     552            7 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
     553            7 :         SpinLockInit(&pgss->mutex);
     554            7 :         pgss->extent = 0;
     555            7 :         pgss->n_writers = 0;
     556            7 :         pgss->gc_count = 0;
     557            7 :         pgss->stats.dealloc = 0;
     558            7 :         pgss->stats.stats_reset = GetCurrentTimestamp();
     559              :     }
     560              : 
     561            7 :     info.keysize = sizeof(pgssHashKey);
     562            7 :     info.entrysize = sizeof(pgssEntry);
     563            7 :     pgss_hash = ShmemInitHash("pg_stat_statements hash",
     564              :                               pgss_max, pgss_max,
     565              :                               &info,
     566              :                               HASH_ELEM | HASH_BLOBS);
     567              : 
     568            7 :     LWLockRelease(AddinShmemInitLock);
     569              : 
     570              :     /*
     571              :      * If we're in the postmaster (or a standalone backend...), set up a shmem
     572              :      * exit hook to dump the statistics to disk.
     573              :      */
     574            7 :     if (!IsUnderPostmaster)
     575            7 :         on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
     576              : 
     577              :     /*
     578              :      * Done if some other process already completed our initialization.
     579              :      */
     580            7 :     if (found)
     581            7 :         return;
     582              : 
     583              :     /*
     584              :      * Note: we don't bother with locks here, because there should be no other
     585              :      * processes running when this code is reached.
     586              :      */
     587              : 
     588              :     /* Unlink query text file possibly left over from crash */
     589            7 :     unlink(PGSS_TEXT_FILE);
     590              : 
     591              :     /* Allocate new query text temp file */
     592            7 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
     593            7 :     if (qfile == NULL)
     594            0 :         goto write_error;
     595              : 
     596              :     /*
     597              :      * If we were told not to load old statistics, we're done.  (Note we do
     598              :      * not try to unlink any old dump file in this case.  This seems a bit
     599              :      * questionable but it's the historical behavior.)
     600              :      */
     601            7 :     if (!pgss_save)
     602              :     {
     603            1 :         FreeFile(qfile);
     604            1 :         return;
     605              :     }
     606              : 
     607              :     /*
     608              :      * Attempt to load old statistics from the dump file.
     609              :      */
     610            6 :     file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
     611            6 :     if (file == NULL)
     612              :     {
     613            4 :         if (errno != ENOENT)
     614            0 :             goto read_error;
     615              :         /* No existing persisted stats file, so we're done */
     616            4 :         FreeFile(qfile);
     617            4 :         return;
     618              :     }
     619              : 
     620            2 :     buffer_size = 2048;
     621            2 :     buffer = (char *) palloc(buffer_size);
     622              :     /* File layout as read below: header word, PG major version, entry count, entries with texts, global stats */
     623            4 :     if (fread(&header, sizeof(uint32), 1, file) != 1 ||
     624            4 :         fread(&pgver, sizeof(uint32), 1, file) != 1 ||
     625            2 :         fread(&num, sizeof(int32), 1, file) != 1)
     626            0 :         goto read_error;
     627              : 
     628            2 :     if (header != PGSS_FILE_HEADER ||
     629            2 :         pgver != PGSS_PG_MAJOR_VERSION)
     630            0 :         goto data_error;
     631              : 
     632        28292 :     for (i = 0; i < num; i++)
     633              :     {
     634              :         pgssEntry   temp;
     635              :         pgssEntry  *entry;
     636              :         Size        query_offset;
     637              : 
     638        28290 :         if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
     639            0 :             goto read_error;
     640              : 
     641              :         /* Sanity-check what we cheaply can: the encoding, and that the text length is not negative */
     642        28290 :         if (!PG_VALID_BE_ENCODING(temp.encoding) || temp.query_len < 0)
     643            0 :             goto data_error;
     644              :         /* (a negative length would dodge the resize test below, then wrap to a huge fread() size or index buffer[-1]) */
     645              :         /* Resize buffer as needed */
     646        28290 :         if (temp.query_len >= buffer_size)
     647              :         {
     648            3 :             buffer_size = Max(buffer_size * 2, temp.query_len + 1);
     649            3 :             buffer = repalloc(buffer, buffer_size);
     650              :         }
     651              : 
     652        28290 :         if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
     653            0 :             goto read_error;
     654              : 
     655              :         /* Should have a trailing null, but let's make sure */
     656        28290 :         buffer[temp.query_len] = '\0';
     657              : 
     658              :         /* Skip loading "sticky" entries */
     659        28290 :         if (IS_STICKY(temp.counters))
     660          763 :             continue;
     661              : 
     662              :         /* Store the query text */
     663        27527 :         query_offset = pgss->extent;
     664        27527 :         if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
     665            0 :             goto write_error;
     666        27527 :         pgss->extent += temp.query_len + 1;
     667              : 
     668              :         /* make the hashtable entry (discards old entries if too many) */
     669        27527 :         entry = entry_alloc(&temp.key, query_offset, temp.query_len,
     670              :                             temp.encoding,
     671              :                             false);
     672              : 
     673              :         /* copy in the actual stats */
     674        27527 :         entry->counters = temp.counters;
     675        27527 :         entry->stats_since = temp.stats_since;
     676        27527 :         entry->minmax_stats_since = temp.minmax_stats_since;
     677              :     }
     678              : 
     679              :     /* Read global statistics for pg_stat_statements */
     680            2 :     if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
     681            0 :         goto read_error;
     682              : 
     683            2 :     pfree(buffer);
     684            2 :     FreeFile(file);
     685            2 :     FreeFile(qfile);
     686              : 
     687              :     /*
     688              :      * Remove the persisted stats file so it's not included in
     689              :      * backups/replication standbys, etc.  A new file will be written on next
     690              :      * shutdown.
     691              :      *
     692              :      * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
     693              :      * because we remove that file on startup; it acts inversely to
     694              :      * PGSS_DUMP_FILE, in that it is only supposed to be around when the
     695              :      * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
     696              :      * when the server is not running.  Leaving the file creates no danger of
     697              :      * a newly restored database having a spurious record of execution costs,
     698              :      * which is what we're really concerned about here.
     699              :      */
     700            2 :     unlink(PGSS_DUMP_FILE);
     701              : 
     702            2 :     return;
     703              : 
     704            0 : read_error:
     705            0 :     ereport(LOG,
     706              :             (errcode_for_file_access(),
     707              :              errmsg("could not read file \"%s\": %m",
     708              :                     PGSS_DUMP_FILE)));
     709            0 :     goto fail;
     710            0 : data_error:
     711            0 :     ereport(LOG,
     712              :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     713              :              errmsg("ignoring invalid data in file \"%s\"",
     714              :                     PGSS_DUMP_FILE)));
     715            0 :     goto fail;
     716            0 : write_error:
     717            0 :     ereport(LOG,
     718              :             (errcode_for_file_access(),
     719              :              errmsg("could not write file \"%s\": %m",
     720              :                     PGSS_TEXT_FILE)));
     721            0 : fail:
     722            0 :     if (buffer)
     723            0 :         pfree(buffer);
     724            0 :     if (file)
     725            0 :         FreeFile(file);
     726            0 :     if (qfile)
     727            0 :         FreeFile(qfile);
     728              :     /* If possible, throw away the bogus file; ignore any error */
     729            0 :     unlink(PGSS_DUMP_FILE);
     730              : 
     731              :     /*
     732              :      * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
     733              :      * server is running with pg_stat_statements enabled
     734              :      */
     735              : }
     736              : 
     737              : /*
     738              :  * shmem_shutdown hook: dump statistics into PGSS_DUMP_FILE (written as a ".tmp"
     739              :  * file and renamed into place).  Does nothing if code is nonzero or pgss_save is off; arg is unused.
     740              :  * Note: we don't bother with acquiring the lock, because there should be no
     741              :  * other processes running when this is called.
     742              :  */
     743              : static void
     744            7 : pgss_shmem_shutdown(int code, Datum arg)
     745              : {
     746              :     FILE       *file;
     747            7 :     char       *qbuffer = NULL;     /* contents of the query-texts file */
     748            7 :     Size        qbuffer_size = 0;
     749              :     HASH_SEQ_STATUS hash_seq;
     750              :     int32       num_entries;
     751              :     pgssEntry  *entry;
     752              : 
     753              :     /* Don't try to dump during a crash. */
     754            7 :     if (code)
     755            7 :         return;
     756              : 
     757              :     /* Safety check ... shouldn't get here unless shmem is set up. */
     758            7 :     if (!pgss || !pgss_hash)
     759            0 :         return;
     760              : 
     761              :     /* Don't dump if told not to. */
     762            7 :     if (!pgss_save)
     763            2 :         return;
     764              : 
     765            5 :     file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
     766            5 :     if (file == NULL)
     767            0 :         goto error;
     768              :     /* Header: file header word, PG major version, then the number of hash entries */
     769            5 :     if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
     770            0 :         goto error;
     771            5 :     if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
     772            0 :         goto error;
     773            5 :     num_entries = hash_get_num_entries(pgss_hash);
     774            5 :     if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
     775            0 :         goto error;
     776              : 
     777            5 :     qbuffer = qtext_load_file(&qbuffer_size);
     778            5 :     if (qbuffer == NULL)
     779            0 :         goto error;
     780              : 
     781              :     /*
     782              :      * When serializing to disk, we store query texts immediately after their
     783              :      * entry data.  Any orphaned query texts are thereby excluded.
     784              :      */
     785            5 :     hash_seq_init(&hash_seq, pgss_hash);
     786        56896 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
     787              :     {
     788        56891 :         int         len = entry->query_len;
     789        56891 :         char       *qstr = qtext_fetch(entry->query_offset, len,
     790              :                                        qbuffer, qbuffer_size);
     791              : 
     792        56891 :         if (qstr == NULL)
     793            0 :             continue;           /* Ignore any entries with bogus texts */
     794              :         /* NOTE(review): a skipped entry is still counted in num_entries written above, and pgss_shmem_startup() reads exactly that many entries -- confirm this is acceptable */
     795        56891 :         if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
     796        56891 :             fwrite(qstr, 1, len + 1, file) != len + 1)
     797              :         {
     798              :             /* note: we assume hash_seq_term won't change errno */
     799            0 :             hash_seq_term(&hash_seq);
     800            0 :             goto error;
     801              :         }
     802              :     }
     803              : 
     804              :     /* Dump global statistics for pg_stat_statements */
     805            5 :     if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
     806            0 :         goto error;
     807              : 
     808            5 :     pfree(qbuffer);
     809            5 :     qbuffer = NULL;     /* so the error path below does not free it again */
     810              : 
     811            5 :     if (FreeFile(file))
     812              :     {
     813            0 :         file = NULL;    /* so the error path below does not call FreeFile() on it again */
     814            0 :         goto error;
     815              :     }
     816              : 
     817              :     /*
     818              :      * Rename file into place, so we atomically replace any old one.
     819              :      */
     820            5 :     (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
     821              : 
     822              :     /* Unlink query-texts file; it's not needed once we are shutting down */
     823            5 :     unlink(PGSS_TEXT_FILE);
     824              : 
     825            5 :     return;
     826              : 
     827            0 : error:
     828            0 :     ereport(LOG,
     829              :             (errcode_for_file_access(),
     830              :              errmsg("could not write file \"%s\": %m",
     831              :                     PGSS_DUMP_FILE ".tmp")));
     832            0 :     if (qbuffer)
     833            0 :         pfree(qbuffer);
     834            0 :     if (file)
     835            0 :         FreeFile(file);
     836            0 :     unlink(PGSS_DUMP_FILE ".tmp");  /* discard the partial dump */
     837            0 :     unlink(PGSS_TEXT_FILE);         /* same cleanup as the success path */
     838              : }
     839              : 
     840              : /*
     841              :  * Post-parse-analysis hook: zero the queryId of EXECUTE, and store an early entry for queries with normalizable constants
     842              :  */
     843              : static void
     844        82691 : pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
     845              : {
     846        82691 :     if (prev_post_parse_analyze_hook)
     847            0 :         prev_post_parse_analyze_hook(pstate, query, jstate);
     848              : 
     849              :     /* Safety check... */
     850        82691 :     if (!pgss || !pgss_hash || !pgss_enabled(nesting_level))
     851        12780 :         return;
     852              : 
     853              :     /*
     854              :      * If it's EXECUTE, clear the queryId so that stats will accumulate for
     855              :      * the underlying PREPARE.  But don't do this if we're not tracking
     856              :      * utility statements, to avoid messing up another extension that might be
     857              :      * tracking them.
     858              :      */
     859        69911 :     if (query->utilityStmt)
     860              :     {
     861        31374 :         if (pgss_track_utility && IsA(query->utilityStmt, ExecuteStmt))
     862              :         {
     863         3340 :             query->queryId = INT64CONST(0);
     864         3340 :             return;
     865              :         }
     866              :     }
     867              : 
     868              :     /*
     869              :      * If query jumbling was able to identify any ignorable constants, we
     870              :      * immediately create a hash table entry for the query, so that we can
     871              :      * record the normalized form of the query string.  If there were no such
     872              :      * constants, the normalized string would be the same as the query text
     873              :      * anyway, so there's no need for an early entry.
     874              :      */
     875        66571 :     if (jstate && jstate->clocations_count > 0)
     876        38383 :         pgss_store(pstate->p_sourcetext,
     877              :                    query->queryId,
     878              :                    query->stmt_location,
     879              :                    query->stmt_len,
     880              :                    PGSS_INVALID,    /* neither PGSS_PLAN nor PGSS_EXEC: no timing to record here */
     881              :                    0,
     882              :                    0,
     883              :                    NULL,
     884              :                    NULL,
     885              :                    NULL,
     886              :                    jstate,          /* the only pgss_store() call in this part of the file that passes a jstate */
     887              :                    0,
     888              :                    0,
     889              :                    PLAN_STMT_UNKNOWN);
     890              : }
     891              : 
     892              : /*
     893              :  * Planner hook: forward to the previous planner hook or standard_planner(), and
     894              :  * report planning time, buffer usage and WAL usage to pgss_store() when tracked.
     895              :  */
     896              : static PlannedStmt *
     897        49832 : pgss_planner(Query *parse,
     898              :              const char *query_string,
     899              :              int cursorOptions,
     900              :              ParamListInfo boundParams,
     901              :              ExplainState *es)
     902              : {
     903              :     PlannedStmt *result;
     904              : 
     905              :     /*
     906              :      * We can't process the query if no query_string is provided, as
     907              :      * pgss_store needs it.  We also ignore queries without a queryId, as they
     908              :      * would be treated as utility statements, which may not be the case.
     909              :      */
     910        49832 :     if (pgss_enabled(nesting_level)
     911        38681 :         && pgss_track_planning && query_string
     912          150 :         && parse->queryId != INT64CONST(0))
     913          150 :     {
     914              :         instr_time  start;
     915              :         instr_time  duration;
     916              :         BufferUsage bufusage_start,
     917              :                     bufusage;
     918              :         WalUsage    walusage_start,
     919              :                     walusage;
     920              : 
     921              :         /* We need to track buffer usage as the planner can access them. */
     922          150 :         bufusage_start = pgBufferUsage;
     923              : 
     924              :         /*
     925              :          * Similarly the planner could write some WAL records in some cases
     926              :          * (e.g. setting a hint bit with those being WAL-logged)
     927              :          */
     928          150 :         walusage_start = pgWalUsage;
     929          150 :         INSTR_TIME_SET_CURRENT(start);
     930              : 
     931          150 :         nesting_level++;    /* anything run while planning is one level deeper */
     932          150 :         PG_TRY();
     933              :         {
     934          150 :             if (prev_planner_hook)
     935            0 :                 result = prev_planner_hook(parse, query_string, cursorOptions,
     936              :                                            boundParams, es);
     937              :             else
     938          150 :                 result = standard_planner(parse, query_string, cursorOptions,
     939              :                                           boundParams, es);
     940              :         }
     941            0 :         PG_FINALLY();
     942              :         {
     943          150 :             nesting_level--;
     944              :         }
     945          150 :         PG_END_TRY();
     946              : 
     947          150 :         INSTR_TIME_SET_CURRENT(duration);
     948          150 :         INSTR_TIME_SUBTRACT(duration, start);   /* duration = now - start */
     949              : 
     950              :         /* calc differences of buffer counters. */
     951          150 :         memset(&bufusage, 0, sizeof(BufferUsage));
     952          150 :         BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
     953              : 
     954              :         /* calc differences of WAL counters. */
     955          150 :         memset(&walusage, 0, sizeof(WalUsage));
     956          150 :         WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
     957              : 
     958          150 :         pgss_store(query_string,
     959              :                    parse->queryId,
     960              :                    parse->stmt_location,
     961              :                    parse->stmt_len,
     962              :                    PGSS_PLAN,
     963          150 :                    INSTR_TIME_GET_MILLISEC(duration),   /* planning time in milliseconds */
     964              :                    0,
     965              :                    &bufusage,
     966              :                    &walusage,
     967              :                    NULL,
     968              :                    NULL,
     969              :                    0,
     970              :                    0,
     971              :                    result->planOrigin);
     972              :     }
     973              :     else
     974              :     {
     975              :         /*
     976              :          * Even though we're not tracking plan time for this statement, we
     977              :          * must still increment the nesting level, to ensure that functions
     978              :          * evaluated during planning are not seen as top-level calls.
     979              :          */
     980        49682 :         nesting_level++;
     981        49682 :         PG_TRY();
     982              :         {
     983        49682 :             if (prev_planner_hook)
     984            0 :                 result = prev_planner_hook(parse, query_string, cursorOptions,
     985              :                                            boundParams, es);
     986              :             else
     987        49682 :                 result = standard_planner(parse, query_string, cursorOptions,
     988              :                                           boundParams, es);
     989              :         }
     990          771 :         PG_FINALLY();
     991              :         {
     992        49682 :             nesting_level--;
     993              :         }
     994        49682 :         PG_END_TRY();
     995              :     }
     996              : 
     997        49061 :     return result;
     998              : }
     999              : 
    1000              : /*
    1001              :  * ExecutorStart hook: run the previous hook or standard_ExecutorStart(), then attach a totaltime instrument if tracking
    1002              :  */
    1003              : static void
    1004        60443 : pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
    1005              : {
    1006        60443 :     if (prev_ExecutorStart)
    1007            0 :         prev_ExecutorStart(queryDesc, eflags);
    1008              :     else
    1009        60443 :         standard_ExecutorStart(queryDesc, eflags);
    1010              : 
    1011              :     /*
    1012              :      * If query has queryId zero, don't track it.  This prevents double
    1013              :      * counting of optimizable statements that are directly contained in
    1014              :      * utility statements.
    1015              :      */
    1016        60167 :     if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
    1017              :     {
    1018              :         /*
    1019              :          * Set up to track total elapsed time in ExecutorRun.  Make sure the
    1020              :          * space is allocated in the per-query context so it will go away at
    1021              :          * ExecutorEnd.
    1022              :          */
    1023        40610 :         if (queryDesc->totaltime == NULL)   /* keep an instrument that is already set */
    1024              :         {
    1025              :             MemoryContext oldcxt;
    1026              : 
    1027        40610 :             oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
    1028        40610 :             queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
    1029        40610 :             MemoryContextSwitchTo(oldcxt);  /* restore the caller's memory context */
    1030              :         }
    1031              :     }
    1032        60167 : }
    1033              : 
    1034              : /*
    1035              :  * ExecutorRun hook: all we need do is track nesting depth around the previous hook or standard_ExecutorRun()
    1036              :  */
    1037              : static void
    1038        58843 : pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
    1039              : {
    1040        58843 :     nesting_level++;    /* statements executed inside this call are nested */
    1041        58843 :     PG_TRY();
    1042              :     {
    1043        58843 :         if (prev_ExecutorRun)
    1044            0 :             prev_ExecutorRun(queryDesc, direction, count);
    1045              :         else
    1046        58843 :             standard_ExecutorRun(queryDesc, direction, count);
    1047              :     }
    1048         3498 :     PG_FINALLY();
    1049              :     {
    1050        58843 :         nesting_level--;    /* undo the increment made before PG_TRY */
    1051              :     }
    1052        58843 :     PG_END_TRY();
    1053        55345 : }
    1054              : 
    1055              : /*
    1056              :  * ExecutorFinish hook: all we need do is track nesting depth around the previous hook or standard_ExecutorFinish()
    1057              :  */
    1058              : static void
    1059        53283 : pgss_ExecutorFinish(QueryDesc *queryDesc)
    1060              : {
    1061        53283 :     nesting_level++;    /* statements executed inside this call are nested */
    1062        53283 :     PG_TRY();
    1063              :     {
    1064        53283 :         if (prev_ExecutorFinish)
    1065            0 :             prev_ExecutorFinish(queryDesc);
    1066              :         else
    1067        53283 :             standard_ExecutorFinish(queryDesc);
    1068              :     }
    1069          167 :     PG_FINALLY();
    1070              :     {
    1071        53283 :         nesting_level--;    /* undo the increment made before PG_TRY */
    1072              :     }
    1073        53283 :     PG_END_TRY();
    1074        53116 : }
    1075              : 
    1076              : /*
    1077              :  * ExecutorEnd hook: store results if needed
    1078              :  */
    1079              : static void
    1080        56175 : pgss_ExecutorEnd(QueryDesc *queryDesc)
    1081              : {
    1082        56175 :     int64       queryId = queryDesc->plannedstmt->queryId;
    1083              : 
    1084        56175 :     if (queryId != INT64CONST(0) && queryDesc->totaltime &&
    1085        39017 :         pgss_enabled(nesting_level))
    1086              :     {
    1087              :         /*
    1088              :          * Make sure stats accumulation is done.  (Note: it's okay if several
    1089              :          * levels of hook all do this.)
    1090              :          */
    1091        39017 :         InstrEndLoop(queryDesc->totaltime);
    1092              : 
    1093        38932 :         pgss_store(queryDesc->sourceText,
    1094              :                    queryId,
    1095        39017 :                    queryDesc->plannedstmt->stmt_location,
    1096        39017 :                    queryDesc->plannedstmt->stmt_len,
    1097              :                    PGSS_EXEC,
    1098        39017 :                    INSTR_TIME_GET_MILLISEC(queryDesc->totaltime->total),
    1099        39017 :                    queryDesc->estate->es_total_processed,
    1100        39017 :                    &queryDesc->totaltime->bufusage,
    1101        39017 :                    &queryDesc->totaltime->walusage,
    1102           85 :                    queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
    1103              :                    NULL,
    1104        39017 :                    queryDesc->estate->es_parallel_workers_to_launch,
    1105        39017 :                    queryDesc->estate->es_parallel_workers_launched,
    1106        39017 :                    queryDesc->plannedstmt->planOrigin);
    1107              :     }
    1108              : 
    1109        56175 :     if (prev_ExecutorEnd)
    1110            0 :         prev_ExecutorEnd(queryDesc);
    1111              :     else
    1112        56175 :         standard_ExecutorEnd(queryDesc);
    1113        56175 : }
    1114              : 
    1115              : /*
    1116              :  * ProcessUtility hook
    1117              :  */
    1118              : static void
    1119        36863 : pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
    1120              :                     bool readOnlyTree,
    1121              :                     ProcessUtilityContext context,
    1122              :                     ParamListInfo params, QueryEnvironment *queryEnv,
    1123              :                     DestReceiver *dest, QueryCompletion *qc)
    1124              : {
    1125        36863 :     Node       *parsetree = pstmt->utilityStmt;
    1126        36863 :     int64       saved_queryId = pstmt->queryId;
    1127        36863 :     int         saved_stmt_location = pstmt->stmt_location;
    1128        36863 :     int         saved_stmt_len = pstmt->stmt_len;
    1129        36863 :     bool        enabled = pgss_track_utility && pgss_enabled(nesting_level);
    1130              : 
    1131              :     /*
    1132              :      * Force utility statements to get queryId zero.  We do this even in cases
    1133              :      * where the statement contains an optimizable statement for which a
    1134              :      * queryId could be derived (such as EXPLAIN or DECLARE CURSOR).  For such
    1135              :      * cases, runtime control will first go through ProcessUtility and then
    1136              :      * the executor, and we don't want the executor hooks to do anything,
    1137              :      * since we are already measuring the statement's costs at the utility
    1138              :      * level.
    1139              :      *
    1140              :      * Note that this is only done if pg_stat_statements is enabled and
    1141              :      * configured to track utility statements, in the unlikely possibility
    1142              :      * that user configured another extension to handle utility statements
    1143              :      * only.
    1144              :      */
    1145        36863 :     if (enabled)
    1146        31259 :         pstmt->queryId = INT64CONST(0);
    1147              : 
    1148              :     /*
    1149              :      * If it's an EXECUTE statement, we don't track it and don't increment the
    1150              :      * nesting level.  This allows the cycles to be charged to the underlying
    1151              :      * PREPARE instead (by the Executor hooks), which is much more useful.
    1152              :      *
    1153              :      * We also don't track execution of PREPARE.  If we did, we would get one
    1154              :      * hash table entry for the PREPARE (with hash calculated from the query
    1155              :      * string), and then a different one with the same query string (but hash
    1156              :      * calculated from the query tree) would be used to accumulate costs of
    1157              :      * ensuing EXECUTEs.  This would be confusing.  Since PREPARE doesn't
    1158              :      * actually run the planner (only parse+rewrite), its costs are generally
    1159              :      * pretty negligible and it seems okay to just ignore it.
    1160              :      */
    1161        36863 :     if (enabled &&
    1162        31259 :         !IsA(parsetree, ExecuteStmt) &&
    1163        27925 :         !IsA(parsetree, PrepareStmt))
    1164        25142 :     {
    1165              :         instr_time  start;
    1166              :         instr_time  duration;
    1167              :         uint64      rows;
    1168              :         BufferUsage bufusage_start,
    1169              :                     bufusage;
    1170              :         WalUsage    walusage_start,
    1171              :                     walusage;
    1172              : 
    1173        27797 :         bufusage_start = pgBufferUsage;
    1174        27797 :         walusage_start = pgWalUsage;
    1175        27797 :         INSTR_TIME_SET_CURRENT(start);
    1176              : 
    1177        27797 :         nesting_level++;
    1178        27797 :         PG_TRY();
    1179              :         {
    1180        27797 :             if (prev_ProcessUtility)
    1181            0 :                 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
    1182              :                                     context, params, queryEnv,
    1183              :                                     dest, qc);
    1184              :             else
    1185        27797 :                 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
    1186              :                                         context, params, queryEnv,
    1187              :                                         dest, qc);
    1188              :         }
    1189         2655 :         PG_FINALLY();
    1190              :         {
    1191        27797 :             nesting_level--;
    1192              :         }
    1193        27797 :         PG_END_TRY();
    1194              : 
    1195              :         /*
    1196              :          * CAUTION: do not access the *pstmt data structure again below here.
    1197              :          * If it was a ROLLBACK or similar, that data structure may have been
    1198              :          * freed.  We must copy everything we still need into local variables,
    1199              :          * which we did above.
    1200              :          *
    1201              :          * For the same reason, we can't risk restoring pstmt->queryId to its
    1202              :          * former value, which'd otherwise be a good idea.
    1203              :          */
    1204              : 
    1205        25142 :         INSTR_TIME_SET_CURRENT(duration);
    1206        25142 :         INSTR_TIME_SUBTRACT(duration, start);
    1207              : 
    1208              :         /*
    1209              :          * Track the total number of rows retrieved or affected by the utility
    1210              :          * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
    1211              :          * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
    1212              :          */
    1213        25139 :         rows = (qc && (qc->commandTag == CMDTAG_COPY ||
    1214        23340 :                        qc->commandTag == CMDTAG_FETCH ||
    1215        23080 :                        qc->commandTag == CMDTAG_SELECT ||
    1216        22887 :                        qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
    1217        50281 :             qc->nprocessed : 0;
    1218              : 
    1219              :         /* calc differences of buffer counters. */
    1220        25142 :         memset(&bufusage, 0, sizeof(BufferUsage));
    1221        25142 :         BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
    1222              : 
    1223              :         /* calc differences of WAL counters. */
    1224        25142 :         memset(&walusage, 0, sizeof(WalUsage));
    1225        25142 :         WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
    1226              : 
    1227        25142 :         pgss_store(queryString,
    1228              :                    saved_queryId,
    1229              :                    saved_stmt_location,
    1230              :                    saved_stmt_len,
    1231              :                    PGSS_EXEC,
    1232        25142 :                    INSTR_TIME_GET_MILLISEC(duration),
    1233              :                    rows,
    1234              :                    &bufusage,
    1235              :                    &walusage,
    1236              :                    NULL,
    1237              :                    NULL,
    1238              :                    0,
    1239              :                    0,
    1240              :                    pstmt->planOrigin);
    1241              :     }
    1242              :     else
    1243              :     {
    1244              :         /*
    1245              :          * Even though we're not tracking execution time for this statement,
    1246              :          * we must still increment the nesting level, to ensure that functions
    1247              :          * evaluated within it are not seen as top-level calls.  But don't do
    1248              :          * so for EXECUTE; that way, when control reaches pgss_planner or
    1249              :          * pgss_ExecutorStart, we will treat the costs as top-level if
    1250              :          * appropriate.  Likewise, don't bump for PREPARE, so that parse
    1251              :          * analysis will treat the statement as top-level if appropriate.
    1252              :          *
    1253              :          * To be absolutely certain we don't mess up the nesting level,
    1254              :          * evaluate the bump_level condition just once.
    1255              :          */
    1256         9066 :         bool        bump_level =
    1257        14797 :             !IsA(parsetree, ExecuteStmt) &&
    1258         5731 :             !IsA(parsetree, PrepareStmt);
    1259              : 
    1260         9066 :         if (bump_level)
    1261         5602 :             nesting_level++;
    1262         9066 :         PG_TRY();
    1263              :         {
    1264         9066 :             if (prev_ProcessUtility)
    1265            0 :                 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
    1266              :                                     context, params, queryEnv,
    1267              :                                     dest, qc);
    1268              :             else
    1269         9066 :                 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
    1270              :                                         context, params, queryEnv,
    1271              :                                         dest, qc);
    1272              :         }
    1273          138 :         PG_FINALLY();
    1274              :         {
    1275         9066 :             if (bump_level)
    1276         5602 :                 nesting_level--;
    1277              :         }
    1278         9066 :         PG_END_TRY();
    1279              :     }
    1280        34070 : }
    1281              : 
    1282              : /*
    1283              :  * Store some statistics for a statement.
    1284              :  *
    1285              :  * If jstate is not NULL then we're trying to create an entry for which
    1286              :  * we have no statistics as yet; we just want to record the normalized
    1287              :  * query string.  total_time, rows, bufusage and walusage are ignored in this
    1288              :  * case.
    1289              :  *
    1290              :  * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
    1291              :  * for the arrays in the Counters field.
    1292              :  */
    1293              : static void
    1294       102692 : pgss_store(const char *query, int64 queryId,
    1295              :            int query_location, int query_len,
    1296              :            pgssStoreKind kind,
    1297              :            double total_time, uint64 rows,
    1298              :            const BufferUsage *bufusage,
    1299              :            const WalUsage *walusage,
    1300              :            const struct JitInstrumentation *jitusage,
    1301              :            JumbleState *jstate,
    1302              :            int parallel_workers_to_launch,
    1303              :            int parallel_workers_launched,
    1304              :            PlannedStmtOrigin planOrigin)
    1305              : {
    1306              :     pgssHashKey key;
    1307              :     pgssEntry  *entry;
    1308       102692 :     char       *norm_query = NULL;
    1309       102692 :     int         encoding = GetDatabaseEncoding();
    1310              : 
    1311              :     Assert(query != NULL);
    1312              : 
    1313              :     /* Safety check... */
    1314       102692 :     if (!pgss || !pgss_hash)
    1315            0 :         return;
    1316              : 
    1317              :     /*
    1318              :      * Nothing to do if compute_query_id isn't enabled and no other module
    1319              :      * computed a query identifier.
    1320              :      */
    1321       102692 :     if (queryId == INT64CONST(0))
    1322            0 :         return;
    1323              : 
    1324              :     /*
    1325              :      * Confine our attention to the relevant part of the string, if the query
    1326              :      * is a portion of a multi-statement source string, and update query
    1327              :      * location and length if needed.
    1328              :      */
    1329       102692 :     query = CleanQuerytext(query, &query_location, &query_len);
    1330              : 
    1331              :     /* Set up key for hashtable search */
    1332              : 
    1333              :     /* clear padding */
    1334       102692 :     memset(&key, 0, sizeof(pgssHashKey));
    1335              : 
    1336       102692 :     key.userid = GetUserId();
    1337       102692 :     key.dbid = MyDatabaseId;
    1338       102692 :     key.queryid = queryId;
    1339       102692 :     key.toplevel = (nesting_level == 0);
    1340              : 
    1341              :     /* Lookup the hash table entry with shared lock. */
    1342       102692 :     LWLockAcquire(pgss->lock, LW_SHARED);
    1343              : 
    1344       102692 :     entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
    1345              : 
    1346              :     /* Create new entry, if not present */
    1347       102692 :     if (!entry)
    1348              :     {
    1349              :         Size        query_offset;
    1350              :         int         gc_count;
    1351              :         bool        stored;
    1352              :         bool        do_gc;
    1353              : 
    1354              :         /*
    1355              :          * Create a new, normalized query string if caller asked.  We don't
    1356              :          * need to hold the lock while doing this work.  (Note: in any case,
    1357              :          * it's possible that someone else creates a duplicate hashtable entry
    1358              :          * in the interval where we don't hold the lock below.  That case is
    1359              :          * handled by entry_alloc.)
    1360              :          */
    1361        30282 :         if (jstate)
    1362              :         {
    1363        11209 :             LWLockRelease(pgss->lock);
    1364        11209 :             norm_query = generate_normalized_query(jstate, query,
    1365              :                                                    query_location,
    1366              :                                                    &query_len);
    1367        11209 :             LWLockAcquire(pgss->lock, LW_SHARED);
    1368              :         }
    1369              : 
    1370              :         /* Append new query text to file with only shared lock held */
    1371        30282 :         stored = qtext_store(norm_query ? norm_query : query, query_len,
    1372              :                              &query_offset, &gc_count);
    1373              : 
    1374              :         /*
    1375              :          * Determine whether we need to garbage collect external query texts
    1376              :          * while the shared lock is still held.  This micro-optimization
    1377              :          * avoids taking the time to decide this while holding exclusive lock.
    1378              :          */
    1379        30282 :         do_gc = need_gc_qtexts();
    1380              : 
    1381              :         /* Need exclusive lock to make a new hashtable entry - promote */
    1382        30282 :         LWLockRelease(pgss->lock);
    1383        30282 :         LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
    1384              : 
    1385              :         /*
    1386              :          * A garbage collection may have occurred while we weren't holding the
    1387              :          * lock.  In the unlikely event that this happens, the query text we
    1388              :          * stored above will have been garbage collected, so write it again.
    1389              :          * This should be infrequent enough that doing it while holding
    1390              :          * exclusive lock isn't a performance problem.
    1391              :          */
    1392        30282 :         if (!stored || pgss->gc_count != gc_count)
    1393            0 :             stored = qtext_store(norm_query ? norm_query : query, query_len,
    1394              :                                  &query_offset, NULL);
    1395              : 
    1396              :         /* If we failed to write to the text file, give up */
    1397        30282 :         if (!stored)
    1398            0 :             goto done;
    1399              : 
    1400              :         /* OK to create a new hashtable entry */
    1401        30282 :         entry = entry_alloc(&key, query_offset, query_len, encoding,
    1402              :                             jstate != NULL);
    1403              : 
    1404              :         /* If needed, perform garbage collection while exclusive lock held */
    1405        30282 :         if (do_gc)
    1406            0 :             gc_qtexts();
    1407              :     }
    1408              : 
    1409              :     /* Increment the counts, except when jstate is not NULL */
    1410       102692 :     if (!jstate)
    1411              :     {
    1412              :         Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
    1413              : 
    1414              :         /*
    1415              :          * Grab the spinlock while updating the counters (see comment about
    1416              :          * locking rules at the head of the file)
    1417              :          */
    1418        64309 :         SpinLockAcquire(&entry->mutex);
    1419              : 
    1420              :         /* "Unstick" entry if it was previously sticky */
    1421        64309 :         if (IS_STICKY(entry->counters))
    1422        29486 :             entry->counters.usage = USAGE_INIT;
    1423              : 
    1424        64309 :         entry->counters.calls[kind] += 1;
    1425        64309 :         entry->counters.total_time[kind] += total_time;
    1426              : 
    1427        64309 :         if (entry->counters.calls[kind] == 1)
    1428              :         {
    1429        29580 :             entry->counters.min_time[kind] = total_time;
    1430        29580 :             entry->counters.max_time[kind] = total_time;
    1431        29580 :             entry->counters.mean_time[kind] = total_time;
    1432              :         }
    1433              :         else
    1434              :         {
    1435              :             /*
    1436              :              * Welford's method for accurately computing variance. See
    1437              :              * <http://www.johndcook.com/blog/standard_deviation/>
    1438              :              */
    1439        34729 :             double      old_mean = entry->counters.mean_time[kind];
    1440              : 
    1441        34729 :             entry->counters.mean_time[kind] +=
    1442        34729 :                 (total_time - old_mean) / entry->counters.calls[kind];
    1443        34729 :             entry->counters.sum_var_time[kind] +=
    1444        34729 :                 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
    1445              : 
    1446              :             /*
    1447              :              * Calculate min and max time. min = 0 and max = 0 means that the
    1448              :              * min/max statistics were reset
    1449              :              */
    1450        34729 :             if (entry->counters.min_time[kind] == 0
    1451            6 :                 && entry->counters.max_time[kind] == 0)
    1452              :             {
    1453            3 :                 entry->counters.min_time[kind] = total_time;
    1454            3 :                 entry->counters.max_time[kind] = total_time;
    1455              :             }
    1456              :             else
    1457              :             {
    1458        34726 :                 if (entry->counters.min_time[kind] > total_time)
    1459         6543 :                     entry->counters.min_time[kind] = total_time;
    1460        34726 :                 if (entry->counters.max_time[kind] < total_time)
    1461         3576 :                     entry->counters.max_time[kind] = total_time;
    1462              :             }
    1463              :         }
    1464        64309 :         entry->counters.rows += rows;
    1465        64309 :         entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
    1466        64309 :         entry->counters.shared_blks_read += bufusage->shared_blks_read;
    1467        64309 :         entry->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
    1468        64309 :         entry->counters.shared_blks_written += bufusage->shared_blks_written;
    1469        64309 :         entry->counters.local_blks_hit += bufusage->local_blks_hit;
    1470        64309 :         entry->counters.local_blks_read += bufusage->local_blks_read;
    1471        64309 :         entry->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
    1472        64309 :         entry->counters.local_blks_written += bufusage->local_blks_written;
    1473        64309 :         entry->counters.temp_blks_read += bufusage->temp_blks_read;
    1474        64309 :         entry->counters.temp_blks_written += bufusage->temp_blks_written;
    1475        64309 :         entry->counters.shared_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_read_time);
    1476        64309 :         entry->counters.shared_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_write_time);
    1477        64309 :         entry->counters.local_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_read_time);
    1478        64309 :         entry->counters.local_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_write_time);
    1479        64309 :         entry->counters.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time);
    1480        64309 :         entry->counters.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time);
    1481        64309 :         entry->counters.usage += USAGE_EXEC(total_time);
    1482        64309 :         entry->counters.wal_records += walusage->wal_records;
    1483        64309 :         entry->counters.wal_fpi += walusage->wal_fpi;
    1484        64309 :         entry->counters.wal_bytes += walusage->wal_bytes;
    1485        64309 :         entry->counters.wal_buffers_full += walusage->wal_buffers_full;
    1486        64309 :         if (jitusage)
    1487              :         {
    1488           85 :             entry->counters.jit_functions += jitusage->created_functions;
    1489           85 :             entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
    1490              : 
    1491           85 :             if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
    1492           83 :                 entry->counters.jit_deform_count++;
    1493           85 :             entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
    1494              : 
    1495           85 :             if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
    1496           33 :                 entry->counters.jit_inlining_count++;
    1497           85 :             entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
    1498              : 
    1499           85 :             if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
    1500           80 :                 entry->counters.jit_optimization_count++;
    1501           85 :             entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
    1502              : 
    1503           85 :             if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
    1504           80 :                 entry->counters.jit_emission_count++;
    1505           85 :             entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
    1506              :         }
    1507              : 
    1508              :         /* parallel worker counters */
    1509        64309 :         entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
    1510        64309 :         entry->counters.parallel_workers_launched += parallel_workers_launched;
    1511              : 
    1512              :         /* plan cache counters */
    1513        64309 :         if (planOrigin == PLAN_STMT_CACHE_GENERIC)
    1514         3125 :             entry->counters.generic_plan_calls++;
    1515        61184 :         else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
    1516          379 :             entry->counters.custom_plan_calls++;
    1517              : 
    1518        64309 :         SpinLockRelease(&entry->mutex);
    1519              :     }
    1520              : 
    1521        38383 : done:
    1522       102692 :     LWLockRelease(pgss->lock);
    1523              : 
    1524              :     /* We postpone this clean-up until we're out of the lock */
    1525       102692 :     if (norm_query)
    1526        11209 :         pfree(norm_query);
    1527              : }
    1528              : 
    1529              : /*
    1530              :  * Reset statement statistics corresponding to userid, dbid, and queryid.
    1531              :  */
    1532              : Datum
    1533            1 : pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
    1534              : {
    1535              :     Oid         userid;
    1536              :     Oid         dbid;
    1537              :     int64       queryid;
    1538              : 
    1539            1 :     userid = PG_GETARG_OID(0);
    1540            1 :     dbid = PG_GETARG_OID(1);
    1541            1 :     queryid = PG_GETARG_INT64(2);
    1542              : 
    1543            1 :     entry_reset(userid, dbid, queryid, false);
    1544              : 
    1545            1 :     PG_RETURN_VOID();
    1546              : }
    1547              : 
    1548              : Datum
    1549          119 : pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
    1550              : {
    1551              :     Oid         userid;
    1552              :     Oid         dbid;
    1553              :     int64       queryid;
    1554              :     bool        minmax_only;
    1555              : 
    1556          119 :     userid = PG_GETARG_OID(0);
    1557          119 :     dbid = PG_GETARG_OID(1);
    1558          119 :     queryid = PG_GETARG_INT64(2);
    1559          119 :     minmax_only = PG_GETARG_BOOL(3);
    1560              : 
    1561          119 :     PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
    1562              : }
    1563              : 
    1564              : /*
    1565              :  * Reset statement statistics.
    1566              :  */
    1567              : Datum
    1568            1 : pg_stat_statements_reset(PG_FUNCTION_ARGS)
    1569              : {
    1570            1 :     entry_reset(0, 0, 0, false);
    1571              : 
    1572            1 :     PG_RETURN_VOID();
    1573              : }
    1574              : 
    1575              : /* Number of output arguments (columns) for various API versions */
    1576              : #define PG_STAT_STATEMENTS_COLS_V1_0    14
    1577              : #define PG_STAT_STATEMENTS_COLS_V1_1    18
    1578              : #define PG_STAT_STATEMENTS_COLS_V1_2    19
    1579              : #define PG_STAT_STATEMENTS_COLS_V1_3    23
    1580              : #define PG_STAT_STATEMENTS_COLS_V1_8    32
    1581              : #define PG_STAT_STATEMENTS_COLS_V1_9    33
    1582              : #define PG_STAT_STATEMENTS_COLS_V1_10   43
    1583              : #define PG_STAT_STATEMENTS_COLS_V1_11   49
    1584              : #define PG_STAT_STATEMENTS_COLS_V1_12   52
    1585              : #define PG_STAT_STATEMENTS_COLS_V1_13   54
    1586              : #define PG_STAT_STATEMENTS_COLS         54  /* maximum of above */
    1587              : 
    1588              : /*
    1589              :  * Retrieve statement statistics.
    1590              :  *
    1591              :  * The SQL API of this function has changed multiple times, and will likely
    1592              :  * do so again in future.  To support the case where a newer version of this
    1593              :  * loadable module is being used with an old SQL declaration of the function,
    1594              :  * we continue to support the older API versions.  For 1.2 and later, the
    1595              :  * expected API version is identified by embedding it in the C name of the
    1596              :  * function.  Unfortunately we weren't bright enough to do that for 1.1.
    1597              :  */
    1598              : Datum
    1599          129 : pg_stat_statements_1_13(PG_FUNCTION_ARGS)
    1600              : {
    1601          129 :     bool        showtext = PG_GETARG_BOOL(0);
    1602              : 
    1603          129 :     pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext);
    1604              : 
    1605          129 :     return (Datum) 0;
    1606              : }
    1607              : 
    1608              : Datum
    1609            1 : pg_stat_statements_1_12(PG_FUNCTION_ARGS)
    1610              : {
    1611            1 :     bool        showtext = PG_GETARG_BOOL(0);
    1612              : 
    1613            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
    1614              : 
    1615            1 :     return (Datum) 0;
    1616              : }
    1617              : 
    1618              : Datum
    1619            1 : pg_stat_statements_1_11(PG_FUNCTION_ARGS)
    1620              : {
    1621            1 :     bool        showtext = PG_GETARG_BOOL(0);
    1622              : 
    1623            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
    1624              : 
    1625            1 :     return (Datum) 0;
    1626              : }
    1627              : 
    1628              : Datum
    1629            1 : pg_stat_statements_1_10(PG_FUNCTION_ARGS)
    1630              : {
    1631            1 :     bool        showtext = PG_GETARG_BOOL(0);
    1632              : 
    1633            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
    1634              : 
    1635            1 :     return (Datum) 0;
    1636              : }
    1637              : 
    1638              : Datum
    1639            1 : pg_stat_statements_1_9(PG_FUNCTION_ARGS)
    1640              : {
    1641            1 :     bool        showtext = PG_GETARG_BOOL(0);
    1642              : 
    1643            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
    1644              : 
    1645            1 :     return (Datum) 0;
    1646              : }
    1647              : 
    1648              : Datum
    1649            1 : pg_stat_statements_1_8(PG_FUNCTION_ARGS)
    1650              : {
    1651            1 :     bool        showtext = PG_GETARG_BOOL(0);
    1652              : 
    1653            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
    1654              : 
    1655            1 :     return (Datum) 0;
    1656              : }
    1657              : 
    1658              : Datum
    1659            1 : pg_stat_statements_1_3(PG_FUNCTION_ARGS)
    1660              : {
    1661            1 :     bool        showtext = PG_GETARG_BOOL(0);
    1662              : 
    1663            1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
    1664              : 
    1665            1 :     return (Datum) 0;
    1666              : }
    1667              : 
    1668              : Datum
    1669            0 : pg_stat_statements_1_2(PG_FUNCTION_ARGS)
    1670              : {
    1671            0 :     bool        showtext = PG_GETARG_BOOL(0);
    1672              : 
    1673            0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
    1674              : 
    1675            0 :     return (Datum) 0;
    1676              : }
    1677              : 
    1678              : /*
    1679              :  * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
    1680              :  * This can be removed someday, perhaps.
    1681              :  */
    1682              : Datum
    1683            0 : pg_stat_statements(PG_FUNCTION_ARGS)
    1684              : {
    1685              :     /* If it's really API 1.1, we'll figure that out below */
    1686            0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
    1687              : 
    1688            0 :     return (Datum) 0;
    1689              : }
    1690              : 
    1691              : /* Common code for all versions of pg_stat_statements() */
    1692              : static void
    1693          135 : pg_stat_statements_internal(FunctionCallInfo fcinfo,
    1694              :                             pgssVersion api_version,
    1695              :                             bool showtext)
    1696              : {
    1697          135 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    1698          135 :     Oid         userid = GetUserId();
    1699          135 :     bool        is_allowed_role = false;
    1700          135 :     char       *qbuffer = NULL;
    1701          135 :     Size        qbuffer_size = 0;
    1702          135 :     Size        extent = 0;
    1703          135 :     int         gc_count = 0;
    1704              :     HASH_SEQ_STATUS hash_seq;
    1705              :     pgssEntry  *entry;
    1706              : 
    1707              :     /*
    1708              :      * Superusers or roles with the privileges of pg_read_all_stats members
    1709              :      * are allowed
    1710              :      */
    1711          135 :     is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
    1712              : 
    1713              :     /* hash table must exist already */
    1714          135 :     if (!pgss || !pgss_hash)
    1715            0 :         ereport(ERROR,
    1716              :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1717              :                  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
    1718              : 
    1719          135 :     InitMaterializedSRF(fcinfo, 0);
    1720              : 
    1721              :     /*
    1722              :      * Check we have the expected number of output arguments.  Aside from
    1723              :      * being a good safety check, we need a kluge here to detect API version
    1724              :      * 1.1, which was wedged into the code in an ill-considered way.
    1725              :      */
    1726          135 :     switch (rsinfo->setDesc->natts)
    1727              :     {
    1728            0 :         case PG_STAT_STATEMENTS_COLS_V1_0:
    1729            0 :             if (api_version != PGSS_V1_0)
    1730            0 :                 elog(ERROR, "incorrect number of output arguments");
    1731            0 :             break;
    1732            0 :         case PG_STAT_STATEMENTS_COLS_V1_1:
    1733              :             /* pg_stat_statements() should have told us 1.0 */
    1734            0 :             if (api_version != PGSS_V1_0)
    1735            0 :                 elog(ERROR, "incorrect number of output arguments");
    1736            0 :             api_version = PGSS_V1_1;
    1737            0 :             break;
    1738            0 :         case PG_STAT_STATEMENTS_COLS_V1_2:
    1739            0 :             if (api_version != PGSS_V1_2)
    1740            0 :                 elog(ERROR, "incorrect number of output arguments");
    1741            0 :             break;
    1742            1 :         case PG_STAT_STATEMENTS_COLS_V1_3:
    1743            1 :             if (api_version != PGSS_V1_3)
    1744            0 :                 elog(ERROR, "incorrect number of output arguments");
    1745            1 :             break;
    1746            1 :         case PG_STAT_STATEMENTS_COLS_V1_8:
    1747            1 :             if (api_version != PGSS_V1_8)
    1748            0 :                 elog(ERROR, "incorrect number of output arguments");
    1749            1 :             break;
    1750            1 :         case PG_STAT_STATEMENTS_COLS_V1_9:
    1751            1 :             if (api_version != PGSS_V1_9)
    1752            0 :                 elog(ERROR, "incorrect number of output arguments");
    1753            1 :             break;
    1754            1 :         case PG_STAT_STATEMENTS_COLS_V1_10:
    1755            1 :             if (api_version != PGSS_V1_10)
    1756            0 :                 elog(ERROR, "incorrect number of output arguments");
    1757            1 :             break;
    1758            1 :         case PG_STAT_STATEMENTS_COLS_V1_11:
    1759            1 :             if (api_version != PGSS_V1_11)
    1760            0 :                 elog(ERROR, "incorrect number of output arguments");
    1761            1 :             break;
    1762            1 :         case PG_STAT_STATEMENTS_COLS_V1_12:
    1763            1 :             if (api_version != PGSS_V1_12)
    1764            0 :                 elog(ERROR, "incorrect number of output arguments");
    1765            1 :             break;
    1766          129 :         case PG_STAT_STATEMENTS_COLS_V1_13:
    1767          129 :             if (api_version != PGSS_V1_13)
    1768            0 :                 elog(ERROR, "incorrect number of output arguments");
    1769          129 :             break;
    1770            0 :         default:
    1771            0 :             elog(ERROR, "incorrect number of output arguments");
    1772              :     }
    1773              : 
    1774              :     /*
    1775              :      * We'd like to load the query text file (if needed) while not holding any
    1776              :      * lock on pgss->lock.  In the worst case we'll have to do this again
    1777              :      * after we have the lock, but it's unlikely enough to make this a win
    1778              :      * despite occasional duplicated work.  We need to reload if anybody
    1779              :      * writes to the file (either a retail qtext_store(), or a garbage
    1780              :      * collection) between this point and where we've gotten shared lock.  If
    1781              :      * a qtext_store is actually in progress when we look, we might as well
    1782              :      * skip the speculative load entirely.
    1783              :      */
    1784          135 :     if (showtext)
    1785              :     {
    1786              :         int         n_writers;
    1787              : 
    1788              :         /* Take the mutex so we can examine variables */
    1789          135 :         SpinLockAcquire(&pgss->mutex);
    1790          135 :         extent = pgss->extent;
    1791          135 :         n_writers = pgss->n_writers;
    1792          135 :         gc_count = pgss->gc_count;
    1793          135 :         SpinLockRelease(&pgss->mutex);
    1794              : 
    1795              :         /* No point in loading file now if there are active writers */
    1796          135 :         if (n_writers == 0)
    1797          135 :             qbuffer = qtext_load_file(&qbuffer_size);
    1798              :     }
    1799              : 
    1800              :     /*
    1801              :      * Get shared lock, load or reload the query text file if we must, and
    1802              :      * iterate over the hashtable entries.
    1803              :      *
    1804              :      * With a large hash table, we might be holding the lock rather longer
    1805              :      * than one could wish.  However, this only blocks creation of new hash
    1806              :      * table entries, and the larger the hash table the less likely that is to
    1807              :      * be needed.  So we can hope this is okay.  Perhaps someday we'll decide
    1808              :      * we need to partition the hash table to limit the time spent holding any
    1809              :      * one lock.
    1810              :      */
    1811          135 :     LWLockAcquire(pgss->lock, LW_SHARED);
    1812              : 
    1813          135 :     if (showtext)
    1814              :     {
    1815              :         /*
    1816              :          * Here it is safe to examine extent and gc_count without taking the
    1817              :          * mutex.  Note that although other processes might change
    1818              :          * pgss->extent just after we look at it, the strings they then write
    1819              :          * into the file cannot yet be referenced in the hashtable, so we
    1820              :          * don't care whether we see them or not.
    1821              :          *
    1822              :          * If qtext_load_file fails, we just press on; we'll return NULL for
    1823              :          * every query text.
    1824              :          */
    1825          135 :         if (qbuffer == NULL ||
    1826          135 :             pgss->extent != extent ||
    1827          135 :             pgss->gc_count != gc_count)
    1828              :         {
    1829            0 :             if (qbuffer)
    1830            0 :                 pfree(qbuffer);
    1831            0 :             qbuffer = qtext_load_file(&qbuffer_size);
    1832              :         }
    1833              :     }
    1834              : 
    1835          135 :     hash_seq_init(&hash_seq, pgss_hash);
    1836        28684 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    1837              :     {
    1838              :         Datum       values[PG_STAT_STATEMENTS_COLS];
    1839              :         bool        nulls[PG_STAT_STATEMENTS_COLS];
    1840        28549 :         int         i = 0;
    1841              :         Counters    tmp;
    1842              :         double      stddev;
    1843        28549 :         int64       queryid = entry->key.queryid;
    1844              :         TimestampTz stats_since;
    1845              :         TimestampTz minmax_stats_since;
    1846              : 
    1847        28549 :         memset(values, 0, sizeof(values));
    1848        28549 :         memset(nulls, 0, sizeof(nulls));
    1849              : 
    1850        28549 :         values[i++] = ObjectIdGetDatum(entry->key.userid);
    1851        28549 :         values[i++] = ObjectIdGetDatum(entry->key.dbid);
    1852        28549 :         if (api_version >= PGSS_V1_9)
    1853        28537 :             values[i++] = BoolGetDatum(entry->key.toplevel);
    1854              : 
    1855        28549 :         if (is_allowed_role || entry->key.userid == userid)
    1856              :         {
    1857        28545 :             if (api_version >= PGSS_V1_2)
    1858        28545 :                 values[i++] = Int64GetDatumFast(queryid);
    1859              : 
    1860        28545 :             if (showtext)
    1861              :             {
    1862        28545 :                 char       *qstr = qtext_fetch(entry->query_offset,
    1863              :                                                entry->query_len,
    1864              :                                                qbuffer,
    1865              :                                                qbuffer_size);
    1866              : 
    1867        28545 :                 if (qstr)
    1868              :                 {
    1869              :                     char       *enc;
    1870              : 
    1871        28545 :                     enc = pg_any_to_server(qstr,
    1872              :                                            entry->query_len,
    1873              :                                            entry->encoding);
    1874              : 
    1875        28545 :                     values[i++] = CStringGetTextDatum(enc);
    1876              : 
    1877        28545 :                     if (enc != qstr)
    1878            0 :                         pfree(enc);
    1879              :                 }
    1880              :                 else
    1881              :                 {
    1882              :                     /* Just return a null if we fail to find the text */
    1883            0 :                     nulls[i++] = true;
    1884              :                 }
    1885              :             }
    1886              :             else
    1887              :             {
    1888              :                 /* Query text not requested */
    1889            0 :                 nulls[i++] = true;
    1890              :             }
    1891              :         }
    1892              :         else
    1893              :         {
    1894              :             /* Don't show queryid */
    1895            4 :             if (api_version >= PGSS_V1_2)
    1896            4 :                 nulls[i++] = true;
    1897              : 
    1898              :             /*
    1899              :              * Don't show query text, but hint as to the reason for not doing
    1900              :              * so if it was requested
    1901              :              */
    1902            4 :             if (showtext)
    1903            4 :                 values[i++] = CStringGetTextDatum("<insufficient privilege>");
    1904              :             else
    1905            0 :                 nulls[i++] = true;
    1906              :         }
    1907              : 
    1908              :         /* copy counters to a local variable to keep locking time short */
    1909        28549 :         SpinLockAcquire(&entry->mutex);
    1910        28549 :         tmp = entry->counters;
    1911        28549 :         SpinLockRelease(&entry->mutex);
    1912              : 
    1913              :         /*
    1914              :          * The spinlock is not required when reading these two as they are
    1915              :          * always updated when holding pgss->lock exclusively.
    1916              :          */
    1917        28549 :         stats_since = entry->stats_since;
    1918        28549 :         minmax_stats_since = entry->minmax_stats_since;
    1919              : 
    1920              :         /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
    1921        28549 :         if (IS_STICKY(tmp))
    1922           45 :             continue;
    1923              : 
    1924              :         /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
    1925        85512 :         for (int kind = 0; kind < PGSS_NUMKIND; kind++)
    1926              :         {
    1927        57008 :             if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
    1928              :             {
    1929        57004 :                 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
    1930        57004 :                 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
    1931              :             }
    1932              : 
    1933        57008 :             if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
    1934              :                 api_version >= PGSS_V1_8)
    1935              :             {
    1936        57004 :                 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
    1937        57004 :                 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
    1938        57004 :                 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
    1939              : 
    1940              :                 /*
    1941              :                  * Note we are calculating the population variance here, not
    1942              :                  * the sample variance, as we have data for the whole
    1943              :                  * population, so Bessel's correction is not used, and we
    1944              :                  * don't divide by tmp.calls - 1.
    1945              :                  */
    1946        57004 :                 if (tmp.calls[kind] > 1)
    1947         5317 :                     stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
    1948              :                 else
    1949        51687 :                     stddev = 0.0;
    1950        57004 :                 values[i++] = Float8GetDatumFast(stddev);
    1951              :             }
    1952              :         }
    1953        28504 :         values[i++] = Int64GetDatumFast(tmp.rows);
    1954        28504 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
    1955        28504 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
    1956        28504 :         if (api_version >= PGSS_V1_1)
    1957        28504 :             values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
    1958        28504 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
    1959        28504 :         values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
    1960        28504 :         values[i++] = Int64GetDatumFast(tmp.local_blks_read);
    1961        28504 :         if (api_version >= PGSS_V1_1)
    1962        28504 :             values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
    1963        28504 :         values[i++] = Int64GetDatumFast(tmp.local_blks_written);
    1964        28504 :         values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
    1965        28504 :         values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
    1966        28504 :         if (api_version >= PGSS_V1_1)
    1967              :         {
    1968        28504 :             values[i++] = Float8GetDatumFast(tmp.shared_blk_read_time);
    1969        28504 :             values[i++] = Float8GetDatumFast(tmp.shared_blk_write_time);
    1970              :         }
    1971        28504 :         if (api_version >= PGSS_V1_11)
    1972              :         {
    1973        28476 :             values[i++] = Float8GetDatumFast(tmp.local_blk_read_time);
    1974        28476 :             values[i++] = Float8GetDatumFast(tmp.local_blk_write_time);
    1975              :         }
    1976        28504 :         if (api_version >= PGSS_V1_10)
    1977              :         {
    1978        28485 :             values[i++] = Float8GetDatumFast(tmp.temp_blk_read_time);
    1979        28485 :             values[i++] = Float8GetDatumFast(tmp.temp_blk_write_time);
    1980              :         }
    1981        28504 :         if (api_version >= PGSS_V1_8)
    1982              :         {
    1983              :             char        buf[256];
    1984              :             Datum       wal_bytes;
    1985              : 
    1986        28500 :             values[i++] = Int64GetDatumFast(tmp.wal_records);
    1987        28500 :             values[i++] = Int64GetDatumFast(tmp.wal_fpi);
    1988              : 
    1989        28500 :             snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
    1990              : 
    1991              :             /* Convert to numeric. */
    1992        28500 :             wal_bytes = DirectFunctionCall3(numeric_in,
    1993              :                                             CStringGetDatum(buf),
    1994              :                                             ObjectIdGetDatum(0),
    1995              :                                             Int32GetDatum(-1));
    1996        28500 :             values[i++] = wal_bytes;
    1997              :         }
    1998        28504 :         if (api_version >= PGSS_V1_12)
    1999              :         {
    2000        28466 :             values[i++] = Int64GetDatumFast(tmp.wal_buffers_full);
    2001              :         }
    2002        28504 :         if (api_version >= PGSS_V1_10)
    2003              :         {
    2004        28485 :             values[i++] = Int64GetDatumFast(tmp.jit_functions);
    2005        28485 :             values[i++] = Float8GetDatumFast(tmp.jit_generation_time);
    2006        28485 :             values[i++] = Int64GetDatumFast(tmp.jit_inlining_count);
    2007        28485 :             values[i++] = Float8GetDatumFast(tmp.jit_inlining_time);
    2008        28485 :             values[i++] = Int64GetDatumFast(tmp.jit_optimization_count);
    2009        28485 :             values[i++] = Float8GetDatumFast(tmp.jit_optimization_time);
    2010        28485 :             values[i++] = Int64GetDatumFast(tmp.jit_emission_count);
    2011        28485 :             values[i++] = Float8GetDatumFast(tmp.jit_emission_time);
    2012              :         }
    2013        28504 :         if (api_version >= PGSS_V1_11)
    2014              :         {
    2015        28476 :             values[i++] = Int64GetDatumFast(tmp.jit_deform_count);
    2016        28476 :             values[i++] = Float8GetDatumFast(tmp.jit_deform_time);
    2017              :         }
    2018        28504 :         if (api_version >= PGSS_V1_12)
    2019              :         {
    2020        28466 :             values[i++] = Int64GetDatumFast(tmp.parallel_workers_to_launch);
    2021        28466 :             values[i++] = Int64GetDatumFast(tmp.parallel_workers_launched);
    2022              :         }
    2023        28504 :         if (api_version >= PGSS_V1_13)
    2024              :         {
    2025        28461 :             values[i++] = Int64GetDatumFast(tmp.generic_plan_calls);
    2026        28461 :             values[i++] = Int64GetDatumFast(tmp.custom_plan_calls);
    2027              :         }
    2028        28504 :         if (api_version >= PGSS_V1_11)
    2029              :         {
    2030        28476 :             values[i++] = TimestampTzGetDatum(stats_since);
    2031        28476 :             values[i++] = TimestampTzGetDatum(minmax_stats_since);
    2032              :         }
    2033              : 
    2034              :         Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
    2035              :                      api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
    2036              :                      api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
    2037              :                      api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
    2038              :                      api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
    2039              :                      api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
    2040              :                      api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
    2041              :                      api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
    2042              :                      api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
    2043              :                      api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
    2044              :                      -1 /* fail if you forget to update this assert */ ));
    2045              : 
    2046        28504 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    2047              :     }
    2048              : 
    2049          135 :     LWLockRelease(pgss->lock);
    2050              : 
    2051          135 :     if (qbuffer)
    2052          135 :         pfree(qbuffer);
    2053          135 : }
    2054              : 
    2055              : /* Number of output arguments (columns) for pg_stat_statements_info */
    2056              : #define PG_STAT_STATEMENTS_INFO_COLS    2
    2057              : 
    2058              : /*
    2059              :  * Return statistics of pg_stat_statements.
    2060              :  */
    2061              : Datum
    2062            2 : pg_stat_statements_info(PG_FUNCTION_ARGS)
    2063              : {
    2064              :     pgssGlobalStats stats;
    2065              :     TupleDesc   tupdesc;
    2066            2 :     Datum       values[PG_STAT_STATEMENTS_INFO_COLS] = {0};
    2067            2 :     bool        nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
    2068              : 
    2069            2 :     if (!pgss || !pgss_hash)
    2070            0 :         ereport(ERROR,
    2071              :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    2072              :                  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
    2073              : 
    2074              :     /* Build a tuple descriptor for our result type */
    2075            2 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    2076            0 :         elog(ERROR, "return type must be a row type");
    2077              : 
    2078              :     /* Read global statistics for pg_stat_statements */
    2079            2 :     SpinLockAcquire(&pgss->mutex);
    2080            2 :     stats = pgss->stats;
    2081            2 :     SpinLockRelease(&pgss->mutex);
    2082              : 
    2083            2 :     values[0] = Int64GetDatum(stats.dealloc);
    2084            2 :     values[1] = TimestampTzGetDatum(stats.stats_reset);
    2085              : 
    2086            2 :     PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
    2087              : }
    2088              : 
    2089              : /*
    2090              :  * Estimate shared memory space needed.
    2091              :  */
    2092              : static Size
    2093            7 : pgss_memsize(void)
    2094              : {
    2095              :     Size        size;
    2096              : 
    2097            7 :     size = MAXALIGN(sizeof(pgssSharedState));
    2098            7 :     size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
    2099              : 
    2100            7 :     return size;
    2101              : }
    2102              : 
    2103              : /*
    2104              :  * Allocate a new hashtable entry.
    2105              :  * caller must hold an exclusive lock on pgss->lock
    2106              :  *
    2107              :  * "query" need not be null-terminated; we rely on query_len instead
    2108              :  *
    2109              :  * If "sticky" is true, make the new entry artificially sticky so that it will
    2110              :  * probably still be there when the query finishes execution.  We do this by
    2111              :  * giving it a median usage value rather than the normal value.  (Strictly
    2112              :  * speaking, query strings are normalized on a best effort basis, though it
    2113              :  * would be difficult to demonstrate this even under artificial conditions.)
    2114              :  *
    2115              :  * Note: despite needing exclusive lock, it's not an error for the target
    2116              :  * entry to already exist.  This is because pgss_store releases and
    2117              :  * reacquires lock after failing to find a match; so someone else could
    2118              :  * have made the entry while we waited to get exclusive lock.
    2119              :  */
    2120              : static pgssEntry *
    2121        57809 : entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
    2122              :             bool sticky)
    2123              : {
    2124              :     pgssEntry  *entry;
    2125              :     bool        found;
    2126              : 
    2127              :     /* Make space if needed */
    2128        57809 :     while (hash_get_num_entries(pgss_hash) >= pgss_max)
    2129            0 :         entry_dealloc();
    2130              : 
    2131              :     /* Find or create an entry with desired hash code */
    2132        57809 :     entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
    2133              : 
    2134        57809 :     if (!found)
    2135              :     {
    2136              :         /* New entry, initialize it */
    2137              : 
    2138              :         /* reset the statistics */
    2139        57809 :         memset(&entry->counters, 0, sizeof(Counters));
    2140              :         /* set the appropriate initial usage count */
    2141        57809 :         entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
    2142              :         /* re-initialize the mutex each time ... we assume no one using it */
    2143        57809 :         SpinLockInit(&entry->mutex);
    2144              :         /* ... and don't forget the query text metadata */
    2145              :         Assert(query_len >= 0);
    2146        57809 :         entry->query_offset = query_offset;
    2147        57809 :         entry->query_len = query_len;
    2148        57809 :         entry->encoding = encoding;
    2149        57809 :         entry->stats_since = GetCurrentTimestamp();
    2150        57809 :         entry->minmax_stats_since = entry->stats_since;
    2151              :     }
    2152              : 
    2153        57809 :     return entry;
    2154              : }
    2155              : 
    2156              : /*
    2157              :  * qsort comparator for sorting into increasing usage order
    2158              :  */
    2159              : static int
    2160            0 : entry_cmp(const void *lhs, const void *rhs)
    2161              : {
    2162            0 :     double      l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
    2163            0 :     double      r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
    2164              : 
    2165            0 :     if (l_usage < r_usage)
    2166            0 :         return -1;
    2167            0 :     else if (l_usage > r_usage)
    2168            0 :         return +1;
    2169              :     else
    2170            0 :         return 0;
    2171              : }
    2172              : 
    2173              : /*
    2174              :  * Deallocate least-used entries.
    2175              :  *
    2176              :  * Caller must hold an exclusive lock on pgss->lock.
    2177              :  */
    2178              : static void
    2179            0 : entry_dealloc(void)
    2180              : {
    2181              :     HASH_SEQ_STATUS hash_seq;
    2182              :     pgssEntry **entries;
    2183              :     pgssEntry  *entry;
    2184              :     int         nvictims;
    2185              :     int         i;
    2186              :     Size        tottextlen;
    2187              :     int         nvalidtexts;
    2188              : 
    2189              :     /*
    2190              :      * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
    2191              :      * While we're scanning the table, apply the decay factor to the usage
    2192              :      * values, and update the mean query length.
    2193              :      *
    2194              :      * Note that the mean query length is almost immediately obsolete, since
    2195              :      * we compute it before not after discarding the least-used entries.
    2196              :      * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
    2197              :      * making two passes to get a more current result.  Likewise, the new
    2198              :      * cur_median_usage includes the entries we're about to zap.
    2199              :      */
    2200              : 
    2201            0 :     entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
    2202              : 
    2203            0 :     i = 0;
    2204            0 :     tottextlen = 0;
    2205            0 :     nvalidtexts = 0;
    2206              : 
    2207            0 :     hash_seq_init(&hash_seq, pgss_hash);
    2208            0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2209              :     {
    2210            0 :         entries[i++] = entry;
    2211              :         /* "Sticky" entries get a different usage decay rate. */
    2212            0 :         if (IS_STICKY(entry->counters))
    2213            0 :             entry->counters.usage *= STICKY_DECREASE_FACTOR;
    2214              :         else
    2215            0 :             entry->counters.usage *= USAGE_DECREASE_FACTOR;
    2216              :         /* In the mean length computation, ignore dropped texts. */
    2217            0 :         if (entry->query_len >= 0)
    2218              :         {
    2219            0 :             tottextlen += entry->query_len + 1;
    2220            0 :             nvalidtexts++;
    2221              :         }
    2222              :     }
    2223              : 
    2224              :     /* Sort into increasing order by usage */
    2225            0 :     qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
    2226              : 
    2227              :     /* Record the (approximate) median usage */
    2228            0 :     if (i > 0)
    2229            0 :         pgss->cur_median_usage = entries[i / 2]->counters.usage;
    2230              :     /* Record the mean query length */
    2231            0 :     if (nvalidtexts > 0)
    2232            0 :         pgss->mean_query_len = tottextlen / nvalidtexts;
    2233              :     else
    2234            0 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2235              : 
    2236              :     /* Now zap an appropriate fraction of lowest-usage entries */
    2237            0 :     nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
    2238            0 :     nvictims = Min(nvictims, i);
    2239              : 
    2240            0 :     for (i = 0; i < nvictims; i++)
    2241              :     {
    2242            0 :         hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
    2243              :     }
    2244              : 
    2245            0 :     pfree(entries);
    2246              : 
    2247              :     /* Increment the number of times entries are deallocated */
    2248            0 :     SpinLockAcquire(&pgss->mutex);
    2249            0 :     pgss->stats.dealloc += 1;
    2250            0 :     SpinLockRelease(&pgss->mutex);
    2251            0 : }
    2252              : 
    2253              : /*
    2254              :  * Given a query string (not necessarily null-terminated), allocate a new
    2255              :  * entry in the external query text file and store the string there.
    2256              :  *
    2257              :  * If successful, returns true, and stores the new entry's offset in the file
    2258              :  * into *query_offset.  Also, if gc_count isn't NULL, *gc_count is set to the
    2259              :  * number of garbage collections that have occurred so far.
    2260              :  *
    2261              :  * On failure, returns false.
    2262              :  *
    2263              :  * At least a shared lock on pgss->lock must be held by the caller, so as
    2264              :  * to prevent a concurrent garbage collection.  Share-lock-holding callers
    2265              :  * should pass a gc_count pointer to obtain the number of garbage collections,
    2266              :  * so that they can recheck the count after obtaining exclusive lock to
    2267              :  * detect whether a garbage collection occurred (and removed this entry).
    2268              :  */
    2269              : static bool
    2270        30282 : qtext_store(const char *query, int query_len,
    2271              :             Size *query_offset, int *gc_count)
    2272              : {
    2273              :     Size        off;
    2274              :     int         fd;
    2275              : 
    2276              :     /*
    2277              :      * We use a spinlock to protect extent/n_writers/gc_count, so that
    2278              :      * multiple processes may execute this function concurrently.
    2279              :      */
    2280        30282 :     SpinLockAcquire(&pgss->mutex);
    2281        30282 :     off = pgss->extent;
    2282        30282 :     pgss->extent += query_len + 1;
    2283        30282 :     pgss->n_writers++;
    2284        30282 :     if (gc_count)
    2285        30282 :         *gc_count = pgss->gc_count;
    2286        30282 :     SpinLockRelease(&pgss->mutex);
    2287              : 
    2288        30282 :     *query_offset = off;
    2289              : 
    2290              :     /*
    2291              :      * Don't allow the file to grow larger than what qtext_load_file can
    2292              :      * (theoretically) handle.  This has been seen to be reachable on 32-bit
    2293              :      * platforms.
    2294              :      */
    2295        30282 :     if (unlikely(query_len >= MaxAllocHugeSize - off))
    2296              :     {
    2297            0 :         errno = EFBIG;          /* not quite right, but it'll do */
    2298            0 :         fd = -1;
    2299            0 :         goto error;
    2300              :     }
    2301              : 
    2302              :     /* Now write the data into the successfully-reserved part of the file */
    2303        30282 :     fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
    2304        30282 :     if (fd < 0)
    2305            0 :         goto error;
    2306              : 
    2307        30282 :     if (pg_pwrite(fd, query, query_len, off) != query_len)
    2308            0 :         goto error;
    2309        30282 :     if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
    2310            0 :         goto error;
    2311              : 
    2312        30282 :     CloseTransientFile(fd);
    2313              : 
    2314              :     /* Mark our write complete */
    2315        30282 :     SpinLockAcquire(&pgss->mutex);
    2316        30282 :     pgss->n_writers--;
    2317        30282 :     SpinLockRelease(&pgss->mutex);
    2318              : 
    2319        30282 :     return true;
    2320              : 
    2321            0 : error:
    2322            0 :     ereport(LOG,
    2323              :             (errcode_for_file_access(),
    2324              :              errmsg("could not write file \"%s\": %m",
    2325              :                     PGSS_TEXT_FILE)));
    2326              : 
    2327            0 :     if (fd >= 0)
    2328            0 :         CloseTransientFile(fd);
    2329              : 
    2330              :     /* Mark our write complete */
    2331            0 :     SpinLockAcquire(&pgss->mutex);
    2332            0 :     pgss->n_writers--;
    2333            0 :     SpinLockRelease(&pgss->mutex);
    2334              : 
    2335            0 :     return false;
    2336              : }
    2337              : 
    2338              : /*
    2339              :  * Read the external query text file into a palloc'd buffer.
    2340              :  *
    2341              :  * Returns NULL (without throwing an error) if unable to read, eg
    2342              :  * file not there or insufficient memory.
    2343              :  *
    2344              :  * On success, the buffer size is also returned into *buffer_size.
    2345              :  *
    2346              :  * This can be called without any lock on pgss->lock, but in that case
    2347              :  * the caller is responsible for verifying that the result is sane.
    2348              :  */
    2349              : static char *
    2350          140 : qtext_load_file(Size *buffer_size)
    2351              : {
    2352              :     char       *buf;
    2353              :     int         fd;
    2354              :     struct stat stat;
    2355              :     Size        nread;
    2356              : 
    2357          140 :     fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
    2358          140 :     if (fd < 0)
    2359              :     {
    2360            0 :         if (errno != ENOENT)
    2361            0 :             ereport(LOG,
    2362              :                     (errcode_for_file_access(),
    2363              :                      errmsg("could not read file \"%s\": %m",
    2364              :                             PGSS_TEXT_FILE)));
    2365            0 :         return NULL;
    2366              :     }
    2367              : 
    2368              :     /* Get file length */
    2369          140 :     if (fstat(fd, &stat))
    2370              :     {
    2371            0 :         ereport(LOG,
    2372              :                 (errcode_for_file_access(),
    2373              :                  errmsg("could not stat file \"%s\": %m",
    2374              :                         PGSS_TEXT_FILE)));
    2375            0 :         CloseTransientFile(fd);
    2376            0 :         return NULL;
    2377              :     }
    2378              : 
    2379              :     /* Allocate buffer; beware that off_t might be wider than size_t */
    2380          140 :     if (stat.st_size <= MaxAllocHugeSize)
    2381          140 :         buf = (char *) palloc_extended(stat.st_size, MCXT_ALLOC_HUGE | MCXT_ALLOC_NO_OOM);
    2382              :     else
    2383            0 :         buf = NULL;
    2384          140 :     if (buf == NULL)
    2385              :     {
    2386            0 :         ereport(LOG,
    2387              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
    2388              :                  errmsg("out of memory"),
    2389              :                  errdetail("Could not allocate enough memory to read file \"%s\".",
    2390              :                            PGSS_TEXT_FILE)));
    2391            0 :         CloseTransientFile(fd);
    2392            0 :         return NULL;
    2393              :     }
    2394              : 
    2395              :     /*
    2396              :      * OK, slurp in the file.  Windows fails if we try to read more than
    2397              :      * INT_MAX bytes at once, and other platforms might not like that either,
    2398              :      * so read a very large file in 1GB segments.
    2399              :      */
    2400          140 :     nread = 0;
    2401          279 :     while (nread < stat.st_size)
    2402              :     {
    2403          139 :         int         toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
    2404              : 
    2405              :         /*
    2406              :          * If we get a short read and errno doesn't get set, the reason is
    2407              :          * probably that garbage collection truncated the file since we did
    2408              :          * the fstat(), so we don't log a complaint --- but we don't return
    2409              :          * the data, either, since it's most likely corrupt due to concurrent
    2410              :          * writes from garbage collection.
    2411              :          */
    2412          139 :         errno = 0;
    2413          139 :         if (read(fd, buf + nread, toread) != toread)
    2414              :         {
    2415            0 :             if (errno)
    2416            0 :                 ereport(LOG,
    2417              :                         (errcode_for_file_access(),
    2418              :                          errmsg("could not read file \"%s\": %m",
    2419              :                                 PGSS_TEXT_FILE)));
    2420            0 :             pfree(buf);
    2421            0 :             CloseTransientFile(fd);
    2422            0 :             return NULL;
    2423              :         }
    2424          139 :         nread += toread;
    2425              :     }
    2426              : 
    2427          140 :     if (CloseTransientFile(fd) != 0)
    2428            0 :         ereport(LOG,
    2429              :                 (errcode_for_file_access(),
    2430              :                  errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
    2431              : 
    2432          140 :     *buffer_size = nread;
    2433          140 :     return buf;
    2434              : }
    2435              : 
    /*
     * Locate a query text in the file image previously read by qtext_load_file().
     *
     * query_offset and query_len give the position and length (excluding the
     * terminating null) of the text; buffer and buffer_size describe the file
     * image, where buffer may be NULL if reading the file failed.
     *
     * We validate the given offset/length, and return NULL if bogus.  Otherwise,
     * the result points to a null-terminated string within the buffer.
     */
    static char *
    qtext_fetch(Size query_offset, int query_len,
                char *buffer, Size buffer_size)
    {
        /* File read failed? */
        if (buffer == NULL)
            return NULL;
        /* Bogus length? */
        if (query_len < 0)
            return NULL;

        /*
         * Bogus offset/length?  The text plus its terminator must lie inside
         * the buffer, i.e. query_offset + query_len < buffer_size.  Express
         * that with a subtraction rather than a sum: a garbage offset close to
         * the maximum Size value would make the sum wrap around and slip past
         * the check.
         */
        if (query_offset >= buffer_size ||
            (Size) query_len >= buffer_size - query_offset)
            return NULL;
        /* As a further sanity check, make sure there's a trailing null */
        if (buffer[query_offset + query_len] != '\0')
            return NULL;
        /* Looks OK */
        return buffer + query_offset;
    }
    2459              : 
    2460              : /*
    2461              :  * Do we need to garbage-collect the external query text file?
    2462              :  *
    2463              :  * Caller should hold at least a shared lock on pgss->lock.
    2464              :  */
    2465              : static bool
    2466        30282 : need_gc_qtexts(void)
    2467              : {
    2468              :     Size        extent;
    2469              : 
    2470              :     /* Read shared extent pointer */
    2471        30282 :     SpinLockAcquire(&pgss->mutex);
    2472        30282 :     extent = pgss->extent;
    2473        30282 :     SpinLockRelease(&pgss->mutex);
    2474              : 
    2475              :     /*
    2476              :      * Don't proceed if file does not exceed 512 bytes per possible entry.
    2477              :      *
    2478              :      * Here and in the next test, 32-bit machines have overflow hazards if
    2479              :      * pgss_max and/or mean_query_len are large.  Force the multiplications
    2480              :      * and comparisons to be done in uint64 arithmetic to forestall trouble.
    2481              :      */
    2482        30282 :     if ((uint64) extent < (uint64) 512 * pgss_max)
    2483        30282 :         return false;
    2484              : 
    2485              :     /*
    2486              :      * Don't proceed if file is less than about 50% bloat.  Nothing can or
    2487              :      * should be done in the event of unusually large query texts accounting
    2488              :      * for file's large size.  We go to the trouble of maintaining the mean
    2489              :      * query length in order to prevent garbage collection from thrashing
    2490              :      * uselessly.
    2491              :      */
    2492            0 :     if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
    2493            0 :         return false;
    2494              : 
    2495            0 :     return true;
    2496              : }
    2497              : 
    2498              : /*
    2499              :  * Garbage-collect orphaned query texts in external file.
    2500              :  *
    2501              :  * This won't be called often in the typical case, since it's likely that
    2502              :  * there won't be too much churn, and besides, a similar compaction process
    2503              :  * occurs when serializing to disk at shutdown or as part of resetting.
    2504              :  * Despite this, it seems prudent to plan for the edge case where the file
    2505              :  * becomes unreasonably large, with no other method of compaction likely to
    2506              :  * occur in the foreseeable future.
    2507              :  *
    2508              :  * The caller must hold an exclusive lock on pgss->lock.
    2509              :  *
    2510              :  * At the first sign of trouble we unlink the query text file to get a clean
    2511              :  * slate (although existing statistics are retained), rather than risk
    2512              :  * thrashing by allowing the same problem case to recur indefinitely.
    2513              :  */
    2514              : static void
    2515            0 : gc_qtexts(void)
    2516              : {
    2517              :     char       *qbuffer;
    2518              :     Size        qbuffer_size;
    2519            0 :     FILE       *qfile = NULL;
    2520              :     HASH_SEQ_STATUS hash_seq;
    2521              :     pgssEntry  *entry;
    2522              :     Size        extent;
    2523              :     int         nentries;
    2524              : 
    2525              :     /*
    2526              :      * When called from pgss_store, some other session might have proceeded
    2527              :      * with garbage collection in the no-lock-held interim of lock strength
    2528              :      * escalation.  Check once more that this is actually necessary.
    2529              :      */
    2530            0 :     if (!need_gc_qtexts())
    2531            0 :         return;
    2532              : 
    2533              :     /*
    2534              :      * Load the old texts file.  If we fail (out of memory, for instance),
    2535              :      * invalidate query texts.  Hopefully this is rare.  It might seem better
    2536              :      * to leave things alone on an OOM failure, but the problem is that the
    2537              :      * file is only going to get bigger; hoping for a future non-OOM result is
    2538              :      * risky and can easily lead to complete denial of service.
    2539              :      */
    2540            0 :     qbuffer = qtext_load_file(&qbuffer_size);
    2541            0 :     if (qbuffer == NULL)
    2542            0 :         goto gc_fail;
    2543              : 
    2544              :     /*
    2545              :      * We overwrite the query texts file in place, so as to reduce the risk of
    2546              :      * an out-of-disk-space failure.  Since the file is guaranteed not to get
    2547              :      * larger, this should always work on traditional filesystems; though we
    2548              :      * could still lose on copy-on-write filesystems.
    2549              :      */
    2550            0 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2551            0 :     if (qfile == NULL)
    2552              :     {
    2553            0 :         ereport(LOG,
    2554              :                 (errcode_for_file_access(),
    2555              :                  errmsg("could not write file \"%s\": %m",
    2556              :                         PGSS_TEXT_FILE)));
    2557            0 :         goto gc_fail;
    2558              :     }
    2559              : 
    2560            0 :     extent = 0;
    2561            0 :     nentries = 0;
    2562              : 
    2563            0 :     hash_seq_init(&hash_seq, pgss_hash);
    2564            0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2565              :     {
    2566            0 :         int         query_len = entry->query_len;
    2567            0 :         char       *qry = qtext_fetch(entry->query_offset,
    2568              :                                       query_len,
    2569              :                                       qbuffer,
    2570              :                                       qbuffer_size);
    2571              : 
    2572            0 :         if (qry == NULL)
    2573              :         {
    2574              :             /* Trouble ... drop the text */
    2575            0 :             entry->query_offset = 0;
    2576            0 :             entry->query_len = -1;
    2577              :             /* entry will not be counted in mean query length computation */
    2578            0 :             continue;
    2579              :         }
    2580              : 
    2581            0 :         if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
    2582              :         {
    2583            0 :             ereport(LOG,
    2584              :                     (errcode_for_file_access(),
    2585              :                      errmsg("could not write file \"%s\": %m",
    2586              :                             PGSS_TEXT_FILE)));
    2587            0 :             hash_seq_term(&hash_seq);
    2588            0 :             goto gc_fail;
    2589              :         }
    2590              : 
    2591            0 :         entry->query_offset = extent;
    2592            0 :         extent += query_len + 1;
    2593            0 :         nentries++;
    2594              :     }
    2595              : 
    2596              :     /*
    2597              :      * Truncate away any now-unused space.  If this fails for some odd reason,
    2598              :      * we log it, but there's no need to fail.
    2599              :      */
    2600            0 :     if (ftruncate(fileno(qfile), extent) != 0)
    2601            0 :         ereport(LOG,
    2602              :                 (errcode_for_file_access(),
    2603              :                  errmsg("could not truncate file \"%s\": %m",
    2604              :                         PGSS_TEXT_FILE)));
    2605              : 
    2606            0 :     if (FreeFile(qfile))
    2607              :     {
    2608            0 :         ereport(LOG,
    2609              :                 (errcode_for_file_access(),
    2610              :                  errmsg("could not write file \"%s\": %m",
    2611              :                         PGSS_TEXT_FILE)));
    2612            0 :         qfile = NULL;
    2613            0 :         goto gc_fail;
    2614              :     }
    2615              : 
    2616            0 :     elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
    2617              :          pgss->extent, extent);
    2618              : 
    2619              :     /* Reset the shared extent pointer */
    2620            0 :     pgss->extent = extent;
    2621              : 
    2622              :     /*
    2623              :      * Also update the mean query length, to be sure that need_gc_qtexts()
    2624              :      * won't still think we have a problem.
    2625              :      */
    2626            0 :     if (nentries > 0)
    2627            0 :         pgss->mean_query_len = extent / nentries;
    2628              :     else
    2629            0 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2630              : 
    2631            0 :     pfree(qbuffer);
    2632              : 
    2633              :     /*
    2634              :      * OK, count a garbage collection cycle.  (Note: even though we have
    2635              :      * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
    2636              :      * other processes may examine gc_count while holding only the mutex.
    2637              :      * Also, we have to advance the count *after* we've rewritten the file,
    2638              :      * else other processes might not realize they read a stale file.)
    2639              :      */
    2640            0 :     record_gc_qtexts();
    2641              : 
    2642            0 :     return;
    2643              : 
    2644            0 : gc_fail:
    2645              :     /* clean up resources */
    2646            0 :     if (qfile)
    2647            0 :         FreeFile(qfile);
    2648            0 :     if (qbuffer)
    2649            0 :         pfree(qbuffer);
    2650              : 
    2651              :     /*
    2652              :      * Since the contents of the external file are now uncertain, mark all
    2653              :      * hashtable entries as having invalid texts.
    2654              :      */
    2655            0 :     hash_seq_init(&hash_seq, pgss_hash);
    2656            0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2657              :     {
    2658            0 :         entry->query_offset = 0;
    2659            0 :         entry->query_len = -1;
    2660              :     }
    2661              : 
    2662              :     /*
    2663              :      * Destroy the query text file and create a new, empty one
    2664              :      */
    2665            0 :     (void) unlink(PGSS_TEXT_FILE);
    2666            0 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2667            0 :     if (qfile == NULL)
    2668            0 :         ereport(LOG,
    2669              :                 (errcode_for_file_access(),
    2670              :                  errmsg("could not recreate file \"%s\": %m",
    2671              :                         PGSS_TEXT_FILE)));
    2672              :     else
    2673            0 :         FreeFile(qfile);
    2674              : 
    2675              :     /* Reset the shared extent pointer */
    2676            0 :     pgss->extent = 0;
    2677              : 
    2678              :     /* Reset mean_query_len to match the new state */
    2679            0 :     pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2680              : 
    2681              :     /*
    2682              :      * Bump the GC count even though we failed.
    2683              :      *
    2684              :      * This is needed to make concurrent readers of file without any lock on
    2685              :      * pgss->lock notice existence of new version of file.  Once readers
    2686              :      * subsequently observe a change in GC count with pgss->lock held, that
    2687              :      * forces a safe reopen of file.  Writers also require that we bump here,
    2688              :      * of course.  (As required by locking protocol, readers and writers don't
    2689              :      * trust earlier file contents until gc_count is found unchanged after
    2690              :      * pgss->lock acquired in shared or exclusive mode respectively.)
    2691              :      */
    2692            0 :     record_gc_qtexts();
    2693              : }
    2694              : 
    /*
     * Reset or remove one hashtable entry "e"; does nothing when "e" is NULL.
     *
     * This macro relies on names from the scope it is expanded in:
     *   minmax_only - if true, only the min/max timings of the entry are
     *                 zeroed and its minmax_stats_since is set to stats_reset;
     *                 otherwise the entry is removed from pgss_hash
     *   stats_reset - timestamp stored into minmax_stats_since
     *   num_remove  - counter incremented for every entry removed
     *
     * "e" is evaluated more than once, so the argument must not have side
     * effects.  Every use of it is parenthesized so that an argument that is
     * an expression rather than a plain identifier still expands correctly.
     * The expansion remains a plain if-statement, so existing uses expand
     * exactly as before.
     */
    #define SINGLE_ENTRY_RESET(e) \
    if (e) { \
        if (minmax_only) { \
            /* When requested reset only min/max statistics of an entry */ \
            for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
            { \
                (e)->counters.max_time[kind] = 0; \
                (e)->counters.min_time[kind] = 0; \
            } \
            (e)->minmax_stats_since = stats_reset; \
        } \
        else \
        { \
            /* Remove the key otherwise  */ \
            hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
            num_remove++; \
        } \
    }
    2713              : 
    2714              : /*
    2715              :  * Reset entries corresponding to parameters passed.
    2716              :  */
    2717              : static TimestampTz
    2718          121 : entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
    2719              : {
    2720              :     HASH_SEQ_STATUS hash_seq;
    2721              :     pgssEntry  *entry;
    2722              :     FILE       *qfile;
    2723              :     int64       num_entries;
    2724          121 :     int64       num_remove = 0;
    2725              :     pgssHashKey key;
    2726              :     TimestampTz stats_reset;
    2727              : 
    2728          121 :     if (!pgss || !pgss_hash)
    2729            0 :         ereport(ERROR,
    2730              :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    2731              :                  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
    2732              : 
    2733          121 :     LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
    2734          121 :     num_entries = hash_get_num_entries(pgss_hash);
    2735              : 
    2736          121 :     stats_reset = GetCurrentTimestamp();
    2737              : 
    2738          121 :     if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
    2739              :     {
    2740              :         /* If all the parameters are available, use the fast path. */
    2741            1 :         memset(&key, 0, sizeof(pgssHashKey));
    2742            1 :         key.userid = userid;
    2743            1 :         key.dbid = dbid;
    2744            1 :         key.queryid = queryid;
    2745              : 
    2746              :         /*
    2747              :          * Reset the entry if it exists, starting with the non-top-level
    2748              :          * entry.
    2749              :          */
    2750            1 :         key.toplevel = false;
    2751            1 :         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
    2752              : 
    2753            1 :         SINGLE_ENTRY_RESET(entry);
    2754              : 
    2755              :         /* Also reset the top-level entry if it exists. */
    2756            1 :         key.toplevel = true;
    2757            1 :         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
    2758              : 
    2759            1 :         SINGLE_ENTRY_RESET(entry);
    2760              :     }
    2761          120 :     else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
    2762              :     {
    2763              :         /* Reset entries corresponding to valid parameters. */
    2764            4 :         hash_seq_init(&hash_seq, pgss_hash);
    2765           51 :         while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2766              :         {
    2767           47 :             if ((!userid || entry->key.userid == userid) &&
    2768           36 :                 (!dbid || entry->key.dbid == dbid) &&
    2769           34 :                 (!queryid || entry->key.queryid == queryid))
    2770              :             {
    2771            7 :                 SINGLE_ENTRY_RESET(entry);
    2772              :             }
    2773              :         }
    2774              :     }
    2775              :     else
    2776              :     {
    2777              :         /* Reset all entries. */
    2778          116 :         hash_seq_init(&hash_seq, pgss_hash);
    2779         1151 :         while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2780              :         {
    2781          941 :             SINGLE_ENTRY_RESET(entry);
    2782              :         }
    2783              :     }
    2784              : 
    2785              :     /* All entries are removed? */
    2786          121 :     if (num_entries != num_remove)
    2787            6 :         goto release_lock;
    2788              : 
    2789              :     /*
    2790              :      * Reset global statistics for pg_stat_statements since all entries are
    2791              :      * removed.
    2792              :      */
    2793          115 :     SpinLockAcquire(&pgss->mutex);
    2794          115 :     pgss->stats.dealloc = 0;
    2795          115 :     pgss->stats.stats_reset = stats_reset;
    2796          115 :     SpinLockRelease(&pgss->mutex);
    2797              : 
    2798              :     /*
    2799              :      * Write new empty query file, perhaps even creating a new one to recover
    2800              :      * if the file was missing.
    2801              :      */
    2802          115 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2803          115 :     if (qfile == NULL)
    2804              :     {
    2805            0 :         ereport(LOG,
    2806              :                 (errcode_for_file_access(),
    2807              :                  errmsg("could not create file \"%s\": %m",
    2808              :                         PGSS_TEXT_FILE)));
    2809            0 :         goto done;
    2810              :     }
    2811              : 
    2812              :     /* If ftruncate fails, log it, but it's not a fatal problem */
    2813          115 :     if (ftruncate(fileno(qfile), 0) != 0)
    2814            0 :         ereport(LOG,
    2815              :                 (errcode_for_file_access(),
    2816              :                  errmsg("could not truncate file \"%s\": %m",
    2817              :                         PGSS_TEXT_FILE)));
    2818              : 
    2819          115 :     FreeFile(qfile);
    2820              : 
    2821          115 : done:
    2822          115 :     pgss->extent = 0;
    2823              :     /* This counts as a query text garbage collection for our purposes */
    2824          115 :     record_gc_qtexts();
    2825              : 
    2826          121 : release_lock:
    2827          121 :     LWLockRelease(pgss->lock);
    2828              : 
    2829          121 :     return stats_reset;
    2830              : }
    2831              : 
    2832              : /*
    2833              :  * Generate a normalized version of the query string that will be used to
    2834              :  * represent all similar queries.
    2835              :  *
    2836              :  * Note that the normalized representation may well vary depending on
    2837              :  * just which "equivalent" query is used to create the hashtable entry.
    2838              :  * We assume this is OK.
    2839              :  *
    2840              :  * If query_loc > 0, then "query" has been advanced by that much compared to
    2841              :  * the original string start, so we need to translate the provided locations
    2842              :  * to compensate.  (This lets us avoid re-scanning statements before the one
    2843              :  * of interest, so it's worth doing.)
    2844              :  *
    2845              :  * *query_len_p contains the input string length, and is updated with
    2846              :  * the result string length on exit.  The resulting string might be longer
    2847              :  * or shorter depending on what happens with replacement of constants.
    2848              :  *
    2849              :  * Returns a palloc'd string.
    2850              :  */
    2851              : static char *
    2852        11209 : generate_normalized_query(JumbleState *jstate, const char *query,
    2853              :                           int query_loc, int *query_len_p)
    2854              : {
    2855              :     char       *norm_query;
    2856        11209 :     int         query_len = *query_len_p;
    2857              :     int         norm_query_buflen,  /* Space allowed for norm_query */
    2858              :                 len_to_wrt,     /* Length (in bytes) to write */
    2859        11209 :                 quer_loc = 0,   /* Source query byte location */
    2860        11209 :                 n_quer_loc = 0, /* Normalized query byte location */
    2861        11209 :                 last_off = 0,   /* Offset from start for previous tok */
    2862        11209 :                 last_tok_len = 0;   /* Length (in bytes) of that tok */
    2863        11209 :     int         num_constants_replaced = 0;
    2864              : 
    2865              :     /*
    2866              :      * Get constants' lengths (core system only gives us locations).  Note
    2867              :      * this also ensures the items are sorted by location.
    2868              :      */
    2869        11209 :     fill_in_constant_lengths(jstate, query, query_loc);
    2870              : 
    2871              :     /*
    2872              :      * Allow for $n symbols to be longer than the constants they replace.
    2873              :      * Constants must take at least one byte in text form, while a $n symbol
    2874              :      * certainly isn't more than 11 bytes, even if n reaches INT_MAX.  We
    2875              :      * could refine that limit based on the max value of n for the current
    2876              :      * query, but it hardly seems worth any extra effort to do so.
    2877              :      */
    2878        11209 :     norm_query_buflen = query_len + jstate->clocations_count * 10;
    2879              : 
    2880              :     /* Allocate result buffer */
    2881        11209 :     norm_query = palloc(norm_query_buflen + 1);
    2882              : 
    2883        44747 :     for (int i = 0; i < jstate->clocations_count; i++)
    2884              :     {
    2885              :         int         off,        /* Offset from start for cur tok */
    2886              :                     tok_len;    /* Length (in bytes) of that tok */
    2887              : 
    2888              :         /*
    2889              :          * If we have an external param at this location, but no lists are
    2890              :          * being squashed across the query, then we skip here; this will make
    2891              :          * us print the characters found in the original query that represent
    2892              :          * the parameter in the next iteration (or after the loop is done),
    2893              :          * which is a bit odd but seems to work okay in most cases.
    2894              :          */
    2895        33538 :         if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists)
    2896          157 :             continue;
    2897              : 
    2898        33381 :         off = jstate->clocations[i].location;
    2899              : 
    2900              :         /* Adjust recorded location if we're dealing with partial string */
    2901        33381 :         off -= query_loc;
    2902              : 
    2903        33381 :         tok_len = jstate->clocations[i].length;
    2904              : 
    2905        33381 :         if (tok_len < 0)
    2906          253 :             continue;           /* ignore any duplicates */
    2907              : 
    2908              :         /* Copy next chunk (what precedes the next constant) */
    2909        33128 :         len_to_wrt = off - last_off;
    2910        33128 :         len_to_wrt -= last_tok_len;
    2911              :         Assert(len_to_wrt >= 0);
    2912        33128 :         memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
    2913        33128 :         n_quer_loc += len_to_wrt;
    2914              : 
    2915              :         /*
    2916              :          * And insert a param symbol in place of the constant token; and, if
    2917              :          * we have a squashable list, insert a placeholder comment starting
    2918              :          * from the list's second value.
    2919              :          */
    2920        33128 :         n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s",
    2921        33128 :                               num_constants_replaced + 1 + jstate->highest_extern_param_id,
    2922        33128 :                               jstate->clocations[i].squashed ? " /*, ... */" : "");
    2923        33128 :         num_constants_replaced++;
    2924              : 
    2925              :         /* move forward */
    2926        33128 :         quer_loc = off + tok_len;
    2927        33128 :         last_off = off;
    2928        33128 :         last_tok_len = tok_len;
    2929              :     }
    2930              : 
    2931              :     /*
    2932              :      * We've copied up until the last ignorable constant.  Copy over the
    2933              :      * remaining bytes of the original query string.
    2934              :      */
    2935        11209 :     len_to_wrt = query_len - quer_loc;
    2936              : 
    2937              :     Assert(len_to_wrt >= 0);
    2938        11209 :     memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
    2939        11209 :     n_quer_loc += len_to_wrt;
    2940              : 
    2941              :     Assert(n_quer_loc <= norm_query_buflen);
    2942        11209 :     norm_query[n_quer_loc] = '\0';
    2943              : 
    2944        11209 :     *query_len_p = n_quer_loc;
    2945        11209 :     return norm_query;
    2946              : }
    2947              : 
    2948              : /*
    2949              :  * Given a valid SQL string and an array of constant-location records,
    2950              :  * fill in the textual lengths of those constants.
    2951              :  *
    2952              :  * The constants may use any allowed constant syntax, such as float literals,
    2953              :  * bit-strings, single-quoted strings and dollar-quoted strings.  This is
    2954              :  * accomplished by using the public API for the core scanner.
    2955              :  *
    2956              :  * It is the caller's job to ensure that the string is a valid SQL statement
    2957              :  * with constants at the indicated locations.  Since in practice the string
    2958              :  * has already been parsed, and the locations that the caller provides will
    2959              :  * have originated from within the authoritative parser, this should not be
    2960              :  * a problem.
    2961              :  *
    2962              :  * Multiple constants can have the same location.  We reset lengths of those
    2963              :  * past the first to -1 so that they can later be ignored.
    2964              :  *
    2965              :  * If query_loc > 0, then "query" has been advanced by that much compared to
    2966              :  * the original string start, so we need to translate the provided locations
    2967              :  * to compensate.  (This lets us avoid re-scanning statements before the one
    2968              :  * of interest, so it's worth doing.)
    2969              :  *
    2970              :  * N.B. There is an assumption that a '-' character at a Const location begins
    2971              :  * a negative numeric constant.  This precludes there ever being another
    2972              :  * reason for a constant to start with a '-'.
    2973              :  */
    2974              : static void
    2975        11209 : fill_in_constant_lengths(JumbleState *jstate, const char *query,
    2976              :                          int query_loc)
    2977              : {
    2978              :     LocationLen *locs;
    2979              :     core_yyscan_t yyscanner;
    2980              :     core_yy_extra_type yyextra;
    2981              :     core_YYSTYPE yylval;
    2982              :     YYLTYPE     yylloc;
    2983              : 
    2984              :     /*
    2985              :      * Sort the records by location so that we can process them in order while
    2986              :      * scanning the query text.
    2987              :      */
    2988        11209 :     if (jstate->clocations_count > 1)
    2989         7092 :         qsort(jstate->clocations, jstate->clocations_count,
    2990              :               sizeof(LocationLen), comp_location);
    2991        11209 :     locs = jstate->clocations;
    2992              : 
    2993              :     /* initialize the flex scanner --- should match raw_parser() */
    2994        11209 :     yyscanner = scanner_init(query,
    2995              :                              &yyextra,
    2996              :                              &ScanKeywords,
    2997              :                              ScanKeywordTokens);
    2998              : 
    2999              :     /* Search for each constant, in sequence */
    3000        44747 :     for (int i = 0; i < jstate->clocations_count; i++)
    3001              :     {
    3002              :         int         loc;
    3003              :         int         tok;
    3004              : 
    3005              :         /* Ignore constants after the first one in the same location */
    3006        33538 :         if (i > 0 && locs[i].location == locs[i - 1].location)
    3007              :         {
    3008          255 :             locs[i].length = -1;
    3009          255 :             continue;
    3010              :         }
    3011              : 
    3012        33283 :         if (locs[i].squashed)
    3013          660 :             continue;           /* squashable list, ignore */
    3014              : 
    3015              :         /* Adjust recorded location if we're dealing with partial string */
    3016        32623 :         loc = locs[i].location - query_loc;
    3017              :         Assert(loc >= 0);
    3018              : 
    3019              :         /*
    3020              :          * We have a valid location for a constant that's not a dupe. Lex
    3021              :          * tokens until we find the desired constant.
    3022              :          */
    3023              :         for (;;)
    3024              :         {
    3025       251931 :             tok = core_yylex(&yylval, &yylloc, yyscanner);
    3026              : 
    3027              :             /* We should not hit end-of-string, but if we do, behave sanely */
    3028       251931 :             if (tok == 0)
    3029            0 :                 break;          /* out of inner for-loop */
    3030              : 
    3031              :             /*
    3032              :              * We should find the token position exactly, but if we somehow
    3033              :              * run past it, work with that.
    3034              :              */
    3035       251931 :             if (yylloc >= loc)
    3036              :             {
    3037        32623 :                 if (query[loc] == '-')
    3038              :                 {
    3039              :                     /*
    3040              :                      * It's a negative value - this is the one and only case
    3041              :                      * where we replace more than a single token.
    3042              :                      *
    3043              :                      * Do not compensate for the core system's special-case
    3044              :                      * adjustment of location to that of the leading '-'
    3045              :                      * operator in the event of a negative constant.  It is
    3046              :                      * also useful for our purposes to start from the minus
    3047              :                      * symbol.  In this way, queries like "select * from foo
    3048              :                      * where bar = 1" and "select * from foo where bar = -2"
    3049              :                      * will have identical normalized query strings.
    3050              :                      */
    3051          378 :                     tok = core_yylex(&yylval, &yylloc, yyscanner);
    3052          378 :                     if (tok == 0)
    3053            0 :                         break;  /* out of inner for-loop */
    3054              :                 }
    3055              : 
    3056              :                 /*
    3057              :                  * We now rely on the assumption that flex has placed a zero
    3058              :                  * byte after the text of the current token in scanbuf.
    3059              :                  */
    3060        32623 :                 locs[i].length = strlen(yyextra.scanbuf + loc);
    3061        32623 :                 break;          /* out of inner for-loop */
    3062              :             }
    3063              :         }
    3064              : 
    3065              :         /* If we hit end-of-string, give up, leaving remaining lengths -1 */
    3066        32623 :         if (tok == 0)
    3067            0 :             break;
    3068              :     }
    3069              : 
    3070        11209 :     scanner_finish(yyscanner);
    3071        11209 : }
    3072              : 
    3073              : /*
    3074              :  * comp_location: comparator for qsorting LocationLen structs by location
    3075              :  */
    3076              : static int
    3077        37598 : comp_location(const void *a, const void *b)
    3078              : {
    3079        37598 :     int         l = ((const LocationLen *) a)->location;
    3080        37598 :     int         r = ((const LocationLen *) b)->location;
    3081              : 
    3082        37598 :     return pg_cmp_s32(l, r);
    3083              : }
        

Generated by: LCOV version 2.0-1