LCOV - code coverage report
Current view: top level - contrib/pg_stat_statements - pg_stat_statements.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 743 983 75.6 %
Date: 2025-10-23 17:17:24 Functions: 46 53 86.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pg_stat_statements.c
       4             :  *      Track statement planning and execution times as well as resource
       5             :  *      usage across a whole database cluster.
       6             :  *
       7             :  * Execution costs are totaled for each distinct source query, and kept in
       8             :  * a shared hashtable.  (We track only as many distinct queries as will fit
       9             :  * in the designated amount of shared memory.)
      10             :  *
      11             :  * Starting in Postgres 9.2, this module normalized query entries.  As of
      12             :  * Postgres 14, the normalization is done by the core if compute_query_id is
      13             :  * enabled, or optionally by third-party modules.
      14             :  *
      15             :  * To facilitate presenting entries to users, we create "representative" query
      16             :  * strings in which constants are replaced with parameter symbols ($n), to
      17             :  * make it clearer what a normalized entry can represent.  To save on shared
      18             :  * memory, and to avoid having to truncate oversized query strings, we store
      19             :  * these strings in a temporary external query-texts file.  Offsets into this
      20             :  * file are kept in shared memory.
      21             :  *
      22             :  * Note about locking issues: to create or delete an entry in the shared
      23             :  * hashtable, one must hold pgss->lock exclusively.  Modifying any field
      24             :  * in an entry except the counters requires the same.  To look up an entry,
      25             :  * one must hold the lock shared.  To read or update the counters within
      26             :  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
      27             :  * disappear!) and also take the entry's mutex spinlock.
      28             :  * The shared state variable pgss->extent (the next free spot in the external
      29             :  * query-text file) should be accessed only while holding either the
      30             :  * pgss->mutex spinlock, or exclusive lock on pgss->lock.  We use the mutex to
      31             :  * allow reserving file space while holding only shared lock on pgss->lock.
      32             :  * Rewriting the entire external query-text file, eg for garbage collection,
      33             :  * requires holding pgss->lock exclusively; this allows individual entries
      34             :  * in the file to be read or written while holding only shared lock.
      35             :  *
      36             :  *
      37             :  * Copyright (c) 2008-2025, PostgreSQL Global Development Group
      38             :  *
      39             :  * IDENTIFICATION
      40             :  *    contrib/pg_stat_statements/pg_stat_statements.c
      41             :  *
      42             :  *-------------------------------------------------------------------------
      43             :  */
      44             : #include "postgres.h"
      45             : 
      46             : #include <math.h>
      47             : #include <sys/stat.h>
      48             : #include <unistd.h>
      49             : 
      50             : #include "access/htup_details.h"
      51             : #include "access/parallel.h"
      52             : #include "catalog/pg_authid.h"
      53             : #include "common/int.h"
      54             : #include "executor/instrument.h"
      55             : #include "funcapi.h"
      56             : #include "jit/jit.h"
      57             : #include "mb/pg_wchar.h"
      58             : #include "miscadmin.h"
      59             : #include "nodes/queryjumble.h"
      60             : #include "optimizer/planner.h"
      61             : #include "parser/analyze.h"
      62             : #include "parser/scanner.h"
      63             : #include "pgstat.h"
      64             : #include "storage/fd.h"
      65             : #include "storage/ipc.h"
      66             : #include "storage/lwlock.h"
      67             : #include "storage/shmem.h"
      68             : #include "storage/spin.h"
      69             : #include "tcop/utility.h"
      70             : #include "utils/acl.h"
      71             : #include "utils/builtins.h"
      72             : #include "utils/memutils.h"
      73             : #include "utils/timestamp.h"
      74             : 
      75          16 : PG_MODULE_MAGIC_EXT(
      76             :                     .name = "pg_stat_statements",
      77             :                     .version = PG_VERSION
      78             : );  /* module magic: carries this module's name and the PG_VERSION value */
      79             : 
      80             : /* Location of permanent stats file (valid when database is shut down) */
      81             : #define PGSS_DUMP_FILE  PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
      82             : 
      83             : /*
      84             :  * Location of external query text file (under PG_STAT_TMP_DIR, unlike the permanent dump file).
      85             :  */
      86             : #define PGSS_TEXT_FILE  PG_STAT_TMP_DIR "/pgss_query_texts.stat"
      87             : 
      88             : /* Magic number identifying the stats file format */
      89             : static const uint32 PGSS_FILE_HEADER = 0x20250731;
      90             : 
      91             : /* PostgreSQL major version number, changes in which invalidate all entries */
      92             : static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
      93             : 
      94             : /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
      95             : #define USAGE_EXEC(duration)    (1.0)   /* duration is ignored for now */
      96             : #define USAGE_INIT              (1.0)   /* including initial planning */
      97             : #define ASSUMED_MEDIAN_INIT     (10.0)  /* initial assumed median usage */
      98             : #define ASSUMED_LENGTH_INIT     1024    /* initial assumed mean query length */
      99             : #define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
     100             : #define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
     101             : #define USAGE_DEALLOC_PERCENT   5   /* free this % of entries at once */
     102             : #define IS_STICKY(c)    ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)    /* true while no plan or exec calls are recorded; the argument c is expanded unparenthesized, so pass a plain struct expression */
     103             : 
     104             : /*
     105             :  * Extension API version number, so that objects created by older versions of the extension keep working (passed as api_version to pg_stat_statements_internal())
     106             :  */
     107             : typedef enum pgssVersion
     108             : {
     109             :     PGSS_V1_0 = 0,
     110             :     PGSS_V1_1,
     111             :     PGSS_V1_2,
     112             :     PGSS_V1_3,
     113             :     PGSS_V1_8,
     114             :     PGSS_V1_9,
     115             :     PGSS_V1_10,
     116             :     PGSS_V1_11,
     117             :     PGSS_V1_12,
     118             :     PGSS_V1_13,
     119             : } pgssVersion;
     120             : 
     121             : typedef enum pgssStoreKind
     122             : {
     123             :     PGSS_INVALID = -1,
     124             : 
     125             :     /*
     126             :      * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
     127             :      * reference the underlying values in the arrays in the Counters struct,
     128             :      * and this order is required in pg_stat_statements_internal().
     129             :      */
     130             :     PGSS_PLAN = 0,
     131             :     PGSS_EXEC,
     132             : } pgssStoreKind;
     133             : 
     134             : #define PGSS_NUMKIND (PGSS_EXEC + 1)    /* number of valid store kinds; sizes the per-kind arrays in Counters */
     135             : 
     136             : /*
     137             :  * Hashtable key that defines the identity of a hashtable entry.  We separate
     138             :  * queries by user and by database even if they are otherwise identical.
     139             :  *
     140             :  * If you add a new field to this struct, make sure to teach pgss_store() to
     141             :  * zero the padding bytes.  Otherwise, things will break, because pgss_hash is
     142             :  * created using HASH_BLOBS, and thus tag_hash is used to hash this.
     143             :  */
     144             : typedef struct pgssHashKey
     145             : {
     146             :     Oid         userid;         /* user OID */
     147             :     Oid         dbid;           /* database OID */
     148             :     int64       queryid;        /* query identifier */
     149             :     bool        toplevel;       /* query executed at top level */
     150             : } pgssHashKey;
     151             : 
     152             : /*
     153             :  * The actual stats counters kept within pgssEntry.  Fields that are arrays of
     154             :  * PGSS_NUMKIND elements are indexed by pgssStoreKind (PGSS_PLAN, PGSS_EXEC). */
     155             : typedef struct Counters
     156             : {
     157             :     int64       calls[PGSS_NUMKIND];    /* # of times planned/executed */
     158             :     double      total_time[PGSS_NUMKIND];   /* total planning/execution time,
     159             :                                              * in msec */
     160             :     double      min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
     161             :                                          * msec since min/max reset */
     162             :     double      max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
     163             :                                          * msec since min/max reset */
     164             :     double      mean_time[PGSS_NUMKIND];    /* mean planning/execution time in
     165             :                                              * msec */
     166             :     double      sum_var_time[PGSS_NUMKIND]; /* sum of variances in
     167             :                                              * planning/execution time in msec */
     168             :     int64       rows;           /* total # of retrieved or affected rows */
     169             :     int64       shared_blks_hit;    /* # of shared buffer hits */
     170             :     int64       shared_blks_read;   /* # of shared disk blocks read */
     171             :     int64       shared_blks_dirtied;    /* # of shared disk blocks dirtied */
     172             :     int64       shared_blks_written;    /* # of shared disk blocks written */
     173             :     int64       local_blks_hit; /* # of local buffer hits */
     174             :     int64       local_blks_read;    /* # of local disk blocks read */
     175             :     int64       local_blks_dirtied; /* # of local disk blocks dirtied */
     176             :     int64       local_blks_written; /* # of local disk blocks written */
     177             :     int64       temp_blks_read; /* # of temp blocks read */
     178             :     int64       temp_blks_written;  /* # of temp blocks written */
     179             :     double      shared_blk_read_time;   /* time spent reading shared blocks,
     180             :                                          * in msec */
     181             :     double      shared_blk_write_time;  /* time spent writing shared blocks,
     182             :                                          * in msec */
     183             :     double      local_blk_read_time;    /* time spent reading local blocks, in
     184             :                                          * msec */
     185             :     double      local_blk_write_time;   /* time spent writing local blocks, in
     186             :                                          * msec */
     187             :     double      temp_blk_read_time; /* time spent reading temp blocks, in msec */
     188             :     double      temp_blk_write_time;    /* time spent writing temp blocks, in
     189             :                                          * msec */
     190             :     double      usage;          /* usage factor */
     191             :     int64       wal_records;    /* # of WAL records generated */
     192             :     int64       wal_fpi;        /* # of WAL full page images generated */
     193             :     uint64      wal_bytes;      /* total amount of WAL generated in bytes */
     194             :     int64       wal_buffers_full;   /* # of times the WAL buffers became full */
     195             :     int64       jit_functions;  /* total number of JIT functions emitted */
     196             :     double      jit_generation_time;    /* total time to generate jit code */
     197             :     int64       jit_inlining_count; /* number of times inlining time has been
     198             :                                      * > 0 */
     199             :     double      jit_deform_time;    /* total time to deform tuples in jit code */
     200             :     int64       jit_deform_count;   /* number of times deform time has been >
     201             :                                      * 0 */
     202             : 
     203             :     double      jit_inlining_time;  /* total time to inline jit code */
     204             :     int64       jit_optimization_count; /* number of times optimization time
     205             :                                          * has been > 0 */
     206             :     double      jit_optimization_time;  /* total time to optimize jit code */
     207             :     int64       jit_emission_count; /* number of times emission time has been
     208             :                                      * > 0 */
     209             :     double      jit_emission_time;  /* total time to emit jit code */
     210             :     int64       parallel_workers_to_launch; /* # of parallel workers planned
     211             :                                              * to be launched */
     212             :     int64       parallel_workers_launched;  /* # of parallel workers actually
     213             :                                              * launched */
     214             :     int64       generic_plan_calls; /* number of calls using a generic plan */
     215             :     int64       custom_plan_calls;  /* number of calls using a custom plan */
     216             : } Counters;
     217             : 
     218             : /*
     219             :  * Global statistics for pg_stat_statements
     220             :  */
     221             : typedef struct pgssGlobalStats
     222             : {
     223             :     int64       dealloc;        /* # of times entries were deallocated */
     224             :     TimestampTz stats_reset;    /* timestamp at which all stats were reset */
     225             : } pgssGlobalStats;
     226             : 
     227             : /*
     228             :  * Statistics per statement
     229             :  *
     230             :  * Note: in the event of a failure in garbage collection of the query text file,
     231             :  * we reset query_offset to zero and query_len to -1.  This will be seen as
     232             :  * an invalid state by qtext_fetch().
     233             :  */
     234             : typedef struct pgssEntry
     235             : {
     236             :     pgssHashKey key;            /* hash key of entry - MUST BE FIRST */
     237             :     Counters    counters;       /* the statistics for this query */
     238             :     Size        query_offset;   /* query text offset in external file */
     239             :     int         query_len;      /* # of valid bytes in query string, or -1 after a failed gc (see note above) */
     240             :     int         encoding;       /* query text encoding */
     241             :     TimestampTz stats_since;    /* timestamp of entry allocation */
     242             :     TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
     243             :     slock_t     mutex;          /* protects the counters only */
     244             : } pgssEntry;
     245             : 
     246             : /*
     247             :  * Global shared state, reached through the file-level pointer pgss
     248             :  */
     249             : typedef struct pgssSharedState
     250             : {
     251             :     LWLock     *lock;           /* protects hashtable search/modification */
     252             :     double      cur_median_usage;   /* current median usage in hashtable */
     253             :     Size        mean_query_len; /* current mean entry text length */
     254             :     slock_t     mutex;          /* protects following fields only: */
     255             :     Size        extent;         /* current extent of query file, i.e. the next free spot in it */
     256             :     int         n_writers;      /* number of active writers to query file */
     257             :     int         gc_count;       /* query file garbage collection cycle count */
     258             :     pgssGlobalStats stats;      /* global statistics for pgss */
     259             : } pgssSharedState;
     260             : 
     261             : /*---- Local variables ----*/
     262             : 
     263             : /* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
     264             : static int  nesting_level = 0;
     265             : 
     266             : /* Previous values of the hooks that _PG_init() overrides, saved there before it installs ours */
     267             : static shmem_request_hook_type prev_shmem_request_hook = NULL;
     268             : static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
     269             : static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
     270             : static planner_hook_type prev_planner_hook = NULL;
     271             : static ExecutorStart_hook_type prev_ExecutorStart = NULL;
     272             : static ExecutorRun_hook_type prev_ExecutorRun = NULL;
     273             : static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
     274             : static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
     275             : static ProcessUtility_hook_type prev_ProcessUtility = NULL;
     276             : 
     277             : /* Links to shared memory state */
     278             : static pgssSharedState *pgss = NULL;
     279             : static HTAB *pgss_hash = NULL;
     280             : 
     281             : /*---- GUC variables (registered under the pg_stat_statements.* names in _PG_init) ----*/
     282             : 
     283             : typedef enum
     284             : {
     285             :     PGSS_TRACK_NONE,            /* track no statements */
     286             :     PGSS_TRACK_TOP,             /* only top level statements */
     287             :     PGSS_TRACK_ALL,             /* all statements, including nested ones */
     288             : }           PGSSTrackLevel;     /* values taken by pgss_track */
     289             : 
     290             : static const struct config_enum_entry track_options[] =     /* choices offered for pg_stat_statements.track */
     291             : {
     292             :     {"none", PGSS_TRACK_NONE, false},
     293             :     {"top", PGSS_TRACK_TOP, false},
     294             :     {"all", PGSS_TRACK_ALL, false},
     295             :     {NULL, 0, false}
     296             : };
     297             : 
     298             : static int  pgss_max = 5000;    /* max # statements to track */
     299             : static int  pgss_track = PGSS_TRACK_TOP;    /* tracking level */
     300             : static bool pgss_track_utility = true;  /* whether to track utility commands */
     301             : static bool pgss_track_planning = false;    /* whether to track planning
     302             :                                              * duration */
     303             : static bool pgss_save = true;   /* whether to save stats across shutdown */
     304             : 
     305             : #define pgss_enabled(level) \
     306             :     (!IsParallelWorker() && \
     307             :     (pgss_track == PGSS_TRACK_ALL || \
     308             :     (pgss_track == PGSS_TRACK_TOP && (level) == 0)))    /* tracking applies at this nesting level: never in a parallel worker, always under PGSS_TRACK_ALL, only at level 0 under PGSS_TRACK_TOP */
     309             : 
     310             : #define record_gc_qtexts() \
     311             :     do { \
     312             :         SpinLockAcquire(&pgss->mutex); \
     313             :         pgss->gc_count++; \
     314             :         SpinLockRelease(&pgss->mutex); \
     315             :     } while(0)  /* increments pgss->gc_count while holding the pgss->mutex spinlock */
     316             : 
     317             : /*---- Function declarations ----*/
     318             : 
     319          14 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
     320          14 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_7);
     321          42 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_11);
     322           0 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
     323          14 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_3);
     324          14 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_8);
     325          14 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_9);
     326          14 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_10);
     327          14 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_11);
     328          14 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_12);
     329          50 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_13);
     330           0 : PG_FUNCTION_INFO_V1(pg_stat_statements);
     331          16 : PG_FUNCTION_INFO_V1(pg_stat_statements_info);
     332             : 
     333             : static void pgss_shmem_request(void);
     334             : static void pgss_shmem_startup(void);
     335             : static void pgss_shmem_shutdown(int code, Datum arg);
     336             : static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
     337             :                                     JumbleState *jstate);
     338             : static PlannedStmt *pgss_planner(Query *parse,
     339             :                                  const char *query_string,
     340             :                                  int cursorOptions,
     341             :                                  ParamListInfo boundParams,
     342             :                                  ExplainState *es);
     343             : static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
     344             : static void pgss_ExecutorRun(QueryDesc *queryDesc,
     345             :                              ScanDirection direction,
     346             :                              uint64 count);
     347             : static void pgss_ExecutorFinish(QueryDesc *queryDesc);
     348             : static void pgss_ExecutorEnd(QueryDesc *queryDesc);
     349             : static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
     350             :                                 bool readOnlyTree,
     351             :                                 ProcessUtilityContext context, ParamListInfo params,
     352             :                                 QueryEnvironment *queryEnv,
     353             :                                 DestReceiver *dest, QueryCompletion *qc);
     354             : static void pgss_store(const char *query, int64 queryId,
     355             :                        int query_location, int query_len,
     356             :                        pgssStoreKind kind,
     357             :                        double total_time, uint64 rows,
     358             :                        const BufferUsage *bufusage,
     359             :                        const WalUsage *walusage,
     360             :                        const struct JitInstrumentation *jitusage,
     361             :                        JumbleState *jstate,
     362             :                        int parallel_workers_to_launch,
     363             :                        int parallel_workers_launched,
     364             :                        PlannedStmtOrigin planOrigin);
     365             : static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
     366             :                                         pgssVersion api_version,
     367             :                                         bool showtext);
     368             : static Size pgss_memsize(void);
     369             : static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
     370             :                               int encoding, bool sticky);
     371             : static void entry_dealloc(void);
     372             : static bool qtext_store(const char *query, int query_len,
     373             :                         Size *query_offset, int *gc_count);
     374             : static char *qtext_load_file(Size *buffer_size);
     375             : static char *qtext_fetch(Size query_offset, int query_len,
     376             :                          char *buffer, Size buffer_size);
     377             : static bool need_gc_qtexts(void);
     378             : static void gc_qtexts(void);
     379             : static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
     380             : static char *generate_normalized_query(JumbleState *jstate, const char *query,
     381             :                                        int query_loc, int *query_len_p);
     382             : static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
     383             :                                      int query_loc);
     384             : static int  comp_location(const void *a, const void *b);
     385             : 
     386             : 
     387             : /*
     388             :  * Module load callback.  Does nothing unless loaded via shared_preload_libraries;
     389             :  * otherwise requests query_id calculation, defines our GUCs and installs our hooks. */
     390             : void
     391          16 : _PG_init(void)
     392             : {
     393             :     /*
     394             :      * In order to create our shared memory area, we have to be loaded via
     395             :      * shared_preload_libraries.  If not, fall out without hooking into any of
     396             :      * the main system.  (We don't throw error here because it seems useful to
     397             :      * allow the pg_stat_statements functions to be created even when the
     398             :      * module isn't active.  The functions must protect themselves against
     399             :      * being called then, however.)
     400             :      */
     401          16 :     if (!process_shared_preload_libraries_in_progress)
     402           2 :         return;
     403             : 
     404             :     /*
     405             :      * Inform the postmaster that we want to enable query_id calculation if
     406             :      * compute_query_id is set to auto.
     407             :      */
     408          14 :     EnableQueryId();
     409             : 
     410             :     /*
     411             :      * Define (or redefine) custom GUC variables.
     412             :      */
     413          14 :     DefineCustomIntVariable("pg_stat_statements.max",
     414             :                             "Sets the maximum number of statements tracked by pg_stat_statements.",
     415             :                             NULL,
     416             :                             &pgss_max,
     417             :                             5000,
     418             :                             100,
     419             :                             INT_MAX / 2,
     420             :                             PGC_POSTMASTER,
     421             :                             0,
     422             :                             NULL,
     423             :                             NULL,
     424             :                             NULL);
     425             : 
     426          14 :     DefineCustomEnumVariable("pg_stat_statements.track",
     427             :                              "Selects which statements are tracked by pg_stat_statements.",
     428             :                              NULL,
     429             :                              &pgss_track,
     430             :                              PGSS_TRACK_TOP,
     431             :                              track_options,
     432             :                              PGC_SUSET,
     433             :                              0,
     434             :                              NULL,
     435             :                              NULL,
     436             :                              NULL);
     437             : 
     438          14 :     DefineCustomBoolVariable("pg_stat_statements.track_utility",
     439             :                              "Selects whether utility commands are tracked by pg_stat_statements.",
     440             :                              NULL,
     441             :                              &pgss_track_utility,
     442             :                              true,
     443             :                              PGC_SUSET,
     444             :                              0,
     445             :                              NULL,
     446             :                              NULL,
     447             :                              NULL);
     448             : 
     449          14 :     DefineCustomBoolVariable("pg_stat_statements.track_planning",
     450             :                              "Selects whether planning duration is tracked by pg_stat_statements.",
     451             :                              NULL,
     452             :                              &pgss_track_planning,
     453             :                              false,
     454             :                              PGC_SUSET,
     455             :                              0,
     456             :                              NULL,
     457             :                              NULL,
     458             :                              NULL);
     459             : 
     460          14 :     DefineCustomBoolVariable("pg_stat_statements.save",
     461             :                              "Save pg_stat_statements statistics across server shutdowns.",
     462             :                              NULL,
     463             :                              &pgss_save,
     464             :                              true,
     465             :                              PGC_SIGHUP,
     466             :                              0,
     467             :                              NULL,
     468             :                              NULL,
     469             :                              NULL);
     470             : 
     471          14 :     MarkGUCPrefixReserved("pg_stat_statements");
     472             : 
     473             :     /*
     474             :      * Install hooks, saving the previous value of each one before overwriting it.
     475             :      */
     476          14 :     prev_shmem_request_hook = shmem_request_hook;
     477          14 :     shmem_request_hook = pgss_shmem_request;
     478          14 :     prev_shmem_startup_hook = shmem_startup_hook;
     479          14 :     shmem_startup_hook = pgss_shmem_startup;
     480          14 :     prev_post_parse_analyze_hook = post_parse_analyze_hook;
     481          14 :     post_parse_analyze_hook = pgss_post_parse_analyze;
     482          14 :     prev_planner_hook = planner_hook;
     483          14 :     planner_hook = pgss_planner;
     484          14 :     prev_ExecutorStart = ExecutorStart_hook;
     485          14 :     ExecutorStart_hook = pgss_ExecutorStart;
     486          14 :     prev_ExecutorRun = ExecutorRun_hook;
     487          14 :     ExecutorRun_hook = pgss_ExecutorRun;
     488          14 :     prev_ExecutorFinish = ExecutorFinish_hook;
     489          14 :     ExecutorFinish_hook = pgss_ExecutorFinish;
     490          14 :     prev_ExecutorEnd = ExecutorEnd_hook;
     491          14 :     ExecutorEnd_hook = pgss_ExecutorEnd;
     492          14 :     prev_ProcessUtility = ProcessUtility_hook;
     493          14 :     ProcessUtility_hook = pgss_ProcessUtility;
     494             : }
     495             : 
     496             : /*
     497             :  * shmem_request hook: request additional shared resources.  We'll allocate or
     498             :  * attach to the shared resources in pgss_shmem_startup().  Any previously installed hook is called first; we then ask for pgss_memsize() bytes of add-in shared memory and one LWLock in the "pg_stat_statements" named tranche.
     499             :  */
     500             : static void
     501          14 : pgss_shmem_request(void)
     502             : {
     503          14 :     if (prev_shmem_request_hook)
     504           0 :         prev_shmem_request_hook();
     505             : 
     506          14 :     RequestAddinShmemSpace(pgss_memsize());
     507          14 :     RequestNamedLWLockTranche("pg_stat_statements", 1);    /* same tranche name that pgss_shmem_startup() looks up with GetNamedLWLockTranche() */
     508          14 : }
     509             : 
     510             : /*
     511             :  * shmem_startup hook: allocate or attach to shared memory,
     512             :  * then load any pre-existing statistics from file.
     513             :  * Also create and load the query-texts file, which is expected to exist
     514             :  * (even if empty) while the module is enabled.  File read/write failures and invalid dump contents are reported with ereport(LOG, ...); the dump file is then unlinked and the function returns.
     515             :  */
     516             : static void
     517          14 : pgss_shmem_startup(void)
     518             : {
     519             :     bool        found;
     520             :     HASHCTL     info;
     521          14 :     FILE       *file = NULL;
     522          14 :     FILE       *qfile = NULL;
     523             :     uint32      header;
     524             :     int32       num;
     525             :     int32       pgver;
     526             :     int32       i;
     527             :     int         buffer_size;
     528          14 :     char       *buffer = NULL;
     529             : 
     530          14 :     if (prev_shmem_startup_hook)
     531           0 :         prev_shmem_startup_hook();
     532             : 
     533             :     /* reset in case this is a restart within the postmaster */
     534          14 :     pgss = NULL;
     535          14 :     pgss_hash = NULL;
     536             : 
     537             :     /*
     538             :      * Create or attach to the shared memory state, including hash table
     539             :      */
     540          14 :     LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
     541             : 
     542          14 :     pgss = ShmemInitStruct("pg_stat_statements",
     543             :                            sizeof(pgssSharedState),
     544             :                            &found);
     545             : 
     546          14 :     if (!found)
     547             :     {
     548             :         /* First time through ... */
     549          14 :         pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
     550          14 :         pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
     551          14 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
     552          14 :         SpinLockInit(&pgss->mutex);
     553          14 :         pgss->extent = 0;
     554          14 :         pgss->n_writers = 0;
     555          14 :         pgss->gc_count = 0;
     556          14 :         pgss->stats.dealloc = 0;
     557          14 :         pgss->stats.stats_reset = GetCurrentTimestamp();
     558             :     }
     559             : 
     560          14 :     info.keysize = sizeof(pgssHashKey);
     561          14 :     info.entrysize = sizeof(pgssEntry);
     562          14 :     pgss_hash = ShmemInitHash("pg_stat_statements hash",
     563             :                               pgss_max, pgss_max,
     564             :                               &info,
     565             :                               HASH_ELEM | HASH_BLOBS);
     566             : 
     567          14 :     LWLockRelease(AddinShmemInitLock);
     568             : 
     569             :     /*
     570             :      * If we're in the postmaster (or a standalone backend...), set up a shmem
     571             :      * exit hook to dump the statistics to disk.
     572             :      */
     573          14 :     if (!IsUnderPostmaster)
     574          14 :         on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
     575             : 
     576             :     /*
     577             :      * Done if some other process already completed our initialization.
     578             :      */
     579          14 :     if (found)
     580          14 :         return;
     581             : 
     582             :     /*
     583             :      * Note: we don't bother with locks here, because there should be no other
     584             :      * processes running when this code is reached.
     585             :      */
     586             : 
     587             :     /* Unlink query text file possibly left over from crash */
     588          14 :     unlink(PGSS_TEXT_FILE);
     589             : 
     590             :     /* Allocate new query text temp file */
     591          14 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
     592          14 :     if (qfile == NULL)
     593           0 :         goto write_error;
     594             : 
     595             :     /*
     596             :      * If we were told not to load old statistics, we're done.  (Note we do
     597             :      * not try to unlink any old dump file in this case.  This seems a bit
     598             :      * questionable but it's the historical behavior.)
     599             :      */
     600          14 :     if (!pgss_save)
     601             :     {
     602           2 :         FreeFile(qfile);
     603           2 :         return;
     604             :     }
     605             : 
     606             :     /*
     607             :      * Attempt to load old statistics from the dump file.
     608             :      */
     609          12 :     file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
     610          12 :     if (file == NULL)
     611             :     {
     612           8 :         if (errno != ENOENT)
     613           0 :             goto read_error;
     614             :         /* No existing persisted stats file, so we're done */
     615           8 :         FreeFile(qfile);
     616           8 :         return;
     617             :     }
     618             : 
     619           4 :     buffer_size = 2048;
     620           4 :     buffer = (char *) palloc(buffer_size);
     621             : 
     622           8 :     if (fread(&header, sizeof(uint32), 1, file) != 1 ||    /* dump file begins with: magic header, PG major version, entry count */
     623           8 :         fread(&pgver, sizeof(uint32), 1, file) != 1 ||    /* NOTE(review): pgver is declared int32 but sizeof(uint32) is used; the two are presumably the same size */
     624           4 :         fread(&num, sizeof(int32), 1, file) != 1)
     625           0 :         goto read_error;
     626             : 
     627           4 :     if (header != PGSS_FILE_HEADER ||
     628           4 :         pgver != PGSS_PG_MAJOR_VERSION)
     629           0 :         goto data_error;
     630             : 
     631       53292 :     for (i = 0; i < num; i++)
     632             :     {
     633             :         pgssEntry   temp;
     634             :         pgssEntry  *entry;
     635             :         Size        query_offset;
     636             : 
     637       53288 :         if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
     638           0 :             goto read_error;
     639             : 
     640             :         /* Encoding is the only field we can easily sanity-check */
     641       53288 :         if (!PG_VALID_BE_ENCODING(temp.encoding))
     642           0 :             goto data_error;
     643             : 
     644             :         /* Resize buffer as needed; ">=" because query_len + 1 bytes are read into it below */
     645       53288 :         if (temp.query_len >= buffer_size)
     646             :         {
     647           4 :             buffer_size = Max(buffer_size * 2, temp.query_len + 1);
     648           4 :             buffer = repalloc(buffer, buffer_size);
     649             :         }
     650             : 
     651       53288 :         if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
     652           0 :             goto read_error;
     653             : 
     654             :         /* Should have a trailing null, but let's make sure */
     655       53288 :         buffer[temp.query_len] = '\0';
     656             : 
     657             :         /* Skip loading "sticky" entries */
     658       53288 :         if (IS_STICKY(temp.counters))
     659        1478 :             continue;
     660             : 
     661             :         /* Store the query text; its offset is the number of bytes written to qfile so far */
     662       51810 :         query_offset = pgss->extent;
     663       51810 :         if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
     664           0 :             goto write_error;
     665       51810 :         pgss->extent += temp.query_len + 1;
     666             : 
     667             :         /* make the hashtable entry (discards old entries if too many) */
     668       51810 :         entry = entry_alloc(&temp.key, query_offset, temp.query_len,
     669             :                             temp.encoding,
     670             :                             false);    /* NOTE(review): final flag is presumably "sticky" -- confirm against entry_alloc() */
     671             : 
     672             :         /* copy in the actual stats */
     673       51810 :         entry->counters = temp.counters;
     674       51810 :         entry->stats_since = temp.stats_since;
     675       51810 :         entry->minmax_stats_since = temp.minmax_stats_since;
     676             :     }
     677             : 
     678             :     /* Read global statistics for pg_stat_statements */
     679           4 :     if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
     680           0 :         goto read_error;
     681             : 
     682           4 :     pfree(buffer);
     683           4 :     FreeFile(file);
     684           4 :     FreeFile(qfile);
     685             : 
     686             :     /*
     687             :      * Remove the persisted stats file so it's not included in
     688             :      * backups/replication standbys, etc.  A new file will be written on next
     689             :      * shutdown.
     690             :      *
     691             :      * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
     692             :      * because we remove that file on startup; it acts inversely to
     693             :      * PGSS_DUMP_FILE, in that it is only supposed to be around when the
     694             :      * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
     695             :      * when the server is not running.  Leaving the file creates no danger of
     696             :      * a newly restored database having a spurious record of execution costs,
     697             :      * which is what we're really concerned about here.
     698             :      */
     699           4 :     unlink(PGSS_DUMP_FILE);
     700             : 
     701           4 :     return;
     702             : 
     703           0 : read_error:
     704           0 :     ereport(LOG,
     705             :             (errcode_for_file_access(),
     706             :              errmsg("could not read file \"%s\": %m",
     707             :                     PGSS_DUMP_FILE)));
     708           0 :     goto fail;
     709           0 : data_error:
     710           0 :     ereport(LOG,
     711             :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     712             :              errmsg("ignoring invalid data in file \"%s\"",
     713             :                     PGSS_DUMP_FILE)));
     714           0 :     goto fail;
     715           0 : write_error:
     716           0 :     ereport(LOG,
     717             :             (errcode_for_file_access(),
     718             :              errmsg("could not write file \"%s\": %m",
     719             :                     PGSS_TEXT_FILE)));    /* no goto needed: control falls through into fail */
     720           0 : fail:
     721           0 :     if (buffer)
     722           0 :         pfree(buffer);
     723           0 :     if (file)
     724           0 :         FreeFile(file);
     725           0 :     if (qfile)
     726           0 :         FreeFile(qfile);
     727             :     /* If possible, throw away the bogus file; ignore any error */
     728           0 :     unlink(PGSS_DUMP_FILE);
     729             : 
     730             :     /*
     731             :      * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
     732             :      * server is running with pg_stat_statements enabled
     733             :      */
     734             : }
     735             : 
     736             : /*
     737             :  * shmem_shutdown hook: Dump statistics into file.
     738             :  *
     739             :  * Note: we don't bother with acquiring lock, because there should be no
     740             :  * other processes running when this is called.  Nothing is written when 'code' is nonzero, when the shared state was never set up, or when pg_stat_statements.save is off; 'arg' is not used.
     741             :  */
     742             : static void
     743          14 : pgss_shmem_shutdown(int code, Datum arg)
     744             : {
     745             :     FILE       *file;
     746          14 :     char       *qbuffer = NULL;
     747          14 :     Size        qbuffer_size = 0;
     748             :     HASH_SEQ_STATUS hash_seq;
     749             :     int32       num_entries;
     750             :     pgssEntry  *entry;
     751             : 
     752             :     /* Don't try to dump during a crash. */
     753          14 :     if (code)
     754          14 :         return;
     755             : 
     756             :     /* Safety check ... shouldn't get here unless shmem is set up. */
     757          14 :     if (!pgss || !pgss_hash)
     758           0 :         return;
     759             : 
     760             :     /* Don't dump if told not to. */
     761          14 :     if (!pgss_save)
     762           4 :         return;
     763             : 
     764          10 :     file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
     765          10 :     if (file == NULL)
     766           0 :         goto error;
     767             : 
     768          10 :     if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)    /* header, version, entry count: the same three fields pgss_shmem_startup() reads back first */
     769           0 :         goto error;
     770          10 :     if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
     771           0 :         goto error;
     772          10 :     num_entries = hash_get_num_entries(pgss_hash);
     773          10 :     if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
     774           0 :         goto error;
     775             : 
     776          10 :     qbuffer = qtext_load_file(&qbuffer_size);
     777          10 :     if (qbuffer == NULL)
     778           0 :         goto error;
     779             : 
     780             :     /*
     781             :      * When serializing to disk, we store query texts immediately after their
     782             :      * entry data.  Any orphaned query texts are thereby excluded.
     783             :      */
     784          10 :     hash_seq_init(&hash_seq, pgss_hash);
     785      107124 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
     786             :     {
     787      107114 :         int         len = entry->query_len;
     788      107114 :         char       *qstr = qtext_fetch(entry->query_offset, len,
     789             :                                        qbuffer, qbuffer_size);
     790             : 
     791      107114 :         if (qstr == NULL)
     792           0 :             continue;           /* Ignore any entries with bogus texts */
     793             : 
     794      107114 :         if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
     795      107114 :             fwrite(qstr, 1, len + 1, file) != len + 1)    /* len + 1 bytes, matching the query_len + 1 bytes pgss_shmem_startup() reads per entry */
     796             :         {
     797             :             /* note: we assume hash_seq_term won't change errno */
     798           0 :             hash_seq_term(&hash_seq);
     799           0 :             goto error;
     800             :         }
     801             :     }
     802             : 
     803             :     /* Dump global statistics for pg_stat_statements */
     804          10 :     if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
     805           0 :         goto error;
     806             : 
     807          10 :     free(qbuffer);    /* released with free(), not pfree(), both here and on the error path */
     808          10 :     qbuffer = NULL;
     809             : 
     810          10 :     if (FreeFile(file))
     811             :     {
     812           0 :         file = NULL;    /* FreeFile() reported failure; clear so the error path does not call FreeFile() again */
     813           0 :         goto error;
     814             :     }
     815             : 
     816             :     /*
     817             :      * Rename file into place, so we atomically replace any old one.  The result is deliberately discarded via the (void) cast.
     818             :      */
     819          10 :     (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
     820             : 
     821             :     /* Unlink query-texts file; it's not needed while shutdown */
     822          10 :     unlink(PGSS_TEXT_FILE);
     823             : 
     824          10 :     return;
     825             : 
     826           0 : error:
     827           0 :     ereport(LOG,
     828             :             (errcode_for_file_access(),
     829             :              errmsg("could not write file \"%s\": %m",
     830             :                     PGSS_DUMP_FILE ".tmp")));
     831           0 :     free(qbuffer);
     832           0 :     if (file)
     833           0 :         FreeFile(file);
     834           0 :     unlink(PGSS_DUMP_FILE ".tmp");    /* discard the partial temp file */
     835           0 :     unlink(PGSS_TEXT_FILE);    /* the query-texts file is removed on failure as well */
     836             : }
     837             : 
     838             : /*
     839             :  * Post-parse-analysis hook: mark query with a queryId.  The previous hook, if any, runs first; nothing further happens unless the shared state is attached and tracking is enabled at the current nesting level.
     840             :  */
     841             : static void
     842      156210 : pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
     843             : {
     844      156210 :     if (prev_post_parse_analyze_hook)
     845           0 :         prev_post_parse_analyze_hook(pstate, query, jstate);
     846             : 
     847             :     /* Safety check... */
     848      156210 :     if (!pgss || !pgss_hash || !pgss_enabled(nesting_level))
     849       24670 :         return;
     850             : 
     851             :     /*
     852             :      * If it's EXECUTE, clear the queryId so that stats will accumulate for
     853             :      * the underlying PREPARE.  But don't do this if we're not tracking
     854             :      * utility statements, to avoid messing up another extension that might be
     855             :      * tracking them.
     856             :      */
     857      131540 :     if (query->utilityStmt)
     858             :     {
     859       58648 :         if (pgss_track_utility && IsA(query->utilityStmt, ExecuteStmt))
     860             :         {
     861        6484 :             query->queryId = INT64CONST(0);
     862        6484 :             return;
     863             :         }
     864             :     }
     865             : 
     866             :     /*
     867             :      * If query jumbling were able to identify any ignorable constants, we
     868             :      * immediately create a hash table entry for the query, so that we can
     869             :      * record the normalized form of the query string.  If there were no such
     870             :      * constants, the normalized string would be the same as the query text
     871             :      * anyway, so there's no need for an early entry.
     872             :      */
     873      125056 :     if (jstate && jstate->clocations_count > 0)
     874       72434 :         pgss_store(pstate->p_sourcetext,
     875             :                    query->queryId,
     876             :                    query->stmt_location,
     877             :                    query->stmt_len,
     878             :                    PGSS_INVALID,    /* store kind: neither PGSS_PLAN nor PGSS_EXEC */
     879             :                    0,    /* slot where pgss_planner/pgss_ExecutorEnd pass elapsed msec */
     880             :                    0,    /* slot where pgss_ExecutorEnd passes es_total_processed */
     881             :                    NULL,    /* slot for buffer usage */
     882             :                    NULL,    /* slot for WAL usage */
     883             :                    NULL,    /* slot where pgss_ExecutorEnd passes JIT instrumentation */
     884             :                    jstate,    /* pgss_planner and pgss_ExecutorEnd pass NULL here */
     885             :                    0,    /* slot where pgss_ExecutorEnd passes es_parallel_workers_to_launch */
     886             :                    0,    /* slot where pgss_ExecutorEnd passes es_parallel_workers_launched */
     887             :                    PLAN_STMT_UNKNOWN);
     888             : }
     889             : 
     890             : /*
     891             :  * Planner hook: forward to regular planner, but measure planning time
     892             :  * if needed.  Returns whatever prev_planner_hook (if set) or standard_planner produced; nesting_level is raised around that call in both branches.
     893             :  */
     894             : static PlannedStmt *
     895       94586 : pgss_planner(Query *parse,
     896             :              const char *query_string,
     897             :              int cursorOptions,
     898             :              ParamListInfo boundParams,
     899             :              ExplainState *es)
     900             : {
     901             :     PlannedStmt *result;
     902             : 
     903             :     /*
     904             :      * We can't process the query if no query_string is provided, as
     905             :      * pgss_store needs it.  We also ignore query without queryid, as it would
     906             :      * be treated as a utility statement, which may not be the case.
     907             :      */
     908       94586 :     if (pgss_enabled(nesting_level)
     909       73224 :         && pgss_track_planning && query_string
     910         262 :         && parse->queryId != INT64CONST(0))
     911         262 :     {
     912             :         instr_time  start;
     913             :         instr_time  duration;
     914             :         BufferUsage bufusage_start,
     915             :                     bufusage;
     916             :         WalUsage    walusage_start,
     917             :                     walusage;
     918             : 
     919             :         /* We need to track buffer usage as the planner can access them. */
     920         262 :         bufusage_start = pgBufferUsage;
     921             : 
     922             :         /*
     923             :          * Similarly the planner could write some WAL records in some cases
     924             :          * (e.g. setting a hint bit with those being WAL-logged)
     925             :          */
     926         262 :         walusage_start = pgWalUsage;
     927         262 :         INSTR_TIME_SET_CURRENT(start);
     928             : 
     929         262 :         nesting_level++;
     930         262 :         PG_TRY();
     931             :         {
     932         262 :             if (prev_planner_hook)
     933           0 :                 result = prev_planner_hook(parse, query_string, cursorOptions,
     934             :                                            boundParams, es);
     935             :             else
     936         262 :                 result = standard_planner(parse, query_string, cursorOptions,
     937             :                                           boundParams, es);
     938             :         }
     939           0 :         PG_FINALLY();    /* undo the nesting_level bump whether or not planning raised an error */
     940             :         {
     941         262 :             nesting_level--;
     942             :         }
     943         262 :         PG_END_TRY();
     944             : 
     945         262 :         INSTR_TIME_SET_CURRENT(duration);
     946         262 :         INSTR_TIME_SUBTRACT(duration, start);
     947             : 
     948             :         /* calc differences of buffer counters (zeroed first; presumably BufferUsageAccumDiff adds into its destination -- confirm). */
     949         262 :         memset(&bufusage, 0, sizeof(BufferUsage));
     950         262 :         BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
     951             : 
     952             :         /* calc differences of WAL counters. */
     953         262 :         memset(&walusage, 0, sizeof(WalUsage));
     954         262 :         WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
     955             : 
     956         262 :         pgss_store(query_string,
     957             :                    parse->queryId,
     958             :                    parse->stmt_location,
     959             :                    parse->stmt_len,
     960             :                    PGSS_PLAN,
     961         262 :                    INSTR_TIME_GET_MILLISEC(duration),    /* elapsed planning time */
     962             :                    0,    /* slot where pgss_ExecutorEnd passes es_total_processed */
     963             :                    &bufusage,
     964             :                    &walusage,
     965             :                    NULL,    /* slot where pgss_ExecutorEnd passes JIT instrumentation */
     966             :                    NULL,    /* slot where pgss_post_parse_analyze passes its JumbleState */
     967             :                    0,    /* slot where pgss_ExecutorEnd passes es_parallel_workers_to_launch */
     968             :                    0,    /* slot where pgss_ExecutorEnd passes es_parallel_workers_launched */
     969             :                    result->planOrigin);
     970             :     }
     971             :     else
     972             :     {
     973             :         /*
     974             :          * Even though we're not tracking plan time for this statement, we
     975             :          * must still increment the nesting level, to ensure that functions
     976             :          * evaluated during planning are not seen as top-level calls.
     977             :          */
     978       94324 :         nesting_level++;
     979       94324 :         PG_TRY();
     980             :         {
     981       94324 :             if (prev_planner_hook)
     982           0 :                 result = prev_planner_hook(parse, query_string, cursorOptions,
     983             :                                            boundParams, es);
     984             :             else
     985       94324 :                 result = standard_planner(parse, query_string, cursorOptions,
     986             :                                           boundParams, es);
     987             :         }
     988        1516 :         PG_FINALLY();    /* undo the nesting_level bump whether or not planning raised an error */
     989             :         {
     990       94324 :             nesting_level--;
     991             :         }
     992       94324 :         PG_END_TRY();
     993             :     }
     994             : 
     995       93070 :     return result;
     996             : }
     997             : 
     998             : /*
     999             :  * ExecutorStart hook: start up tracking if needed.  The previous hook (or standard_ExecutorStart) runs before any of our own work.
    1000             :  */
    1001             : static void
    1002      115848 : pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
    1003             : {
    1004      115848 :     if (prev_ExecutorStart)
    1005           0 :         prev_ExecutorStart(queryDesc, eflags);
    1006             :     else
    1007      115848 :         standard_ExecutorStart(queryDesc, eflags);
    1008             : 
    1009             :     /*
    1010             :      * If query has queryId zero, don't track it.  This prevents double
    1011             :      * counting of optimizable statements that are directly contained in
    1012             :      * utility statements.
    1013             :      */
    1014      115314 :     if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
    1015             :     {
    1016             :         /*
    1017             :          * Set up to track total elapsed time in ExecutorRun.  Make sure the
    1018             :          * space is allocated in the per-query context so it will go away at
    1019             :          * ExecutorEnd.  An already-attached totaltime is left untouched.
    1020             :          */
    1021       77042 :         if (queryDesc->totaltime == NULL)
    1022             :         {
    1023             :             MemoryContext oldcxt;
    1024             : 
    1025       77042 :             oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
    1026       77042 :             queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
    1027       77042 :             MemoryContextSwitchTo(oldcxt);
    1028             :         }
    1029             :     }
    1030      115314 : }
    1031             : 
    1032             : /*
    1033             :  * ExecutorRun hook: all we need do is track nesting depth.  nesting_level is raised before chaining to the previous hook (or standard_ExecutorRun) and lowered again in the PG_FINALLY block.
    1034             :  */
    1035             : static void
    1036      112698 : pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
    1037             : {
    1038      112698 :     nesting_level++;
    1039      112698 :     PG_TRY();
    1040             :     {
    1041      112698 :         if (prev_ExecutorRun)
    1042           0 :             prev_ExecutorRun(queryDesc, direction, count);
    1043             :         else
    1044      112698 :             standard_ExecutorRun(queryDesc, direction, count);
    1045             :     }
    1046        6886 :     PG_FINALLY();    /* undo the nesting_level bump whether or not execution raised an error */
    1047             :     {
    1048      112698 :         nesting_level--;
    1049             :     }
    1050      112698 :     PG_END_TRY();
    1051      105812 : }
    1052             : 
    1053             : /*
    1054             :  * ExecutorFinish hook: all we need do is track nesting depth.  nesting_level is raised before chaining to the previous hook (or standard_ExecutorFinish) and lowered again in the PG_FINALLY block.
    1055             :  */
    1056             : static void
    1057      101894 : pgss_ExecutorFinish(QueryDesc *queryDesc)
    1058             : {
    1059      101894 :     nesting_level++;
    1060      101894 :     PG_TRY();
    1061             :     {
    1062      101894 :         if (prev_ExecutorFinish)
    1063           0 :             prev_ExecutorFinish(queryDesc);
    1064             :         else
    1065      101894 :             standard_ExecutorFinish(queryDesc);
    1066             :     }
    1067         328 :     PG_FINALLY();    /* undo the nesting_level bump whether or not the wrapped call raised an error */
    1068             :     {
    1069      101894 :         nesting_level--;
    1070             :     }
    1071      101894 :     PG_END_TRY();
    1072      101566 : }
    1073             : 
    1074             : /*
    1075             :  * ExecutorEnd hook: store results if needed.  The store is done before chaining to the previous hook (or standard_ExecutorEnd), which is always called.
    1076             :  */
    1077             : static void
    1078      107446 : pgss_ExecutorEnd(QueryDesc *queryDesc)
    1079             : {
    1080      107446 :     int64       queryId = queryDesc->plannedstmt->queryId;
    1081             : 
    1082      107446 :     if (queryId != INT64CONST(0) && queryDesc->totaltime &&    /* totaltime is what pgss_ExecutorStart attaches for tracked queries */
    1083       73958 :         pgss_enabled(nesting_level))
    1084             :     {
    1085             :         /*
    1086             :          * Make sure stats accumulation is done.  (Note: it's okay if several
    1087             :          * levels of hook all do this.)
    1088             :          */
    1089       73958 :         InstrEndLoop(queryDesc->totaltime);
    1090             : 
    1091       73800 :         pgss_store(queryDesc->sourceText,
    1092             :                    queryId,
    1093       73958 :                    queryDesc->plannedstmt->stmt_location,
    1094       73958 :                    queryDesc->plannedstmt->stmt_len,
    1095             :                    PGSS_EXEC,
    1096       73958 :                    queryDesc->totaltime->total * 1000.0,  /* convert to msec */
    1097       73958 :                    queryDesc->estate->es_total_processed,
    1098       73958 :                    &queryDesc->totaltime->bufusage,
    1099       73958 :                    &queryDesc->totaltime->walusage,
    1100         158 :                    queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,    /* JIT instrumentation only when es_jit is set */
    1101             :                    NULL,    /* slot where pgss_post_parse_analyze passes its JumbleState */
    1102       73958 :                    queryDesc->estate->es_parallel_workers_to_launch,
    1103       73958 :                    queryDesc->estate->es_parallel_workers_launched,
    1104       73958 :                    queryDesc->plannedstmt->planOrigin);
    1105             :     }
    1106             : 
    1107      107446 :     if (prev_ExecutorEnd)
    1108           0 :         prev_ExecutorEnd(queryDesc);
    1109             :     else
    1110      107446 :         standard_ExecutorEnd(queryDesc);
    1111      107446 : }
    1112             : 
    1113             : /*
    1114             :  * ProcessUtility hook
    1115             :  */
    1116             : static void
    1117       69422 : pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
    1118             :                     bool readOnlyTree,
    1119             :                     ProcessUtilityContext context,
    1120             :                     ParamListInfo params, QueryEnvironment *queryEnv,
    1121             :                     DestReceiver *dest, QueryCompletion *qc)
    1122             : {
    1123       69422 :     Node       *parsetree = pstmt->utilityStmt;
    1124       69422 :     int64       saved_queryId = pstmt->queryId;
    1125       69422 :     int         saved_stmt_location = pstmt->stmt_location;
    1126       69422 :     int         saved_stmt_len = pstmt->stmt_len;
    1127       69422 :     bool        enabled = pgss_track_utility && pgss_enabled(nesting_level);
    1128             : 
    1129             :     /*
    1130             :      * Force utility statements to get queryId zero.  We do this even in cases
    1131             :      * where the statement contains an optimizable statement for which a
    1132             :      * queryId could be derived (such as EXPLAIN or DECLARE CURSOR).  For such
    1133             :      * cases, runtime control will first go through ProcessUtility and then
    1134             :      * the executor, and we don't want the executor hooks to do anything,
    1135             :      * since we are already measuring the statement's costs at the utility
    1136             :      * level.
    1137             :      *
    1138             :      * Note that this is only done if pg_stat_statements is enabled and
    1139             :      * configured to track utility statements, in the unlikely possibility
    1140             :      * that user configured another extension to handle utility statements
    1141             :      * only.
    1142             :      */
    1143       69422 :     if (enabled)
    1144       58440 :         pstmt->queryId = INT64CONST(0);
    1145             : 
    1146             :     /*
    1147             :      * If it's an EXECUTE statement, we don't track it and don't increment the
    1148             :      * nesting level.  This allows the cycles to be charged to the underlying
    1149             :      * PREPARE instead (by the Executor hooks), which is much more useful.
    1150             :      *
    1151             :      * We also don't track execution of PREPARE.  If we did, we would get one
    1152             :      * hash table entry for the PREPARE (with hash calculated from the query
    1153             :      * string), and then a different one with the same query string (but hash
    1154             :      * calculated from the query tree) would be used to accumulate costs of
    1155             :      * ensuing EXECUTEs.  This would be confusing.  Since PREPARE doesn't
    1156             :      * actually run the planner (only parse+rewrite), its costs are generally
    1157             :      * pretty negligible and it seems okay to just ignore it.
    1158             :      */
    1159       69422 :     if (enabled &&
    1160       58440 :         !IsA(parsetree, ExecuteStmt) &&
    1161       51968 :         !IsA(parsetree, PrepareStmt))
    1162       46892 :     {
    1163             :         instr_time  start;
    1164             :         instr_time  duration;
    1165             :         uint64      rows;
    1166             :         BufferUsage bufusage_start,
    1167             :                     bufusage;
    1168             :         WalUsage    walusage_start,
    1169             :                     walusage;
    1170             : 
    1171       51722 :         bufusage_start = pgBufferUsage;
    1172       51722 :         walusage_start = pgWalUsage;
    1173       51722 :         INSTR_TIME_SET_CURRENT(start);
    1174             : 
    1175       51722 :         nesting_level++;
    1176       51722 :         PG_TRY();
    1177             :         {
    1178       51722 :             if (prev_ProcessUtility)
    1179           0 :                 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
    1180             :                                     context, params, queryEnv,
    1181             :                                     dest, qc);
    1182             :             else
    1183       51722 :                 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
    1184             :                                         context, params, queryEnv,
    1185             :                                         dest, qc);
    1186             :         }
    1187        4830 :         PG_FINALLY();
    1188             :         {
    1189       51722 :             nesting_level--;
    1190             :         }
    1191       51722 :         PG_END_TRY();
    1192             : 
    1193             :         /*
    1194             :          * CAUTION: do not access the *pstmt data structure again below here.
    1195             :          * If it was a ROLLBACK or similar, that data structure may have been
    1196             :          * freed.  We must copy everything we still need into local variables,
    1197             :          * which we did above.
    1198             :          *
    1199             :          * For the same reason, we can't risk restoring pstmt->queryId to its
    1200             :          * former value, which'd otherwise be a good idea.
    1201             :          */
    1202             : 
    1203       46892 :         INSTR_TIME_SET_CURRENT(duration);
    1204       46892 :         INSTR_TIME_SUBTRACT(duration, start);
    1205             : 
    1206             :         /*
    1207             :          * Track the total number of rows retrieved or affected by the utility
    1208             :          * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
    1209             :          * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
    1210             :          */
    1211       46886 :         rows = (qc && (qc->commandTag == CMDTAG_COPY ||
    1212       43574 :                        qc->commandTag == CMDTAG_FETCH ||
    1213       43054 :                        qc->commandTag == CMDTAG_SELECT ||
    1214       42680 :                        qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
    1215       93778 :             qc->nprocessed : 0;
    1216             : 
    1217             :         /* calc differences of buffer counters. */
    1218       46892 :         memset(&bufusage, 0, sizeof(BufferUsage));
    1219       46892 :         BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
    1220             : 
    1221             :         /* calc differences of WAL counters. */
    1222       46892 :         memset(&walusage, 0, sizeof(WalUsage));
    1223       46892 :         WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
    1224             : 
    1225       46892 :         pgss_store(queryString,
    1226             :                    saved_queryId,
    1227             :                    saved_stmt_location,
    1228             :                    saved_stmt_len,
    1229             :                    PGSS_EXEC,
    1230       46892 :                    INSTR_TIME_GET_MILLISEC(duration),
    1231             :                    rows,
    1232             :                    &bufusage,
    1233             :                    &walusage,
    1234             :                    NULL,
    1235             :                    NULL,
    1236             :                    0,
    1237             :                    0,
    1238             :                    pstmt->planOrigin);
    1239             :     }
    1240             :     else
    1241             :     {
    1242             :         /*
    1243             :          * Even though we're not tracking execution time for this statement,
    1244             :          * we must still increment the nesting level, to ensure that functions
    1245             :          * evaluated within it are not seen as top-level calls.  But don't do
    1246             :          * so for EXECUTE; that way, when control reaches pgss_planner or
    1247             :          * pgss_ExecutorStart, we will treat the costs as top-level if
    1248             :          * appropriate.  Likewise, don't bump for PREPARE, so that parse
    1249             :          * analysis will treat the statement as top-level if appropriate.
    1250             :          *
    1251             :          * To be absolutely certain we don't mess up the nesting level,
    1252             :          * evaluate the bump_level condition just once.
    1253             :          */
    1254       17700 :         bool        bump_level =
    1255       28926 :             !IsA(parsetree, ExecuteStmt) &&
    1256       11226 :             !IsA(parsetree, PrepareStmt);
    1257             : 
    1258       17700 :         if (bump_level)
    1259       10978 :             nesting_level++;
    1260       17700 :         PG_TRY();
    1261             :         {
    1262       17700 :             if (prev_ProcessUtility)
    1263           0 :                 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
    1264             :                                     context, params, queryEnv,
    1265             :                                     dest, qc);
    1266             :             else
    1267       17700 :                 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
    1268             :                                         context, params, queryEnv,
    1269             :                                         dest, qc);
    1270             :         }
    1271         260 :         PG_FINALLY();
    1272             :         {
    1273       17700 :             if (bump_level)
    1274       10978 :                 nesting_level--;
    1275             :         }
    1276       17700 :         PG_END_TRY();
    1277             :     }
    1278       64332 : }
    1279             : 
    1280             : /*
    1281             :  * Store some statistics for a statement.
    1282             :  *
    1283             :  * If jstate is not NULL then we're trying to create an entry for which
    1284             :  * we have no statistics as yet; we just want to record the normalized
    1285             :  * query string.  total_time, rows, bufusage and walusage are ignored in this
    1286             :  * case.
    1287             :  *
    1288             :  * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
    1289             :  * for the arrays in the Counters field.
    1290             :  */
    1291             : static void
    1292      193546 : pgss_store(const char *query, int64 queryId,
    1293             :            int query_location, int query_len,
    1294             :            pgssStoreKind kind,
    1295             :            double total_time, uint64 rows,
    1296             :            const BufferUsage *bufusage,
    1297             :            const WalUsage *walusage,
    1298             :            const struct JitInstrumentation *jitusage,
    1299             :            JumbleState *jstate,
    1300             :            int parallel_workers_to_launch,
    1301             :            int parallel_workers_launched,
    1302             :            PlannedStmtOrigin planOrigin)
    1303             : {
    1304             :     pgssHashKey key;
    1305             :     pgssEntry  *entry;
    1306      193546 :     char       *norm_query = NULL;
    1307      193546 :     int         encoding = GetDatabaseEncoding();
    1308             : 
    1309             :     Assert(query != NULL);
    1310             : 
    1311             :     /* Safety check... */
    1312      193546 :     if (!pgss || !pgss_hash)
    1313           0 :         return;
    1314             : 
    1315             :     /*
    1316             :      * Nothing to do if compute_query_id isn't enabled and no other module
    1317             :      * computed a query identifier.
    1318             :      */
    1319      193546 :     if (queryId == INT64CONST(0))
    1320           0 :         return;
    1321             : 
    1322             :     /*
    1323             :      * Confine our attention to the relevant part of the string, if the query
    1324             :      * is a portion of a multi-statement source string, and update query
    1325             :      * location and length if needed.
    1326             :      */
    1327      193546 :     query = CleanQuerytext(query, &query_location, &query_len);
    1328             : 
    1329             :     /* Set up key for hashtable search */
    1330             : 
    1331             :     /* clear padding */
    1332      193546 :     memset(&key, 0, sizeof(pgssHashKey));
    1333             : 
    1334      193546 :     key.userid = GetUserId();
    1335      193546 :     key.dbid = MyDatabaseId;
    1336      193546 :     key.queryid = queryId;
    1337      193546 :     key.toplevel = (nesting_level == 0);
    1338             : 
    1339             :     /* Lookup the hash table entry with shared lock. */
    1340      193546 :     LWLockAcquire(pgss->lock, LW_SHARED);
    1341             : 
    1342      193546 :     entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
    1343             : 
    1344             :     /* Create new entry, if not present */
    1345      193546 :     if (!entry)
    1346             :     {
    1347             :         Size        query_offset;
    1348             :         int         gc_count;
    1349             :         bool        stored;
    1350             :         bool        do_gc;
    1351             : 
    1352             :         /*
    1353             :          * Create a new, normalized query string if caller asked.  We don't
    1354             :          * need to hold the lock while doing this work.  (Note: in any case,
    1355             :          * it's possible that someone else creates a duplicate hashtable entry
    1356             :          * in the interval where we don't hold the lock below.  That case is
    1357             :          * handled by entry_alloc.)
    1358             :          */
    1359       57062 :         if (jstate)
    1360             :         {
    1361       21328 :             LWLockRelease(pgss->lock);
    1362       21328 :             norm_query = generate_normalized_query(jstate, query,
    1363             :                                                    query_location,
    1364             :                                                    &query_len);
    1365       21328 :             LWLockAcquire(pgss->lock, LW_SHARED);
    1366             :         }
    1367             : 
    1368             :         /* Append new query text to file with only shared lock held */
    1369       57062 :         stored = qtext_store(norm_query ? norm_query : query, query_len,
    1370             :                              &query_offset, &gc_count);
    1371             : 
    1372             :         /*
    1373             :          * Determine whether we need to garbage collect external query texts
    1374             :          * while the shared lock is still held.  This micro-optimization
    1375             :          * avoids taking the time to decide this while holding exclusive lock.
    1376             :          */
    1377       57062 :         do_gc = need_gc_qtexts();
    1378             : 
    1379             :         /* Need exclusive lock to make a new hashtable entry - promote */
    1380       57062 :         LWLockRelease(pgss->lock);
    1381       57062 :         LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
    1382             : 
    1383             :         /*
    1384             :          * A garbage collection may have occurred while we weren't holding the
    1385             :          * lock.  In the unlikely event that this happens, the query text we
    1386             :          * stored above will have been garbage collected, so write it again.
    1387             :          * This should be infrequent enough that doing it while holding
    1388             :          * exclusive lock isn't a performance problem.
    1389             :          */
    1390       57062 :         if (!stored || pgss->gc_count != gc_count)
    1391           0 :             stored = qtext_store(norm_query ? norm_query : query, query_len,
    1392             :                                  &query_offset, NULL);
    1393             : 
    1394             :         /* If we failed to write to the text file, give up */
    1395       57062 :         if (!stored)
    1396           0 :             goto done;
    1397             : 
    1398             :         /* OK to create a new hashtable entry */
    1399       57062 :         entry = entry_alloc(&key, query_offset, query_len, encoding,
    1400             :                             jstate != NULL);
    1401             : 
    1402             :         /* If needed, perform garbage collection while exclusive lock held */
    1403       57062 :         if (do_gc)
    1404           0 :             gc_qtexts();
    1405             :     }
    1406             : 
    1407             :     /* Increment the counts, except when jstate is not NULL */
    1408      193546 :     if (!jstate)
    1409             :     {
    1410             :         Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
    1411             : 
    1412             :         /*
    1413             :          * Grab the spinlock while updating the counters (see comment about
    1414             :          * locking rules at the head of the file)
    1415             :          */
    1416      121112 :         SpinLockAcquire(&entry->mutex);
    1417             : 
    1418             :         /* "Unstick" entry if it was previously sticky */
    1419      121112 :         if (IS_STICKY(entry->counters))
    1420       55528 :             entry->counters.usage = USAGE_INIT;
    1421             : 
    1422      121112 :         entry->counters.calls[kind] += 1;
    1423      121112 :         entry->counters.total_time[kind] += total_time;
    1424             : 
    1425      121112 :         if (entry->counters.calls[kind] == 1)
    1426             :         {
    1427       55684 :             entry->counters.min_time[kind] = total_time;
    1428       55684 :             entry->counters.max_time[kind] = total_time;
    1429       55684 :             entry->counters.mean_time[kind] = total_time;
    1430             :         }
    1431             :         else
    1432             :         {
    1433             :             /*
    1434             :              * Welford's method for accurately computing variance. See
    1435             :              * <http://www.johndcook.com/blog/standard_deviation/>
    1436             :              */
    1437       65428 :             double      old_mean = entry->counters.mean_time[kind];
    1438             : 
    1439       65428 :             entry->counters.mean_time[kind] +=
    1440       65428 :                 (total_time - old_mean) / entry->counters.calls[kind];
    1441       65428 :             entry->counters.sum_var_time[kind] +=
    1442       65428 :                 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
    1443             : 
    1444             :             /*
    1445             :              * Calculate min and max time. min = 0 and max = 0 means that the
    1446             :              * min/max statistics were reset
    1447             :              */
    1448       65428 :             if (entry->counters.min_time[kind] == 0
    1449          12 :                 && entry->counters.max_time[kind] == 0)
    1450             :             {
    1451           6 :                 entry->counters.min_time[kind] = total_time;
    1452           6 :                 entry->counters.max_time[kind] = total_time;
    1453             :             }
    1454             :             else
    1455             :             {
    1456       65422 :                 if (entry->counters.min_time[kind] > total_time)
    1457       13344 :                     entry->counters.min_time[kind] = total_time;
    1458       65422 :                 if (entry->counters.max_time[kind] < total_time)
    1459        6326 :                     entry->counters.max_time[kind] = total_time;
    1460             :             }
    1461             :         }
    1462      121112 :         entry->counters.rows += rows;
    1463      121112 :         entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
    1464      121112 :         entry->counters.shared_blks_read += bufusage->shared_blks_read;
    1465      121112 :         entry->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
    1466      121112 :         entry->counters.shared_blks_written += bufusage->shared_blks_written;
    1467      121112 :         entry->counters.local_blks_hit += bufusage->local_blks_hit;
    1468      121112 :         entry->counters.local_blks_read += bufusage->local_blks_read;
    1469      121112 :         entry->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
    1470      121112 :         entry->counters.local_blks_written += bufusage->local_blks_written;
    1471      121112 :         entry->counters.temp_blks_read += bufusage->temp_blks_read;
    1472      121112 :         entry->counters.temp_blks_written += bufusage->temp_blks_written;
    1473      121112 :         entry->counters.shared_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_read_time);
    1474      121112 :         entry->counters.shared_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_write_time);
    1475      121112 :         entry->counters.local_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_read_time);
    1476      121112 :         entry->counters.local_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_write_time);
    1477      121112 :         entry->counters.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time);
    1478      121112 :         entry->counters.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time);
    1479      121112 :         entry->counters.usage += USAGE_EXEC(total_time);
    1480      121112 :         entry->counters.wal_records += walusage->wal_records;
    1481      121112 :         entry->counters.wal_fpi += walusage->wal_fpi;
    1482      121112 :         entry->counters.wal_bytes += walusage->wal_bytes;
    1483      121112 :         entry->counters.wal_buffers_full += walusage->wal_buffers_full;
    1484      121112 :         if (jitusage)
    1485             :         {
    1486         158 :             entry->counters.jit_functions += jitusage->created_functions;
    1487         158 :             entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
    1488             : 
    1489         158 :             if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
    1490         154 :                 entry->counters.jit_deform_count++;
    1491         158 :             entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
    1492             : 
    1493         158 :             if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
    1494          74 :                 entry->counters.jit_inlining_count++;
    1495         158 :             entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
    1496             : 
    1497         158 :             if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
    1498         154 :                 entry->counters.jit_optimization_count++;
    1499         158 :             entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
    1500             : 
    1501         158 :             if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
    1502         154 :                 entry->counters.jit_emission_count++;
    1503         158 :             entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
    1504             :         }
    1505             : 
    1506             :         /* parallel worker counters */
    1507      121112 :         entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
    1508      121112 :         entry->counters.parallel_workers_launched += parallel_workers_launched;
    1509             : 
    1510             :         /* plan cache counters */
    1511      121112 :         if (planOrigin == PLAN_STMT_CACHE_GENERIC)
    1512        6092 :             entry->counters.generic_plan_calls++;
    1513      115020 :         else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
    1514         738 :             entry->counters.custom_plan_calls++;
    1515             : 
    1516      121112 :         SpinLockRelease(&entry->mutex);
    1517             :     }
    1518             : 
    1519       72434 : done:
    1520      193546 :     LWLockRelease(pgss->lock);
    1521             : 
    1522             :     /* We postpone this clean-up until we're out of the lock */
    1523      193546 :     if (norm_query)
    1524       21328 :         pfree(norm_query);
    1525             : }
    1526             : 
    1527             : /*
    1528             :  * Reset statement statistics corresponding to userid, dbid, and queryid.
    1529             :  */
    1530             : Datum
    1531           2 : pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
    1532             : {
    1533             :     Oid         userid;
    1534             :     Oid         dbid;
    1535             :     int64       queryid;
    1536             : 
    1537           2 :     userid = PG_GETARG_OID(0);
    1538           2 :     dbid = PG_GETARG_OID(1);
    1539           2 :     queryid = PG_GETARG_INT64(2);
    1540             : 
    1541           2 :     entry_reset(userid, dbid, queryid, false);
    1542             : 
    1543           2 :     PG_RETURN_VOID();
    1544             : }
    1545             : 
    1546             : Datum
    1547         232 : pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
    1548             : {
    1549             :     Oid         userid;
    1550             :     Oid         dbid;
    1551             :     int64       queryid;
    1552             :     bool        minmax_only;
    1553             : 
    1554         232 :     userid = PG_GETARG_OID(0);
    1555         232 :     dbid = PG_GETARG_OID(1);
    1556         232 :     queryid = PG_GETARG_INT64(2);
    1557         232 :     minmax_only = PG_GETARG_BOOL(3);
    1558             : 
    1559         232 :     PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
    1560             : }
    1561             : 
    1562             : /*
    1563             :  * Reset statement statistics.
    1564             :  */
    1565             : Datum
    1566           2 : pg_stat_statements_reset(PG_FUNCTION_ARGS)
    1567             : {
    1568           2 :     entry_reset(0, 0, 0, false);
    1569             : 
    1570           2 :     PG_RETURN_VOID();
    1571             : }
    1572             : 
    1573             : /* Number of output arguments (columns) for various API versions */
    1574             : #define PG_STAT_STATEMENTS_COLS_V1_0    14
    1575             : #define PG_STAT_STATEMENTS_COLS_V1_1    18
    1576             : #define PG_STAT_STATEMENTS_COLS_V1_2    19
    1577             : #define PG_STAT_STATEMENTS_COLS_V1_3    23
    1578             : #define PG_STAT_STATEMENTS_COLS_V1_8    32
    1579             : #define PG_STAT_STATEMENTS_COLS_V1_9    33
    1580             : #define PG_STAT_STATEMENTS_COLS_V1_10   43
    1581             : #define PG_STAT_STATEMENTS_COLS_V1_11   49
    1582             : #define PG_STAT_STATEMENTS_COLS_V1_12   52
    1583             : #define PG_STAT_STATEMENTS_COLS_V1_13   54
    1584             : #define PG_STAT_STATEMENTS_COLS         54  /* maximum of above */
    1585             : 
    1586             : /*
    1587             :  * Retrieve statement statistics.
    1588             :  *
    1589             :  * The SQL API of this function has changed multiple times, and will likely
    1590             :  * do so again in future.  To support the case where a newer version of this
    1591             :  * loadable module is being used with an old SQL declaration of the function,
    1592             :  * we continue to support the older API versions.  For 1.2 and later, the
    1593             :  * expected API version is identified by embedding it in the C name of the
    1594             :  * function.  Unfortunately we weren't bright enough to do that for 1.1.
    1595             :  */
    1596             : Datum
    1597         250 : pg_stat_statements_1_13(PG_FUNCTION_ARGS)
    1598             : {
    1599         250 :     bool        showtext = PG_GETARG_BOOL(0);
    1600             : 
    1601         250 :     pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext);
    1602             : 
    1603         250 :     return (Datum) 0;
    1604             : }
    1605             : 
    1606             : Datum
    1607           2 : pg_stat_statements_1_12(PG_FUNCTION_ARGS)
    1608             : {
    1609           2 :     bool        showtext = PG_GETARG_BOOL(0);
    1610             : 
    1611           2 :     pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
    1612             : 
    1613           2 :     return (Datum) 0;
    1614             : }
    1615             : 
    1616             : Datum
    1617           2 : pg_stat_statements_1_11(PG_FUNCTION_ARGS)
    1618             : {
    1619           2 :     bool        showtext = PG_GETARG_BOOL(0);
    1620             : 
    1621           2 :     pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
    1622             : 
    1623           2 :     return (Datum) 0;
    1624             : }
    1625             : 
    1626             : Datum
    1627           2 : pg_stat_statements_1_10(PG_FUNCTION_ARGS)
    1628             : {
    1629           2 :     bool        showtext = PG_GETARG_BOOL(0);
    1630             : 
    1631           2 :     pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
    1632             : 
    1633           2 :     return (Datum) 0;
    1634             : }
    1635             : 
    1636             : Datum
    1637           2 : pg_stat_statements_1_9(PG_FUNCTION_ARGS)
    1638             : {
    1639           2 :     bool        showtext = PG_GETARG_BOOL(0);
    1640             : 
    1641           2 :     pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
    1642             : 
    1643           2 :     return (Datum) 0;
    1644             : }
    1645             : 
    1646             : Datum
    1647           2 : pg_stat_statements_1_8(PG_FUNCTION_ARGS)
    1648             : {
    1649           2 :     bool        showtext = PG_GETARG_BOOL(0);
    1650             : 
    1651           2 :     pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
    1652             : 
    1653           2 :     return (Datum) 0;
    1654             : }
    1655             : 
    1656             : Datum
    1657           2 : pg_stat_statements_1_3(PG_FUNCTION_ARGS)
    1658             : {
    1659           2 :     bool        showtext = PG_GETARG_BOOL(0);
    1660             : 
    1661           2 :     pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
    1662             : 
    1663           2 :     return (Datum) 0;
    1664             : }
    1665             : 
    1666             : Datum
    1667           0 : pg_stat_statements_1_2(PG_FUNCTION_ARGS)
    1668             : {
    1669           0 :     bool        showtext = PG_GETARG_BOOL(0);
    1670             : 
    1671           0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
    1672             : 
    1673           0 :     return (Datum) 0;
    1674             : }
    1675             : 
    1676             : /*
    1677             :  * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
    1678             :  * This can be removed someday, perhaps.
    1679             :  */
    1680             : Datum
    1681           0 : pg_stat_statements(PG_FUNCTION_ARGS)
    1682             : {
    1683             :     /* If it's really API 1.1, we'll figure that out below */
    1684           0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
    1685             : 
    1686           0 :     return (Datum) 0;
    1687             : }
    1688             : 
    1689             : /* Common code for all versions of pg_stat_statements() */
    1690             : static void
    1691         262 : pg_stat_statements_internal(FunctionCallInfo fcinfo,
    1692             :                             pgssVersion api_version,
    1693             :                             bool showtext)
    1694             : {
    1695         262 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    1696         262 :     Oid         userid = GetUserId();
    1697         262 :     bool        is_allowed_role = false;
    1698         262 :     char       *qbuffer = NULL;
    1699         262 :     Size        qbuffer_size = 0;
    1700         262 :     Size        extent = 0;
    1701         262 :     int         gc_count = 0;
    1702             :     HASH_SEQ_STATUS hash_seq;
    1703             :     pgssEntry  *entry;
    1704             : 
    1705             :     /*
    1706             :      * Superusers or roles with the privileges of pg_read_all_stats members
    1707             :      * are allowed
    1708             :      */
    1709         262 :     is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
    1710             : 
    1711             :     /* hash table must exist already */
    1712         262 :     if (!pgss || !pgss_hash)
    1713           0 :         ereport(ERROR,
    1714             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1715             :                  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
    1716             : 
    1717         262 :     InitMaterializedSRF(fcinfo, 0);
    1718             : 
    1719             :     /*
    1720             :      * Check we have the expected number of output arguments.  Aside from
    1721             :      * being a good safety check, we need a kluge here to detect API version
    1722             :      * 1.1, which was wedged into the code in an ill-considered way.
    1723             :      */
    1724         262 :     switch (rsinfo->setDesc->natts)
    1725             :     {
    1726           0 :         case PG_STAT_STATEMENTS_COLS_V1_0:
    1727           0 :             if (api_version != PGSS_V1_0)
    1728           0 :                 elog(ERROR, "incorrect number of output arguments");
    1729           0 :             break;
    1730           0 :         case PG_STAT_STATEMENTS_COLS_V1_1:
    1731             :             /* pg_stat_statements() should have told us 1.0 */
    1732           0 :             if (api_version != PGSS_V1_0)
    1733           0 :                 elog(ERROR, "incorrect number of output arguments");
    1734           0 :             api_version = PGSS_V1_1;
    1735           0 :             break;
    1736           0 :         case PG_STAT_STATEMENTS_COLS_V1_2:
    1737           0 :             if (api_version != PGSS_V1_2)
    1738           0 :                 elog(ERROR, "incorrect number of output arguments");
    1739           0 :             break;
    1740           2 :         case PG_STAT_STATEMENTS_COLS_V1_3:
    1741           2 :             if (api_version != PGSS_V1_3)
    1742           0 :                 elog(ERROR, "incorrect number of output arguments");
    1743           2 :             break;
    1744           2 :         case PG_STAT_STATEMENTS_COLS_V1_8:
    1745           2 :             if (api_version != PGSS_V1_8)
    1746           0 :                 elog(ERROR, "incorrect number of output arguments");
    1747           2 :             break;
    1748           2 :         case PG_STAT_STATEMENTS_COLS_V1_9:
    1749           2 :             if (api_version != PGSS_V1_9)
    1750           0 :                 elog(ERROR, "incorrect number of output arguments");
    1751           2 :             break;
    1752           2 :         case PG_STAT_STATEMENTS_COLS_V1_10:
    1753           2 :             if (api_version != PGSS_V1_10)
    1754           0 :                 elog(ERROR, "incorrect number of output arguments");
    1755           2 :             break;
    1756           2 :         case PG_STAT_STATEMENTS_COLS_V1_11:
    1757           2 :             if (api_version != PGSS_V1_11)
    1758           0 :                 elog(ERROR, "incorrect number of output arguments");
    1759           2 :             break;
    1760           2 :         case PG_STAT_STATEMENTS_COLS_V1_12:
    1761           2 :             if (api_version != PGSS_V1_12)
    1762           0 :                 elog(ERROR, "incorrect number of output arguments");
    1763           2 :             break;
    1764         250 :         case PG_STAT_STATEMENTS_COLS_V1_13:
    1765         250 :             if (api_version != PGSS_V1_13)
    1766           0 :                 elog(ERROR, "incorrect number of output arguments");
    1767         250 :             break;
    1768           0 :         default:
    1769           0 :             elog(ERROR, "incorrect number of output arguments");
    1770             :     }
    1771             : 
    1772             :     /*
    1773             :      * We'd like to load the query text file (if needed) while not holding any
    1774             :      * lock on pgss->lock.  In the worst case we'll have to do this again
    1775             :      * after we have the lock, but it's unlikely enough to make this a win
    1776             :      * despite occasional duplicated work.  We need to reload if anybody
    1777             :      * writes to the file (either a retail qtext_store(), or a garbage
    1778             :      * collection) between this point and where we've gotten shared lock.  If
    1779             :      * a qtext_store is actually in progress when we look, we might as well
    1780             :      * skip the speculative load entirely.
    1781             :      */
    1782         262 :     if (showtext)
    1783             :     {
    1784             :         int         n_writers;
    1785             : 
    1786             :         /* Take the mutex so we can examine variables */
    1787         262 :         SpinLockAcquire(&pgss->mutex);
    1788         262 :         extent = pgss->extent;
    1789         262 :         n_writers = pgss->n_writers;
    1790         262 :         gc_count = pgss->gc_count;
    1791         262 :         SpinLockRelease(&pgss->mutex);
    1792             : 
    1793             :         /* No point in loading file now if there are active writers */
    1794         262 :         if (n_writers == 0)
    1795         262 :             qbuffer = qtext_load_file(&qbuffer_size);
    1796             :     }
    1797             : 
    1798             :     /*
    1799             :      * Get shared lock, load or reload the query text file if we must, and
    1800             :      * iterate over the hashtable entries.
    1801             :      *
    1802             :      * With a large hash table, we might be holding the lock rather longer
    1803             :      * than one could wish.  However, this only blocks creation of new hash
    1804             :      * table entries, and the larger the hash table the less likely that is to
    1805             :      * be needed.  So we can hope this is okay.  Perhaps someday we'll decide
    1806             :      * we need to partition the hash table to limit the time spent holding any
    1807             :      * one lock.
    1808             :      */
    1809         262 :     LWLockAcquire(pgss->lock, LW_SHARED);
    1810             : 
    1811         262 :     if (showtext)
    1812             :     {
    1813             :         /*
    1814             :          * Here it is safe to examine extent and gc_count without taking the
    1815             :          * mutex.  Note that although other processes might change
    1816             :          * pgss->extent just after we look at it, the strings they then write
    1817             :          * into the file cannot yet be referenced in the hashtable, so we
    1818             :          * don't care whether we see them or not.
    1819             :          *
    1820             :          * If qtext_load_file fails, we just press on; we'll return NULL for
    1821             :          * every query text.
    1822             :          */
    1823         262 :         if (qbuffer == NULL ||
    1824         262 :             pgss->extent != extent ||
    1825         262 :             pgss->gc_count != gc_count)
    1826             :         {
    1827           0 :             free(qbuffer);
    1828           0 :             qbuffer = qtext_load_file(&qbuffer_size);
    1829             :         }
    1830             :     }
    1831             : 
    1832         262 :     hash_seq_init(&hash_seq, pgss_hash);
    1833       53972 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    1834             :     {
    1835             :         Datum       values[PG_STAT_STATEMENTS_COLS];
    1836             :         bool        nulls[PG_STAT_STATEMENTS_COLS];
    1837       53710 :         int         i = 0;
    1838             :         Counters    tmp;
    1839             :         double      stddev;
    1840       53710 :         int64       queryid = entry->key.queryid;
    1841             :         TimestampTz stats_since;
    1842             :         TimestampTz minmax_stats_since;
    1843             : 
    1844       53710 :         memset(values, 0, sizeof(values));
    1845       53710 :         memset(nulls, 0, sizeof(nulls));
    1846             : 
    1847       53710 :         values[i++] = ObjectIdGetDatum(entry->key.userid);
    1848       53710 :         values[i++] = ObjectIdGetDatum(entry->key.dbid);
    1849       53710 :         if (api_version >= PGSS_V1_9)
    1850       53686 :             values[i++] = BoolGetDatum(entry->key.toplevel);
    1851             : 
    1852       53710 :         if (is_allowed_role || entry->key.userid == userid)
    1853             :         {
    1854       53702 :             if (api_version >= PGSS_V1_2)
    1855       53702 :                 values[i++] = Int64GetDatumFast(queryid);
    1856             : 
    1857       53702 :             if (showtext)
    1858             :             {
    1859       53702 :                 char       *qstr = qtext_fetch(entry->query_offset,
    1860             :                                                entry->query_len,
    1861             :                                                qbuffer,
    1862             :                                                qbuffer_size);
    1863             : 
    1864       53702 :                 if (qstr)
    1865             :                 {
    1866             :                     char       *enc;
    1867             : 
    1868       53702 :                     enc = pg_any_to_server(qstr,
    1869             :                                            entry->query_len,
    1870             :                                            entry->encoding);
    1871             : 
    1872       53702 :                     values[i++] = CStringGetTextDatum(enc);
    1873             : 
    1874       53702 :                     if (enc != qstr)
    1875           0 :                         pfree(enc);
    1876             :                 }
    1877             :                 else
    1878             :                 {
    1879             :                     /* Just return a null if we fail to find the text */
    1880           0 :                     nulls[i++] = true;
    1881             :                 }
    1882             :             }
    1883             :             else
    1884             :             {
    1885             :                 /* Query text not requested */
    1886           0 :                 nulls[i++] = true;
    1887             :             }
    1888             :         }
    1889             :         else
    1890             :         {
    1891             :             /* Don't show queryid */
    1892           8 :             if (api_version >= PGSS_V1_2)
    1893           8 :                 nulls[i++] = true;
    1894             : 
    1895             :             /*
    1896             :              * Don't show query text, but hint as to the reason for not doing
    1897             :              * so if it was requested
    1898             :              */
    1899           8 :             if (showtext)
    1900           8 :                 values[i++] = CStringGetTextDatum("<insufficient privilege>");
    1901             :             else
    1902           0 :                 nulls[i++] = true;
    1903             :         }
    1904             : 
    1905             :         /* copy counters to a local variable to keep locking time short */
    1906       53710 :         SpinLockAcquire(&entry->mutex);
    1907       53710 :         tmp = entry->counters;
    1908       53710 :         SpinLockRelease(&entry->mutex);
    1909             : 
    1910             :         /*
    1911             :          * The spinlock is not required when reading these two as they are
    1912             :          * always updated when holding pgss->lock exclusively.
    1913             :          */
    1914       53710 :         stats_since = entry->stats_since;
    1915       53710 :         minmax_stats_since = entry->minmax_stats_since;
    1916             : 
    1917             :         /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
    1918       53710 :         if (IS_STICKY(tmp))
    1919          78 :             continue;
    1920             : 
    1921             :         /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
    1922      160896 :         for (int kind = 0; kind < PGSS_NUMKIND; kind++)
    1923             :         {
    1924      107264 :             if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
    1925             :             {
    1926      107256 :                 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
    1927      107256 :                 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
    1928             :             }
    1929             : 
    1930      107264 :             if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
    1931             :                 api_version >= PGSS_V1_8)
    1932             :             {
    1933      107256 :                 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
    1934      107256 :                 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
    1935      107256 :                 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
    1936             : 
    1937             :                 /*
    1938             :                  * Note we are calculating the population variance here, not
    1939             :                  * the sample variance, as we have data for the whole
    1940             :                  * population, so Bessel's correction is not used, and we
    1941             :                  * don't divide by tmp.calls - 1.
    1942             :                  */
    1943      107256 :                 if (tmp.calls[kind] > 1)
    1944       10066 :                     stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
    1945             :                 else
    1946       97190 :                     stddev = 0.0;
    1947      107256 :                 values[i++] = Float8GetDatumFast(stddev);
    1948             :             }
    1949             :         }
    1950       53632 :         values[i++] = Int64GetDatumFast(tmp.rows);
    1951       53632 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
    1952       53632 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
    1953       53632 :         if (api_version >= PGSS_V1_1)
    1954       53632 :             values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
    1955       53632 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
    1956       53632 :         values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
    1957       53632 :         values[i++] = Int64GetDatumFast(tmp.local_blks_read);
    1958       53632 :         if (api_version >= PGSS_V1_1)
    1959       53632 :             values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
    1960       53632 :         values[i++] = Int64GetDatumFast(tmp.local_blks_written);
    1961       53632 :         values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
    1962       53632 :         values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
    1963       53632 :         if (api_version >= PGSS_V1_1)
    1964             :         {
    1965       53632 :             values[i++] = Float8GetDatumFast(tmp.shared_blk_read_time);
    1966       53632 :             values[i++] = Float8GetDatumFast(tmp.shared_blk_write_time);
    1967             :         }
    1968       53632 :         if (api_version >= PGSS_V1_11)
    1969             :         {
    1970       53576 :             values[i++] = Float8GetDatumFast(tmp.local_blk_read_time);
    1971       53576 :             values[i++] = Float8GetDatumFast(tmp.local_blk_write_time);
    1972             :         }
    1973       53632 :         if (api_version >= PGSS_V1_10)
    1974             :         {
    1975       53594 :             values[i++] = Float8GetDatumFast(tmp.temp_blk_read_time);
    1976       53594 :             values[i++] = Float8GetDatumFast(tmp.temp_blk_write_time);
    1977             :         }
    1978       53632 :         if (api_version >= PGSS_V1_8)
    1979             :         {
    1980             :             char        buf[256];
    1981             :             Datum       wal_bytes;
    1982             : 
    1983       53624 :             values[i++] = Int64GetDatumFast(tmp.wal_records);
    1984       53624 :             values[i++] = Int64GetDatumFast(tmp.wal_fpi);
    1985             : 
    1986       53624 :             snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
    1987             : 
    1988             :             /* Convert to numeric. */
    1989       53624 :             wal_bytes = DirectFunctionCall3(numeric_in,
    1990             :                                             CStringGetDatum(buf),
    1991             :                                             ObjectIdGetDatum(0),
    1992             :                                             Int32GetDatum(-1));
    1993       53624 :             values[i++] = wal_bytes;
    1994             :         }
    1995       53632 :         if (api_version >= PGSS_V1_12)
    1996             :         {
    1997       53556 :             values[i++] = Int64GetDatumFast(tmp.wal_buffers_full);
    1998             :         }
    1999       53632 :         if (api_version >= PGSS_V1_10)
    2000             :         {
    2001       53594 :             values[i++] = Int64GetDatumFast(tmp.jit_functions);
    2002       53594 :             values[i++] = Float8GetDatumFast(tmp.jit_generation_time);
    2003       53594 :             values[i++] = Int64GetDatumFast(tmp.jit_inlining_count);
    2004       53594 :             values[i++] = Float8GetDatumFast(tmp.jit_inlining_time);
    2005       53594 :             values[i++] = Int64GetDatumFast(tmp.jit_optimization_count);
    2006       53594 :             values[i++] = Float8GetDatumFast(tmp.jit_optimization_time);
    2007       53594 :             values[i++] = Int64GetDatumFast(tmp.jit_emission_count);
    2008       53594 :             values[i++] = Float8GetDatumFast(tmp.jit_emission_time);
    2009             :         }
    2010       53632 :         if (api_version >= PGSS_V1_11)
    2011             :         {
    2012       53576 :             values[i++] = Int64GetDatumFast(tmp.jit_deform_count);
    2013       53576 :             values[i++] = Float8GetDatumFast(tmp.jit_deform_time);
    2014             :         }
    2015       53632 :         if (api_version >= PGSS_V1_12)
    2016             :         {
    2017       53556 :             values[i++] = Int64GetDatumFast(tmp.parallel_workers_to_launch);
    2018       53556 :             values[i++] = Int64GetDatumFast(tmp.parallel_workers_launched);
    2019             :         }
    2020       53632 :         if (api_version >= PGSS_V1_13)
    2021             :         {
    2022       53546 :             values[i++] = Int64GetDatumFast(tmp.generic_plan_calls);
    2023       53546 :             values[i++] = Int64GetDatumFast(tmp.custom_plan_calls);
    2024             :         }
    2025       53632 :         if (api_version >= PGSS_V1_11)
    2026             :         {
    2027       53576 :             values[i++] = TimestampTzGetDatum(stats_since);
    2028       53576 :             values[i++] = TimestampTzGetDatum(minmax_stats_since);
    2029             :         }
    2030             : 
    2031             :         Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
    2032             :                      api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
    2033             :                      api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
    2034             :                      api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
    2035             :                      api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
    2036             :                      api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
    2037             :                      api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
    2038             :                      api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
    2039             :                      api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
    2040             :                      api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
    2041             :                      -1 /* fail if you forget to update this assert */ ));
    2042             : 
    2043       53632 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    2044             :     }
    2045             : 
    2046         262 :     LWLockRelease(pgss->lock);
    2047             : 
    2048         262 :     free(qbuffer);
    2049         262 : }
    2050             : 
    2051             : /* Number of output arguments (columns) for pg_stat_statements_info */
    2052             : #define PG_STAT_STATEMENTS_INFO_COLS    2
    2053             : 
    2054             : /*
    2055             :  * Return statistics of pg_stat_statements.
    2056             :  */
    2057             : Datum
    2058           4 : pg_stat_statements_info(PG_FUNCTION_ARGS)
    2059             : {
    2060             :     pgssGlobalStats stats;
    2061             :     TupleDesc   tupdesc;
    2062           4 :     Datum       values[PG_STAT_STATEMENTS_INFO_COLS] = {0};
    2063           4 :     bool        nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
    2064             : 
    2065           4 :     if (!pgss || !pgss_hash)
    2066           0 :         ereport(ERROR,
    2067             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    2068             :                  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
    2069             : 
    2070             :     /* Build a tuple descriptor for our result type */
    2071           4 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    2072           0 :         elog(ERROR, "return type must be a row type");
    2073             : 
    2074             :     /* Read global statistics for pg_stat_statements */
    2075           4 :     SpinLockAcquire(&pgss->mutex);
    2076           4 :     stats = pgss->stats;
    2077           4 :     SpinLockRelease(&pgss->mutex);
    2078             : 
    2079           4 :     values[0] = Int64GetDatum(stats.dealloc);
    2080           4 :     values[1] = TimestampTzGetDatum(stats.stats_reset);
    2081             : 
    2082           4 :     PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
    2083             : }
    2084             : 
    2085             : /*
    2086             :  * Estimate shared memory space needed.
    2087             :  */
    2088             : static Size
    2089          14 : pgss_memsize(void)
    2090             : {
    2091             :     Size        size;
    2092             : 
    2093          14 :     size = MAXALIGN(sizeof(pgssSharedState));
    2094          14 :     size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
    2095             : 
    2096          14 :     return size;
    2097             : }
    2098             : 
    2099             : /*
    2100             :  * Allocate a new hashtable entry.
    2101             :  * caller must hold an exclusive lock on pgss->lock
    2102             :  *
    2103             :  * "query" need not be null-terminated; we rely on query_len instead
    2104             :  *
    2105             :  * If "sticky" is true, make the new entry artificially sticky so that it will
    2106             :  * probably still be there when the query finishes execution.  We do this by
    2107             :  * giving it a median usage value rather than the normal value.  (Strictly
    2108             :  * speaking, query strings are normalized on a best effort basis, though it
    2109             :  * would be difficult to demonstrate this even under artificial conditions.)
    2110             :  *
    2111             :  * Note: despite needing exclusive lock, it's not an error for the target
    2112             :  * entry to already exist.  This is because pgss_store releases and
    2113             :  * reacquires lock after failing to find a match; so someone else could
    2114             :  * have made the entry while we waited to get exclusive lock.
    2115             :  */
    2116             : static pgssEntry *
    2117      108872 : entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
    2118             :             bool sticky)
    2119             : {
    2120             :     pgssEntry  *entry;
    2121             :     bool        found;
    2122             : 
    2123             :     /* Make space if needed */
    2124      108872 :     while (hash_get_num_entries(pgss_hash) >= pgss_max)
    2125           0 :         entry_dealloc();
    2126             : 
    2127             :     /* Find or create an entry with desired hash code */
    2128      108872 :     entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
    2129             : 
    2130      108872 :     if (!found)
    2131             :     {
    2132             :         /* New entry, initialize it */
    2133             : 
    2134             :         /* reset the statistics */
    2135      108872 :         memset(&entry->counters, 0, sizeof(Counters));
    2136             :         /* set the appropriate initial usage count */
    2137      108872 :         entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
    2138             :         /* re-initialize the mutex each time ... we assume no one using it */
    2139      108872 :         SpinLockInit(&entry->mutex);
    2140             :         /* ... and don't forget the query text metadata */
    2141             :         Assert(query_len >= 0);
    2142      108872 :         entry->query_offset = query_offset;
    2143      108872 :         entry->query_len = query_len;
    2144      108872 :         entry->encoding = encoding;
    2145      108872 :         entry->stats_since = GetCurrentTimestamp();
    2146      108872 :         entry->minmax_stats_since = entry->stats_since;
    2147             :     }
    2148             : 
    2149      108872 :     return entry;
    2150             : }
    2151             : 
    2152             : /*
    2153             :  * qsort comparator for sorting into increasing usage order
    2154             :  */
    2155             : static int
    2156           0 : entry_cmp(const void *lhs, const void *rhs)
    2157             : {
    2158           0 :     double      l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
    2159           0 :     double      r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
    2160             : 
    2161           0 :     if (l_usage < r_usage)
    2162           0 :         return -1;
    2163           0 :     else if (l_usage > r_usage)
    2164           0 :         return +1;
    2165             :     else
    2166           0 :         return 0;
    2167             : }
    2168             : 
    2169             : /*
    2170             :  * Deallocate least-used entries.  As side effects, decays every entry's usage,
    2171             :  * refreshes pgss->cur_median_usage and pgss->mean_query_len, and bumps stats.dealloc.
    2172             :  * Caller must hold an exclusive lock on pgss->lock.
    2173             :  */
    2174             : static void
    2175           0 : entry_dealloc(void)
    2176             : {
    2177             :     HASH_SEQ_STATUS hash_seq;
    2178             :     pgssEntry **entries;		/* scratch array of all entries, sorted by usage below */
    2179             :     pgssEntry  *entry;
    2180             :     int         nvictims;
    2181             :     int         i;
    2182             :     Size        tottextlen;		/* sum of valid text lengths, terminators included */
    2183             :     int         nvalidtexts;	/* number of entries with query_len >= 0 */
    2184             : 
    2185             :     /*
    2186             :      * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
    2187             :      * While we're scanning the table, apply the decay factor to the usage
    2188             :      * values, and update the mean query length.
    2189             :      *
    2190             :      * Note that the mean query length is almost immediately obsolete, since
    2191             :      * we compute it before not after discarding the least-used entries.
    2192             :      * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
    2193             :      * making two passes to get a more current result.  Likewise, the new
    2194             :      * cur_median_usage includes the entries we're about to zap.
    2195             :      */
    2196             : 
    2197           0 :     entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));	/* one slot per current entry */
    2198             : 
    2199           0 :     i = 0;
    2200           0 :     tottextlen = 0;
    2201           0 :     nvalidtexts = 0;
    2202             : 
    2203           0 :     hash_seq_init(&hash_seq, pgss_hash);
    2204           0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2205             :     {
    2206           0 :         entries[i++] = entry;
    2207             :         /* "Sticky" entries get a different usage decay rate. */
    2208           0 :         if (IS_STICKY(entry->counters))
    2209           0 :             entry->counters.usage *= STICKY_DECREASE_FACTOR;
    2210             :         else
    2211           0 :             entry->counters.usage *= USAGE_DECREASE_FACTOR;
    2212             :         /* In the mean length computation, ignore dropped texts. */
    2213           0 :         if (entry->query_len >= 0)
    2214             :         {
    2215           0 :             tottextlen += entry->query_len + 1;	/* +1 for the text's trailing null */
    2216           0 :             nvalidtexts++;
    2217             :         }
    2218             :     }
    2219             : 
    2220             :     /* Sort into increasing order by usage */
    2221           0 :     qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
    2222             : 
    2223             :     /* Record the (approximate) median usage */
    2224           0 :     if (i > 0)
    2225           0 :         pgss->cur_median_usage = entries[i / 2]->counters.usage;	/* middle of the ascending array */
    2226             :     /* Record the mean query length */
    2227           0 :     if (nvalidtexts > 0)
    2228           0 :         pgss->mean_query_len = tottextlen / nvalidtexts;
    2229             :     else
    2230           0 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2231             : 
    2232             :     /* Now zap an appropriate fraction of lowest-usage entries */
    2233           0 :     nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);	/* at least 10 victims ... */
    2234           0 :     nvictims = Min(nvictims, i);	/* ... but never more than the entries we have */
    2235             : 
    2236           0 :     for (i = 0; i < nvictims; i++)
    2237             :     {
    2238           0 :         hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
    2239             :     }
    2240             : 
    2241           0 :     pfree(entries);
    2242             : 
    2243             :     /* Increment the number of times entries are deallocated */
    2244           0 :     SpinLockAcquire(&pgss->mutex);
    2245           0 :     pgss->stats.dealloc += 1;
    2246           0 :     SpinLockRelease(&pgss->mutex);
    2247           0 : }
    2248             : 
    2249             : /*
    2250             :  * Given a query string (not necessarily null-terminated), allocate a new
    2251             :  * entry in the external query text file and store the string there.
    2252             :  *
    2253             :  * If successful, returns true, and stores the new entry's offset in the file
    2254             :  * into *query_offset.  Also, if gc_count isn't NULL, *gc_count is set to the
    2255             :  * number of garbage collections that have occurred so far.
    2256             :  *
    2257             :  * On failure, logs a message and returns false; the reserved space is not given back.
    2258             :  *
    2259             :  * At least a shared lock on pgss->lock must be held by the caller, so as
    2260             :  * to prevent a concurrent garbage collection.  Share-lock-holding callers
    2261             :  * should pass a gc_count pointer to obtain the number of garbage collections,
    2262             :  * so that they can recheck the count after obtaining exclusive lock to
    2263             :  * detect whether a garbage collection occurred (and removed this entry).
    2264             :  */
    2265             : static bool
    2266       57062 : qtext_store(const char *query, int query_len,
    2267             :             Size *query_offset, int *gc_count)
    2268             : {
    2269             :     Size        off;
    2270             :     int         fd;
    2271             : 
    2272             :     /*
    2273             :      * We use a spinlock to protect extent/n_writers/gc_count, so that
    2274             :      * multiple processes may execute this function concurrently.
    2275             :      */
    2276       57062 :     SpinLockAcquire(&pgss->mutex);
    2277       57062 :     off = pgss->extent;
    2278       57062 :     pgss->extent += query_len + 1;	/* reserve room for the text plus a trailing null */
    2279       57062 :     pgss->n_writers++;
    2280       57062 :     if (gc_count)
    2281       57062 :         *gc_count = pgss->gc_count;
    2282       57062 :     SpinLockRelease(&pgss->mutex);
    2283             : 
    2284       57062 :     *query_offset = off;		/* set even if the write below fails */
    2285             : 
    2286             :     /*
    2287             :      * Don't allow the file to grow larger than what qtext_load_file can
    2288             :      * (theoretically) handle; seen to be reachable on 32-bit platforms.  extent advances even
    2289             :      * for rejected stores, so test "off" alone first: the subtraction must not wrap around.
    2290             :      */
    2291       57062 :     if (unlikely(off >= MaxAllocHugeSize || query_len >= MaxAllocHugeSize - off))
    2292             :     {
    2293           0 :         errno = EFBIG;          /* not quite right, but it'll do */
    2294           0 :         fd = -1;
    2295           0 :         goto error;
    2296             :     }
    2297             : 
    2298             :     /* Now write the data into the successfully-reserved part of the file */
    2299       57062 :     fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
    2300       57062 :     if (fd < 0)
    2301           0 :         goto error;
    2302             : 
    2303       57062 :     if (pg_pwrite(fd, query, query_len, off) != query_len)
    2304           0 :         goto error;
    2305       57062 :     if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)	/* terminator, since query need not have one */
    2306           0 :         goto error;
    2307             : 
    2308       57062 :     CloseTransientFile(fd);
    2309             : 
    2310             :     /* Mark our write complete */
    2311       57062 :     SpinLockAcquire(&pgss->mutex);
    2312       57062 :     pgss->n_writers--;
    2313       57062 :     SpinLockRelease(&pgss->mutex);
    2314             : 
    2315       57062 :     return true;
    2316             : 
    2317           0 : error:
    2318           0 :     ereport(LOG,
    2319             :             (errcode_for_file_access(),
    2320             :              errmsg("could not write file \"%s\": %m",
    2321             :                     PGSS_TEXT_FILE)));
    2322             : 
    2323           0 :     if (fd >= 0)
    2324           0 :         CloseTransientFile(fd);
    2325             : 
    2326             :     /* Mark our write complete */
    2327           0 :     SpinLockAcquire(&pgss->mutex);
    2328           0 :     pgss->n_writers--;
    2329           0 :     SpinLockRelease(&pgss->mutex);
    2330             : 
    2331           0 :     return false;
    2332             : }
    2333             : 
    2334             : /*
    2335             :  * Read the external query text file into a malloc'd buffer.
    2336             :  *
    2337             :  * Returns NULL (without throwing an error) if unable to read, eg
    2338             :  * file not there or insufficient memory.
    2339             :  *
    2340             :  * On success, the buffer size is also returned into *buffer_size; caller must free() the buffer.
    2341             :  *
    2342             :  * This can be called without any lock on pgss->lock, but in that case
    2343             :  * the caller is responsible for verifying that the result is sane.
    2344             :  */
    2345             : static char *
    2346         272 : qtext_load_file(Size *buffer_size)
    2347             : {
    2348             :     char       *buf;
    2349             :     int         fd;
    2350             :     struct stat stat;
    2351             :     Size        nread;
    2352             : 
    2353         272 :     fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
    2354         272 :     if (fd < 0)
    2355             :     {
    2356           0 :         if (errno != ENOENT)	/* a missing file is not worth a log entry */
    2357           0 :             ereport(LOG,
    2358             :                     (errcode_for_file_access(),
    2359             :                      errmsg("could not read file \"%s\": %m",
    2360             :                             PGSS_TEXT_FILE)));
    2361           0 :         return NULL;
    2362             :     }
    2363             : 
    2364             :     /* Get file length */
    2365         272 :     if (fstat(fd, &stat))
    2366             :     {
    2367           0 :         ereport(LOG,
    2368             :                 (errcode_for_file_access(),
    2369             :                  errmsg("could not stat file \"%s\": %m",
    2370             :                         PGSS_TEXT_FILE)));
    2371           0 :         CloseTransientFile(fd);
    2372           0 :         return NULL;
    2373             :     }
    2374             : 
    2375             :     /* Allocate buffer (never 0 bytes: malloc(0) may return NULL); off_t might be wider than size_t */
    2376         272 :     if (stat.st_size <= MaxAllocHugeSize)
    2377         272 :         buf = (char *) malloc(Max(stat.st_size, 1));
    2378             :     else
    2379           0 :         buf = NULL;
    2380         272 :     if (buf == NULL)
    2381             :     {
    2382           0 :         ereport(LOG,
    2383             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
    2384             :                  errmsg("out of memory"),
    2385             :                  errdetail("Could not allocate enough memory to read file \"%s\".",
    2386             :                            PGSS_TEXT_FILE)));
    2387           0 :         CloseTransientFile(fd);
    2388           0 :         return NULL;
    2389             :     }
    2390             : 
    2391             :     /*
    2392             :      * OK, slurp in the file.  Windows fails if we try to read more than
    2393             :      * INT_MAX bytes at once, and other platforms might not like that either,
    2394             :      * so read a very large file in 1GB segments.
    2395             :      */
    2396         272 :     nread = 0;
    2397         542 :     while (nread < stat.st_size)
    2398             :     {
    2399         270 :         int         toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
    2400             : 
    2401             :         /*
    2402             :          * If we get a short read and errno doesn't get set, the reason is
    2403             :          * probably that garbage collection truncated the file since we did
    2404             :          * the fstat(), so we don't log a complaint --- but we don't return
    2405             :          * the data, either, since it's most likely corrupt due to concurrent
    2406             :          * writes from garbage collection.
    2407             :          */
    2408         270 :         errno = 0;
    2409         270 :         if (read(fd, buf + nread, toread) != toread)
    2410             :         {
    2411           0 :             if (errno)
    2412           0 :                 ereport(LOG,
    2413             :                         (errcode_for_file_access(),
    2414             :                          errmsg("could not read file \"%s\": %m",
    2415             :                                 PGSS_TEXT_FILE)));
    2416           0 :             free(buf);
    2417           0 :             CloseTransientFile(fd);
    2418           0 :             return NULL;
    2419             :         }
    2420         270 :         nread += toread;
    2421             :     }
    2422             : 
    2423         272 :     if (CloseTransientFile(fd) != 0)	/* data was already read, so only log this */
    2424           0 :         ereport(LOG,
    2425             :                 (errcode_for_file_access(),
    2426             :                  errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
    2427             : 
    2428         272 :     *buffer_size = nread;		/* stays 0 for an empty file, though buf is non-NULL */
    2429         272 :     return buf;
    2430             : }
    2431             : 
    2432             : /*
    2433             :  * Locate a query text in the file image previously read by qtext_load_file().
    2434             :  *
    2435             :  * We validate the given offset/length, and return NULL if bogus.  Otherwise,
    2436             :  * the result points to a null-terminated string within the buffer (it is not a copy).
    2437             :  */
    2438             : static char *
    2439      160816 : qtext_fetch(Size query_offset, int query_len,
    2440             :             char *buffer, Size buffer_size)
    2441             : {
    2442             :     /* File read failed? */
    2443      160816 :     if (buffer == NULL)
    2444           0 :         return NULL;
    2445             :     /* Bogus offset/length? */
    2446      160816 :     if (query_len < 0 ||
    2447      160816 :         query_offset + query_len >= buffer_size)	/* ">=": the null at offset + len must be inside too */
    2448           0 :         return NULL;
    2449             :     /* As a further sanity check, make sure there's a trailing null */
    2450      160816 :     if (buffer[query_offset + query_len] != '\0')
    2451           0 :         return NULL;
    2452             :     /* Looks OK */
    2453      160816 :     return buffer + query_offset;
    2454             : }
    2455             : 
    2456             : /*
    2457             :  * Do we need to garbage-collect the external query text file?  True only when
    2458             :  * the file extent reaches both of the size thresholds tested below.
    2459             :  * Caller should hold at least a shared lock on pgss->lock.
    2460             :  */
    2461             : static bool
    2462       57062 : need_gc_qtexts(void)
    2463             : {
    2464             :     Size        extent;
    2465             : 
    2466             :     /* Read shared extent pointer */
    2467       57062 :     SpinLockAcquire(&pgss->mutex);
    2468       57062 :     extent = pgss->extent;
    2469       57062 :     SpinLockRelease(&pgss->mutex);
    2470             : 
    2471             :     /*
    2472             :      * Don't proceed if file does not exceed 512 bytes per possible entry.
    2473             :      *
    2474             :      * Here and in the next test, 32-bit machines have overflow hazards if
    2475             :      * pgss_max and/or mean_query_len are large.  Force the multiplications
    2476             :      * and comparisons to be done in uint64 arithmetic to forestall trouble.
    2477             :      */
    2478       57062 :     if ((uint64) extent < (uint64) 512 * pgss_max)
    2479       57062 :         return false;
    2480             : 
    2481             :     /*
    2482             :      * Don't proceed if file is less than about 50% bloat.  Nothing can or
    2483             :      * should be done in the event of unusually large query texts accounting
    2484             :      * for file's large size.  We go to the trouble of maintaining the mean
    2485             :      * query length in order to prevent garbage collection from thrashing
    2486             :      * uselessly.
    2487             :      */
    2488           0 :     if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)	/* under twice mean length times max entries */
    2489           0 :         return false;
    2490             : 
    2491           0 :     return true;
    2492             : }
    2493             : 
    2494             : /*
    2495             :  * Garbage-collect orphaned query texts in external file.
    2496             :  *
    2497             :  * This won't be called often in the typical case, since it's likely that
    2498             :  * there won't be too much churn, and besides, a similar compaction process
    2499             :  * occurs when serializing to disk at shutdown or as part of resetting.
    2500             :  * Despite this, it seems prudent to plan for the edge case where the file
    2501             :  * becomes unreasonably large, with no other method of compaction likely to
    2502             :  * occur in the foreseeable future.
    2503             :  *
    2504             :  * The caller must hold an exclusive lock on pgss->lock.
    2505             :  *
    2506             :  * At the first sign of trouble we unlink the query text file to get a clean
    2507             :  * slate (although existing statistics are retained), rather than risk
    2508             :  * thrashing by allowing the same problem case to recur indefinitely.
    2509             :  */
    2510             : static void
    2511           0 : gc_qtexts(void)
    2512             : {
    2513             :     char       *qbuffer;
    2514             :     Size        qbuffer_size;
    2515           0 :     FILE       *qfile = NULL;	/* NULL tells gc_fail there is no open file to close */
    2516             :     HASH_SEQ_STATUS hash_seq;
    2517             :     pgssEntry  *entry;
    2518             :     Size        extent;		/* bytes written to the compacted file so far */
    2519             :     int         nentries;	/* entries whose text was kept */
    2520             : 
    2521             :     /*
    2522             :      * When called from pgss_store, some other session might have proceeded
    2523             :      * with garbage collection in the no-lock-held interim of lock strength
    2524             :      * escalation.  Check once more that this is actually necessary.
    2525             :      */
    2526           0 :     if (!need_gc_qtexts())
    2527           0 :         return;
    2528             : 
    2529             :     /*
    2530             :      * Load the old texts file.  If we fail (out of memory, for instance),
    2531             :      * invalidate query texts.  Hopefully this is rare.  It might seem better
    2532             :      * to leave things alone on an OOM failure, but the problem is that the
    2533             :      * file is only going to get bigger; hoping for a future non-OOM result is
    2534             :      * risky and can easily lead to complete denial of service.
    2535             :      */
    2536           0 :     qbuffer = qtext_load_file(&qbuffer_size);
    2537           0 :     if (qbuffer == NULL)
    2538           0 :         goto gc_fail;
    2539             : 
    2540             :     /*
    2541             :      * We overwrite the query texts file in place, so as to reduce the risk of
    2542             :      * an out-of-disk-space failure.  Since the file is guaranteed not to get
    2543             :      * larger, this should always work on traditional filesystems; though we
    2544             :      * could still lose on copy-on-write filesystems.
    2545             :      */
    2546           0 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2547           0 :     if (qfile == NULL)
    2548             :     {
    2549           0 :         ereport(LOG,
    2550             :                 (errcode_for_file_access(),
    2551             :                  errmsg("could not write file \"%s\": %m",
    2552             :                         PGSS_TEXT_FILE)));
    2553           0 :         goto gc_fail;
    2554             :     }
    2555             : 
    2556           0 :     extent = 0;
    2557           0 :     nentries = 0;
    2558             : 
    2559           0 :     hash_seq_init(&hash_seq, pgss_hash);
    2560           0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2561             :     {
    2562           0 :         int         query_len = entry->query_len;
    2563           0 :         char       *qry = qtext_fetch(entry->query_offset,
    2564             :                                       query_len,
    2565             :                                       qbuffer,
    2566             :                                       qbuffer_size);
    2567             : 
    2568           0 :         if (qry == NULL)
    2569             :         {
    2570             :             /* Trouble ... drop the text */
    2571           0 :             entry->query_offset = 0;
    2572           0 :             entry->query_len = -1;
    2573             :             /* entry will not be counted in mean query length computation */
    2574           0 :             continue;
    2575             :         }
    2576             : 
    2577           0 :         if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)	/* text plus its trailing null */
    2578             :         {
    2579           0 :             ereport(LOG,
    2580             :                     (errcode_for_file_access(),
    2581             :                      errmsg("could not write file \"%s\": %m",
    2582             :                             PGSS_TEXT_FILE)));
    2583           0 :             hash_seq_term(&hash_seq);	/* leaving the scan early, so end it explicitly */
    2584           0 :             goto gc_fail;
    2585             :         }
    2586             : 
    2587           0 :         entry->query_offset = extent;	/* the text's position in the compacted file */
    2588           0 :         extent += query_len + 1;
    2589           0 :         nentries++;
    2590             :     }
    2591             : 
    2592             :     /*
    2593             :      * Truncate away any now-unused space.  If this fails for some odd reason,
    2594             :      * we log it, but there's no need to fail.
    2595             :      */
    2596           0 :     if (ftruncate(fileno(qfile), extent) != 0)
    2597           0 :         ereport(LOG,
    2598             :                 (errcode_for_file_access(),
    2599             :                  errmsg("could not truncate file \"%s\": %m",
    2600             :                         PGSS_TEXT_FILE)));
    2601             : 
    2602           0 :     if (FreeFile(qfile))
    2603             :     {
    2604           0 :         ereport(LOG,
    2605             :                 (errcode_for_file_access(),
    2606             :                  errmsg("could not write file \"%s\": %m",
    2607             :                         PGSS_TEXT_FILE)));
    2608           0 :         qfile = NULL;		/* keep gc_fail from calling FreeFile on it a second time */
    2609           0 :         goto gc_fail;
    2610             :     }
    2611             : 
    2612           0 :     elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
    2613             :          pgss->extent, extent);
    2614             : 
    2615             :     /* Reset the shared extent pointer to the end of the compacted file */
    2616           0 :     pgss->extent = extent;
    2617             : 
    2618             :     /*
    2619             :      * Also update the mean query length, to be sure that need_gc_qtexts()
    2620             :      * won't still think we have a problem.
    2621             :      */
    2622           0 :     if (nentries > 0)
    2623           0 :         pgss->mean_query_len = extent / nentries;
    2624             :     else
    2625           0 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2626             : 
    2627           0 :     free(qbuffer);
    2628             : 
    2629             :     /*
    2630             :      * OK, count a garbage collection cycle.  (Note: even though we have
    2631             :      * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
    2632             :      * other processes may examine gc_count while holding only the mutex.
    2633             :      * Also, we have to advance the count *after* we've rewritten the file,
    2634             :      * else other processes might not realize they read a stale file.)
    2635             :      */
    2636           0 :     record_gc_qtexts();
    2637             : 
    2638           0 :     return;
    2639             : 
    2640           0 : gc_fail:
    2641             :     /* clean up resources (qbuffer is NULL here if the initial load failed) */
    2642           0 :     if (qfile)
    2643           0 :         FreeFile(qfile);
    2644           0 :     free(qbuffer);
    2645             : 
    2646             :     /*
    2647             :      * Since the contents of the external file are now uncertain, mark all
    2648             :      * hashtable entries as having invalid texts.
    2649             :      */
    2650           0 :     hash_seq_init(&hash_seq, pgss_hash);
    2651           0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2652             :     {
    2653           0 :         entry->query_offset = 0;
    2654           0 :         entry->query_len = -1;
    2655             :     }
    2656             : 
    2657             :     /*
    2658             :      * Destroy the query text file and create a new, empty one
    2659             :      */
    2660           0 :     (void) unlink(PGSS_TEXT_FILE);
    2661           0 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2662           0 :     if (qfile == NULL)
    2663           0 :         ereport(LOG,
    2664             :                 (errcode_for_file_access(),
    2665             :                  errmsg("could not recreate file \"%s\": %m",
    2666             :                         PGSS_TEXT_FILE)));
    2667             :     else
    2668           0 :         FreeFile(qfile);
    2669             : 
    2670             :     /* Reset the shared extent pointer */
    2671           0 :     pgss->extent = 0;
    2672             : 
    2673             :     /* Reset mean_query_len to match the new state */
    2674           0 :     pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2675             : 
    2676             :     /*
    2677             :      * Bump the GC count even though we failed.
    2678             :      *
    2679             :      * This is needed to make concurrent readers of file without any lock on
    2680             :      * pgss->lock notice existence of new version of file.  Once readers
    2681             :      * subsequently observe a change in GC count with pgss->lock held, that
    2682             :      * forces a safe reopen of file.  Writers also require that we bump here,
    2683             :      * of course.  (As required by locking protocol, readers and writers don't
    2684             :      * trust earlier file contents until gc_count is found unchanged after
    2685             :      * pgss->lock acquired in shared or exclusive mode respectively.)
    2686             :      */
    2687           0 :     record_gc_qtexts();
    2688             : }
    2689             : 
    2690             : #define SINGLE_ENTRY_RESET(e) \
    2691             : if (e) { \
    2692             :     if (minmax_only) { \
    2693             :         /* When requested reset only min/max statistics of an entry */ \
    2694             :         for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
    2695             :         { \
    2696             :             e->counters.max_time[kind] = 0; \
    2697             :             e->counters.min_time[kind] = 0; \
    2698             :         } \
    2699             :         e->minmax_stats_since = stats_reset; \
    2700             :     } \
    2701             :     else \
    2702             :     { \
    2703             :         /* Remove the key otherwise  */ \
    2704             :         hash_search(pgss_hash, &e->key, HASH_REMOVE, NULL); \
    2705             :         num_remove++; \
    2706             :     } \
    2707             : }
    2708             : 
    2709             : /*
    2710             :  * Reset entries corresponding to parameters passed.
    2711             :  */
    2712             : static TimestampTz
    2713         236 : entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
    2714             : {
    2715             :     HASH_SEQ_STATUS hash_seq;
    2716             :     pgssEntry  *entry;
    2717             :     FILE       *qfile;
    2718             :     int64       num_entries;
    2719         236 :     int64       num_remove = 0;
    2720             :     pgssHashKey key;
    2721             :     TimestampTz stats_reset;
    2722             : 
    2723         236 :     if (!pgss || !pgss_hash)
    2724           0 :         ereport(ERROR,
    2725             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    2726             :                  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
    2727             : 
    2728         236 :     LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
    2729         236 :     num_entries = hash_get_num_entries(pgss_hash);
    2730             : 
    2731         236 :     stats_reset = GetCurrentTimestamp();
    2732             : 
    2733         236 :     if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
    2734             :     {
    2735             :         /* If all the parameters are available, use the fast path. */
    2736           2 :         memset(&key, 0, sizeof(pgssHashKey));
    2737           2 :         key.userid = userid;
    2738           2 :         key.dbid = dbid;
    2739           2 :         key.queryid = queryid;
    2740             : 
    2741             :         /*
    2742             :          * Reset the entry if it exists, starting with the non-top-level
    2743             :          * entry.
    2744             :          */
    2745           2 :         key.toplevel = false;
    2746           2 :         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
    2747             : 
    2748           2 :         SINGLE_ENTRY_RESET(entry);
    2749             : 
    2750             :         /* Also reset the top-level entry if it exists. */
    2751           2 :         key.toplevel = true;
    2752           2 :         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
    2753             : 
    2754           2 :         SINGLE_ENTRY_RESET(entry);
    2755             :     }
    2756         234 :     else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
    2757             :     {
    2758             :         /* Reset entries corresponding to valid parameters. */
    2759           8 :         hash_seq_init(&hash_seq, pgss_hash);
    2760         102 :         while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2761             :         {
    2762          94 :             if ((!userid || entry->key.userid == userid) &&
    2763          72 :                 (!dbid || entry->key.dbid == dbid) &&
    2764          68 :                 (!queryid || entry->key.queryid == queryid))
    2765             :             {
    2766          14 :                 SINGLE_ENTRY_RESET(entry);
    2767             :             }
    2768             :         }
    2769             :     }
    2770             :     else
    2771             :     {
    2772             :         /* Reset all entries. */
    2773         226 :         hash_seq_init(&hash_seq, pgss_hash);
    2774        2212 :         while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2775             :         {
    2776        1804 :             SINGLE_ENTRY_RESET(entry);
    2777             :         }
    2778             :     }
    2779             : 
    2780             :     /* All entries are removed? */
    2781         236 :     if (num_entries != num_remove)
    2782          12 :         goto release_lock;
    2783             : 
    2784             :     /*
    2785             :      * Reset global statistics for pg_stat_statements since all entries are
    2786             :      * removed.
    2787             :      */
    2788         224 :     SpinLockAcquire(&pgss->mutex);
    2789         224 :     pgss->stats.dealloc = 0;
    2790         224 :     pgss->stats.stats_reset = stats_reset;
    2791         224 :     SpinLockRelease(&pgss->mutex);
    2792             : 
    2793             :     /*
    2794             :      * Write new empty query file, perhaps even creating a new one to recover
    2795             :      * if the file was missing.
    2796             :      */
    2797         224 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2798         224 :     if (qfile == NULL)
    2799             :     {
    2800           0 :         ereport(LOG,
    2801             :                 (errcode_for_file_access(),
    2802             :                  errmsg("could not create file \"%s\": %m",
    2803             :                         PGSS_TEXT_FILE)));
    2804           0 :         goto done;
    2805             :     }
    2806             : 
    2807             :     /* If ftruncate fails, log it, but it's not a fatal problem */
    2808         224 :     if (ftruncate(fileno(qfile), 0) != 0)
    2809           0 :         ereport(LOG,
    2810             :                 (errcode_for_file_access(),
    2811             :                  errmsg("could not truncate file \"%s\": %m",
    2812             :                         PGSS_TEXT_FILE)));
    2813             : 
    2814         224 :     FreeFile(qfile);
    2815             : 
    2816         224 : done:
    2817         224 :     pgss->extent = 0;
    2818             :     /* This counts as a query text garbage collection for our purposes */
    2819         224 :     record_gc_qtexts();
    2820             : 
    2821         236 : release_lock:
    2822         236 :     LWLockRelease(pgss->lock);
    2823             : 
    2824         236 :     return stats_reset;
    2825             : }
    2826             : 
    2827             : /*
    2828             :  * Generate a normalized version of the query string that will be used to
    2829             :  * represent all similar queries.
    2830             :  *
    2831             :  * Note that the normalized representation may well vary depending on
    2832             :  * just which "equivalent" query is used to create the hashtable entry.
    2833             :  * We assume this is OK.
    2834             :  *
    2835             :  * If query_loc > 0, then "query" has been advanced by that much compared to
    2836             :  * the original string start, so we need to translate the provided locations
    2837             :  * to compensate.  (This lets us avoid re-scanning statements before the one
    2838             :  * of interest, so it's worth doing.)
    2839             :  *
    2840             :  * *query_len_p contains the input string length, and is updated with
    2841             :  * the result string length on exit.  The resulting string might be longer
    2842             :  * or shorter depending on what happens with replacement of constants.
    2843             :  *
                     :  * Every location in jstate->clocations that is not skipped (see the loop
                     :  * below) is replaced by a "$n" symbol.  Numbering starts at
                     :  * jstate->highest_extern_param_id + 1 and increases by one per replacement;
                     :  * entries flagged "squashed" additionally get a placeholder comment
                     :  * appended.  As a side effect, jstate->clocations is sorted and has its
                     :  * lengths filled in by fill_in_constant_lengths().
                     :  *
    2844             :  * Returns a palloc'd, NUL-terminated string; the length stored in
                     :  * *query_len_p does not count the terminator.
    2845             :  */
    2846             : static char *
    2847       21328 : generate_normalized_query(JumbleState *jstate, const char *query,
    2848             :                           int query_loc, int *query_len_p)
    2849             : {
    2850             :     char       *norm_query;
    2851       21328 :     int         query_len = *query_len_p;
    2852             :     int         norm_query_buflen,  /* Space allowed for norm_query */
    2853             :                 len_to_wrt,     /* Length (in bytes) to write */
    2854       21328 :                 quer_loc = 0,   /* Source query byte location */
    2855       21328 :                 n_quer_loc = 0, /* Normalized query byte location */
    2856       21328 :                 last_off = 0,   /* Offset from start for previous tok */
    2857       21328 :                 last_tok_len = 0;   /* Length (in bytes) of that tok */
    2858       21328 :     int         num_constants_replaced = 0;
    2859             : 
    2860             :     /*
    2861             :      * Get constants' lengths (core system only gives us locations).  Note
    2862             :      * this also ensures the items are sorted by location.
    2863             :      */
    2864       21328 :     fill_in_constant_lengths(jstate, query, query_loc);
    2865             : 
    2866             :     /*
    2867             :      * Allow for $n symbols to be longer than the constants they replace.
    2868             :      * Constants must take at least one byte in text form, while a $n symbol
    2869             :      * certainly isn't more than 11 bytes, even if n reaches INT_MAX.  We
    2870             :      * could refine that limit based on the max value of n for the current
    2871             :      * query, but it hardly seems worth any extra effort to do so.
                     :      *
                     :      * NOTE(review): the 10-bytes-per-location allowance is argued above for
                     :      * a bare $n only.  A squashed entry also emits an 11-byte placeholder
                     :      * comment, so the bound relies on squashed spans being long enough to
                     :      * absorb it (a 3-byte span with a two-digit n would need 11 extra
                     :      * bytes).  Nothing in this function enforces that -- confirm against
                     :      * the code that records squashed locations.
    2872             :      */
    2873       21328 :     norm_query_buflen = query_len + jstate->clocations_count * 10;
    2874             : 
    2875             :     /* Allocate result buffer; the extra byte holds the terminating NUL */
    2876       21328 :     norm_query = palloc(norm_query_buflen + 1);
    2877             : 
    2878       84138 :     for (int i = 0; i < jstate->clocations_count; i++)
    2879             :     {
    2880             :         int         off,        /* Offset from start for cur tok */
    2881             :                     tok_len;    /* Length (in bytes) of that tok */
    2882             : 
    2883             :         /*
    2884             :          * If we have an external param at this location, but no lists are
    2885             :          * being squashed across the query, then we skip here; this will make
    2886             :          * us print the characters found in the original query that represent
    2887             :          * the parameter in the next iteration (or after the loop is done),
    2888             :          * which is a bit odd but seems to work okay in most cases.
    2889             :          */
    2890       62810 :         if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists)
    2891         298 :             continue;
    2892             : 
    2893       62512 :         off = jstate->clocations[i].location;
    2894             : 
    2895             :         /* Adjust recorded location if we're dealing with partial string */
    2896       62512 :         off -= query_loc;
    2897             : 
    2898       62512 :         tok_len = jstate->clocations[i].length;
    2899             : 
    2900       62512 :         if (tok_len < 0)
    2901         482 :             continue;           /* ignore any duplicates */
    2902             : 
    2903             :         /* Copy next chunk: source text between the previous token and this one */
    2904       62030 :         len_to_wrt = off - last_off;
    2905       62030 :         len_to_wrt -= last_tok_len;
    2906             :         Assert(len_to_wrt >= 0);
    2907       62030 :         memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
    2908       62030 :         n_quer_loc += len_to_wrt;
    2909             : 
    2910             :         /*
    2911             :          * And insert a param symbol in place of the constant token; and, if
    2912             :          * we have a squashable list, insert a placeholder comment starting
    2913             :          * from the list's second value.  sprintf's return value (bytes
                     :          * written, not counting the NUL) advances the output position.
    2914             :          */
    2915       62030 :         n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s",
    2916       62030 :                               num_constants_replaced + 1 + jstate->highest_extern_param_id,
    2917       62030 :                               jstate->clocations[i].squashed ? " /*, ... */" : "");
    2918       62030 :         num_constants_replaced++;
    2919             : 
    2920             :         /* Move forward: resume copying just past this token */
    2921       62030 :         quer_loc = off + tok_len;
    2922       62030 :         last_off = off;
    2923       62030 :         last_tok_len = tok_len;
    2924             :     }
    2925             : 
    2926             :     /*
    2927             :      * We've copied up until the last ignorable constant.  Copy over the
    2928             :      * remaining bytes of the original query string.
    2929             :      */
    2930       21328 :     len_to_wrt = query_len - quer_loc;
    2931             : 
    2932             :     Assert(len_to_wrt >= 0);
    2933       21328 :     memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
    2934       21328 :     n_quer_loc += len_to_wrt;
    2935             : 
    2936             :     Assert(n_quer_loc <= norm_query_buflen);
    2937       21328 :     norm_query[n_quer_loc] = '\0';
    2938             : 
    2939       21328 :     *query_len_p = n_quer_loc;
    2940       21328 :     return norm_query;
    2941             : }
    2942             : 
    2943             : /*
    2944             :  * Given a valid SQL string and an array of constant-location records,
    2945             :  * fill in the textual lengths of those constants.
    2946             :  *
    2947             :  * The constants may use any allowed constant syntax, such as float literals,
    2948             :  * bit-strings, single-quoted strings and dollar-quoted strings.  This is
    2949             :  * accomplished by using the public API for the core scanner.
    2950             :  *
    2951             :  * It is the caller's job to ensure that the string is a valid SQL statement
    2952             :  * with constants at the indicated locations.  Since in practice the string
    2953             :  * has already been parsed, and the locations that the caller provides will
    2954             :  * have originated from within the authoritative parser, this should not be
    2955             :  * a problem.
    2956             :  *
    2957             :  * Duplicate constant pointers are possible, and will have their lengths
    2958             :  * marked as '-1', so that they are later ignored.  (Actually, we assume the
    2959             :  * lengths were initialized as -1 to start with, and don't change them here.)
    2960             :  *
    2961             :  * If query_loc > 0, then "query" has been advanced by that much compared to
    2962             :  * the original string start, so we need to translate the provided locations
    2963             :  * to compensate.  (This lets us avoid re-scanning statements before the one
    2964             :  * of interest, so it's worth doing.)
    2965             :  *
    2966             :  * N.B. There is an assumption that a '-' character at a Const location begins
    2967             :  * a negative numeric constant.  This precludes there ever being another
    2968             :  * reason for a constant to start with a '-'.
                     :  *
                     :  * On return, jstate->clocations is sorted by location (when it has more than
                     :  * one entry).  Entries flagged "squashed" and entries treated as duplicates
                     :  * are skipped without touching their length field.
    2969             :  */
    2970             : static void
    2971       21328 : fill_in_constant_lengths(JumbleState *jstate, const char *query,
    2972             :                          int query_loc)
    2973             : {
    2974             :     LocationLen *locs;
    2975             :     core_yyscan_t yyscanner;
    2976             :     core_yy_extra_type yyextra;
    2977             :     core_YYSTYPE yylval;
    2978             :     YYLTYPE     yylloc;
    2979       21328 :     int         last_loc = -1;
    2980             :     int         i;
    2981             : 
    2982             :     /*
    2983             :      * Sort the records by location so that we can process them in order while
    2984             :      * scanning the query text.
    2985             :      */
    2986       21328 :     if (jstate->clocations_count > 1)
    2987       13492 :         qsort(jstate->clocations, jstate->clocations_count,
    2988             :               sizeof(LocationLen), comp_location);
    2989       21328 :     locs = jstate->clocations;
    2990             : 
    2991             :     /* initialize the flex scanner --- should match raw_parser() */
    2992       21328 :     yyscanner = scanner_init(query,
    2993             :                              &yyextra,
    2994             :                              &ScanKeywords,
    2995             :                              ScanKeywordTokens);
    2996             : 
    2997             :     /* we don't want to re-emit any escape string warnings */
    2998       21328 :     yyextra.escape_string_warning = false;
    2999             : 
    3000             :     /* Search for each constant, in sequence */
    3001       84138 :     for (i = 0; i < jstate->clocations_count; i++)
    3002             :     {
    3003       62810 :         int         loc = locs[i].location;
    3004             :         int         tok;
    3005             : 
    3006             :         /* Adjust recorded location if we're dealing with partial string */
    3007       62810 :         loc -= query_loc;
    3008             : 
    3009             :         Assert(loc >= 0);
    3010             : 
    3011       62810 :         if (locs[i].squashed)
    3012        1256 :             continue;           /* squashable list: length is not computed here */
    3013             : 
    3014       61554 :         if (loc <= last_loc)
    3015         486 :             continue;           /* At or before last handled location: duplicate, ignore */
    3016             : 
    3017             :         /* Lex tokens until we find the desired constant */
    3018             :         for (;;)
    3019             :         {
    3020      478218 :             tok = core_yylex(&yylval, &yylloc, yyscanner);
    3021             : 
    3022             :             /* We should not hit end-of-string, but if we do, behave sanely */
    3023      478218 :             if (tok == 0)
    3024           0 :                 break;          /* out of inner for-loop */
    3025             : 
    3026             :             /*
    3027             :              * We should find the token position exactly, but if we somehow
    3028             :              * run past it, work with that.
    3029             :              */
    3030      478218 :             if (yylloc >= loc)
    3031             :             {
    3032       61068 :                 if (query[loc] == '-')
    3033             :                 {
    3034             :                     /*
    3035             :                      * It's a negative value - this is the one and only case
    3036             :                      * where we replace more than a single token.
    3037             :                      *
    3038             :                      * Do not compensate for the core system's special-case
    3039             :                      * adjustment of location to that of the leading '-'
    3040             :                      * operator in the event of a negative constant.  It is
    3041             :                      * also useful for our purposes to start from the minus
    3042             :                      * symbol.  In this way, queries like "select * from foo
    3043             :                      * where bar = 1" and "select * from foo where bar = -2"
    3044             :                      * will have identical normalized query strings.
    3045             :                      */
    3046         736 :                     tok = core_yylex(&yylval, &yylloc, yyscanner);
    3047         736 :                     if (tok == 0)
    3048           0 :                         break;  /* out of inner for-loop */
    3049             :                 }
    3050             : 
    3051             :                 /*
    3052             :                  * We now rely on the assumption that flex has placed a zero
    3053             :                  * byte after the text of the current token in scanbuf.  The
                     :                  * strlen therefore measures from the constant's start through
                     :                  * the end of the most recently lexed token, which in the
                     :                  * negative case covers both the '-' and the token after it.
    3054             :                  */
    3055       61068 :                 locs[i].length = strlen(yyextra.scanbuf + loc);
    3056       61068 :                 break;          /* out of inner for-loop */
    3057             :             }
    3058             :         }
    3059             : 
    3060             :         /* If we hit end-of-string, give up, leaving remaining lengths -1 */
    3061       61068 :         if (tok == 0)
    3062           0 :             break;
    3063             : 
    3064       61068 :         last_loc = loc;
    3065             :     }
    3066             : 
    3067       21328 :     scanner_finish(yyscanner);
    3068       21328 : }
    3069             : 
    3070             : /*
    3071             :  * comp_location: comparator for qsorting LocationLen structs by location
                     :  *
                     :  * Both arguments point to LocationLen elements; only their "location"
                     :  * fields are compared.  The result is whatever pg_cmp_s32() returns for the
                     :  * two locations (presumably the usual negative/zero/positive three-way
                     :  * result that qsort expects -- confirm against its definition).
    3072             :  */
    3073             : static int
    3074       69776 : comp_location(const void *a, const void *b)
    3075             : {
    3076       69776 :     int         l = ((const LocationLen *) a)->location;
    3077       69776 :     int         r = ((const LocationLen *) b)->location;
    3078             : 
    3079       69776 :     return pg_cmp_s32(l, r);
    3080             : }

Generated by: LCOV version 1.16