LCOV - code coverage report
Current view: top level - contrib/pg_stat_statements - pg_stat_statements.c (source / functions) Hit Total Coverage
Test: PostgreSQL 16beta1 Lines: 619 901 68.7 %
Date: 2023-05-31 02:11:47 Functions: 37 45 82.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pg_stat_statements.c
       4             :  *      Track statement planning and execution times as well as resource
       5             :  *      usage across a whole database cluster.
       6             :  *
       7             :  * Execution costs are totaled for each distinct source query, and kept in
       8             :  * a shared hashtable.  (We track only as many distinct queries as will fit
       9             :  * in the designated amount of shared memory.)
      10             :  *
      11             :  * Starting in Postgres 9.2, this module normalized query entries.  As of
      12             :  * Postgres 14, the normalization is done by the core if compute_query_id is
      13             :  * enabled, or optionally by third-party modules.
      14             :  *
      15             :  * To facilitate presenting entries to users, we create "representative" query
      16             :  * strings in which constants are replaced with parameter symbols ($n), to
      17             :  * make it clearer what a normalized entry can represent.  To save on shared
      18             :  * memory, and to avoid having to truncate oversized query strings, we store
      19             :  * these strings in a temporary external query-texts file.  Offsets into this
      20             :  * file are kept in shared memory.
      21             :  *
      22             :  * Note about locking issues: to create or delete an entry in the shared
      23             :  * hashtable, one must hold pgss->lock exclusively.  Modifying any field
      24             :  * in an entry except the counters requires the same.  To look up an entry,
      25             :  * one must hold the lock shared.  To read or update the counters within
      26             :  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
      27             :  * disappear!) and also take the entry's mutex spinlock.
      28             :  * The shared state variable pgss->extent (the next free spot in the external
      29             :  * query-text file) should be accessed only while holding either the
      30             :  * pgss->mutex spinlock, or exclusive lock on pgss->lock.  We use the mutex to
      31             :  * allow reserving file space while holding only shared lock on pgss->lock.
      32             :  * Rewriting the entire external query-text file, eg for garbage collection,
      33             :  * requires holding pgss->lock exclusively; this allows individual entries
      34             :  * in the file to be read or written while holding only shared lock.
      35             :  *
      36             :  *
      37             :  * Copyright (c) 2008-2023, PostgreSQL Global Development Group
      38             :  *
      39             :  * IDENTIFICATION
      40             :  *    contrib/pg_stat_statements/pg_stat_statements.c
      41             :  *
      42             :  *-------------------------------------------------------------------------
      43             :  */
      44             : #include "postgres.h"
      45             : 
      46             : #include <math.h>
      47             : #include <sys/stat.h>
      48             : #include <unistd.h>
      49             : 
      50             : #include "access/parallel.h"
      51             : #include "catalog/pg_authid.h"
      52             : #include "common/hashfn.h"
      53             : #include "executor/instrument.h"
      54             : #include "funcapi.h"
      55             : #include "jit/jit.h"
      56             : #include "mb/pg_wchar.h"
      57             : #include "miscadmin.h"
      58             : #include "nodes/queryjumble.h"
      59             : #include "optimizer/planner.h"
      60             : #include "parser/analyze.h"
      61             : #include "parser/parsetree.h"
      62             : #include "parser/scanner.h"
      63             : #include "parser/scansup.h"
      64             : #include "pgstat.h"
      65             : #include "storage/fd.h"
      66             : #include "storage/ipc.h"
      67             : #include "storage/lwlock.h"
      68             : #include "storage/shmem.h"
      69             : #include "storage/spin.h"
      70             : #include "tcop/utility.h"
      71             : #include "utils/acl.h"
      72             : #include "utils/builtins.h"
      73             : #include "utils/memutils.h"
      74             : #include "utils/timestamp.h"
      75             : 
      76           8 : PG_MODULE_MAGIC;
      77             : 
      78             : /* Location of permanent stats file (valid when database is shut down) */
      79             : #define PGSS_DUMP_FILE  PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
      80             : 
      81             : /*
      82             :  * Location of external query text file (under PG_STAT_TMP_DIR).
      83             :  */
      84             : #define PGSS_TEXT_FILE  PG_STAT_TMP_DIR "/pgss_query_texts.stat"
      85             : 
      86             : /* Magic number identifying the stats file format */
      87             : static const uint32 PGSS_FILE_HEADER = 0x20220408;
      88             : 
      89             : /* PostgreSQL major version number; a change in it invalidates all entries */
      90             : static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
      91             : 
      92             : /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
      93             : #define USAGE_EXEC(duration)    (1.0)   /* the duration argument is currently unused */
      94             : #define USAGE_INIT              (1.0)   /* including initial planning */
      95             : #define ASSUMED_MEDIAN_INIT     (10.0)  /* initial assumed median usage */
      96             : #define ASSUMED_LENGTH_INIT     1024    /* initial assumed mean query length */
      97             : #define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
      98             : #define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
      99             : #define USAGE_DEALLOC_PERCENT   5   /* free this % of entries at once */
     100             : #define IS_STICKY(c)    ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0) /* true while no plan or exec call is recorded; NOTE(review): c is expanded unparenthesized, so pass only a plain lvalue */
     101             : 
     102             : /*
     103             :  * True unless n is an ExecuteStmt, PrepareStmt or DeallocateStmt, the utility
     104             :  * statements that pgss_ProcessUtility and pgss_post_parse_analyze ignore.
     105             :  */
     106             : #define PGSS_HANDLED_UTILITY(n)     (!IsA(n, ExecuteStmt) && \
     107             :                                     !IsA(n, PrepareStmt) && \
     108             :                                     !IsA(n, DeallocateStmt))
     109             : 
     110             : /*
     111             :  * Extension version number (the api_version handed to pg_stat_statements_internal), for supporting older extension versions' objects.
     112             :  */
     113             : typedef enum pgssVersion
     114             : {
     115             :     PGSS_V1_0 = 0,              /* oldest value; later ones follow in version order */
     116             :     PGSS_V1_1,
     117             :     PGSS_V1_2,
     118             :     PGSS_V1_3,
     119             :     PGSS_V1_8,                  /* this enum has no values for 1.4 through 1.7 */
     120             :     PGSS_V1_9,
     121             :     PGSS_V1_10
     122             : } pgssVersion;
     123             : 
     124             : typedef enum pgssStoreKind
     125             : {
     126             :     PGSS_INVALID = -1,          /* negative, so never usable as a Counters array index */
     127             : 
     128             :     /*
     129             :      * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
     130             :      * reference the underlying values in the arrays in the Counters struct,
     131             :      * and this order is required in pg_stat_statements_internal().
     132             :      */
     133             :     PGSS_PLAN = 0,              /* planning statistics */
     134             :     PGSS_EXEC,                  /* execution statistics */
     135             : 
     136             :     PGSS_NUMKIND                /* Must be last value of this enum; sizes the Counters arrays */
     137             : } pgssStoreKind;
     138             : 
     139             : /*
     140             :  * Hashtable key that defines the identity of a hashtable entry.  We separate
     141             :  * queries by user and by database even if they are otherwise identical.
     142             :  *
     143             :  * If you add a new key to this struct, make sure to teach pgss_store() to
     144             :  * zero the padding bytes.  Otherwise, things will break, because pgss_hash
     145             :  * is created using HASH_BLOBS, and thus tag_hash is used to hash the whole
     146             :  * struct.
     147             :  */
     148             : typedef struct pgssHashKey
     149             : {
     150             :     Oid         userid;         /* user OID */
     151             :     Oid         dbid;           /* database OID */
     152             :     uint64      queryid;        /* query identifier */
     153             :     bool        toplevel;       /* query executed at top level */
     154             : } pgssHashKey;
     155             : 
     156             : /*
     157             :  * The actual stats counters kept within pgssEntry (arrays are indexed by pgssStoreKind).
     158             :  */
     159             : typedef struct Counters
     160             : {
     161             :     int64       calls[PGSS_NUMKIND];    /* # of times planned/executed */
     162             :     double      total_time[PGSS_NUMKIND];   /* total planning/execution time,
     163             :                                              * in msec */
     164             :     double      min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
     165             :                                          * msec */
     166             :     double      max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
     167             :                                          * msec */
     168             :     double      mean_time[PGSS_NUMKIND];    /* mean planning/execution time in
     169             :                                              * msec */
     170             :     double      sum_var_time[PGSS_NUMKIND]; /* sum of variances in
     171             :                                              * planning/execution time in msec */
     172             :     int64       rows;           /* total # of retrieved or affected rows */
     173             :     int64       shared_blks_hit;    /* # of shared buffer hits */
     174             :     int64       shared_blks_read;   /* # of shared disk blocks read */
     175             :     int64       shared_blks_dirtied;    /* # of shared disk blocks dirtied */
     176             :     int64       shared_blks_written;    /* # of shared disk blocks written */
     177             :     int64       local_blks_hit; /* # of local buffer hits */
     178             :     int64       local_blks_read;    /* # of local disk blocks read */
     179             :     int64       local_blks_dirtied; /* # of local disk blocks dirtied */
     180             :     int64       local_blks_written; /* # of local disk blocks written */
     181             :     int64       temp_blks_read; /* # of temp blocks read */
     182             :     int64       temp_blks_written;  /* # of temp blocks written */
     183             :     double      blk_read_time;  /* time spent reading blocks, in msec */
     184             :     double      blk_write_time; /* time spent writing blocks, in msec */
     185             :     double      temp_blk_read_time; /* time spent reading temp blocks, in msec */
     186             :     double      temp_blk_write_time;    /* time spent writing temp blocks, in
     187             :                                          * msec */
     188             :     double      usage;          /* usage factor */
     189             :     int64       wal_records;    /* # of WAL records generated */
     190             :     int64       wal_fpi;        /* # of WAL full page images generated */
     191             :     uint64      wal_bytes;      /* total amount of WAL generated in bytes */
     192             :     int64       jit_functions;  /* total number of JIT functions emitted */
     193             :     double      jit_generation_time;    /* total time to generate jit code */
     194             :     int64       jit_inlining_count; /* number of times inlining time has been
     195             :                                      * > 0 */
     196             :     double      jit_inlining_time;  /* total time to inline jit code */
     197             :     int64       jit_optimization_count; /* number of times optimization time
     198             :                                          * has been > 0 */
     199             :     double      jit_optimization_time;  /* total time to optimize jit code */
     200             :     int64       jit_emission_count; /* number of times emission time has been
     201             :                                      * > 0 */
     202             :     double      jit_emission_time;  /* total time to emit jit code */
     203             : } Counters;
     204             : 
     205             : /*
     206             :  * Global statistics for pg_stat_statements (embedded in pgssSharedState)
     207             :  */
     208             : typedef struct pgssGlobalStats
     209             : {
     210             :     int64       dealloc;        /* # of times entries were deallocated */
     211             :     TimestampTz stats_reset;    /* time at which all stats were reset */
     212             : } pgssGlobalStats;
     213             : 
     214             : /*
     215             :  * Statistics per statement
     216             :  *
     217             :  * Note: in the event of a failure in garbage collection of the query text
     218             :  * file, we reset query_offset to zero and query_len to -1.  This will be seen
     219             :  * as an invalid state by qtext_fetch().
     220             :  */
     221             : typedef struct pgssEntry
     222             : {
     223             :     pgssHashKey key;            /* hash key of entry - MUST BE FIRST */
     224             :     Counters    counters;       /* the statistics for this query */
     225             :     Size        query_offset;   /* query text offset in external file */
     226             :     int         query_len;      /* # of valid bytes in query string, or -1 */
     227             :     int         encoding;       /* query text encoding */
     228             :     slock_t     mutex;          /* protects the counters only */
     229             : } pgssEntry;
     230             : 
     231             : /*
     232             :  * Global shared state, created via ShmemInitStruct() in pgss_shmem_startup()
     233             :  */
     234             : typedef struct pgssSharedState
     235             : {
     236             :     LWLock     *lock;           /* protects hashtable search/modification */
     237             :     double      cur_median_usage;   /* current median usage in hashtable */
     238             :     Size        mean_query_len; /* current mean entry text length */
     239             :     slock_t     mutex;          /* protects following fields only: */
     240             :     Size        extent;         /* current extent of query file */
     241             :     int         n_writers;      /* number of active writers to query file */
     242             :     int         gc_count;       /* query file garbage collection cycle count */
     243             :     pgssGlobalStats stats;      /* global statistics for pgss */
     244             : } pgssSharedState;
     245             : 
     246             : /*---- Local variables (per backend process) ----*/
     247             : 
     248             : /* Current nesting depth of ExecutorRun+ProcessUtility calls */
     249             : static int  exec_nested_level = 0;
     250             : 
     251             : /* Current nesting depth of planner calls */
     252             : static int  plan_nested_level = 0;
     253             : 
     254             : /* Saved hook values in case of unload; filled in by _PG_init */
     255             : static shmem_request_hook_type prev_shmem_request_hook = NULL;
     256             : static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
     257             : static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
     258             : static planner_hook_type prev_planner_hook = NULL;
     259             : static ExecutorStart_hook_type prev_ExecutorStart = NULL;
     260             : static ExecutorRun_hook_type prev_ExecutorRun = NULL;
     261             : static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
     262             : static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
     263             : static ProcessUtility_hook_type prev_ProcessUtility = NULL;
     264             : 
     265             : /* Links to shared memory state; reset to NULL at the start of pgss_shmem_startup */
     266             : static pgssSharedState *pgss = NULL;
     267             : static HTAB *pgss_hash = NULL;
     268             : 
     269             : /*---- GUC variables (registered in _PG_init with the same defaults) ----*/
     270             : 
     271             : typedef enum
     272             : {
     273             :     PGSS_TRACK_NONE,            /* track no statements */
     274             :     PGSS_TRACK_TOP,             /* only top level statements */
     275             :     PGSS_TRACK_ALL              /* all statements, including nested ones */
     276             : }           PGSSTrackLevel;
     277             : 
     278             : static const struct config_enum_entry track_options[] =
     279             : {
     280             :     {"none", PGSS_TRACK_NONE, false},
     281             :     {"top", PGSS_TRACK_TOP, false},
     282             :     {"all", PGSS_TRACK_ALL, false},
     283             :     {NULL, 0, false}            /* NOTE(review): presumably the list terminator - confirm */
     284             : };
     285             : 
     286             : static int  pgss_max = 5000;    /* max # statements to track */
     287             : static int  pgss_track = PGSS_TRACK_TOP;    /* tracking level */
     288             : static bool pgss_track_utility = true;  /* whether to track utility commands */
     289             : static bool pgss_track_planning = false;    /* whether to track planning
     290             :                                              * duration */
     291             : static bool pgss_save = true;   /* whether to save stats across shutdown */
     292             : 
     293             : 
     294             : #define pgss_enabled(level) \
     295             :     (!IsParallelWorker() && \
     296             :     (pgss_track == PGSS_TRACK_ALL || \
     297             :     (pgss_track == PGSS_TRACK_TOP && (level) == 0)))    /* never true in a parallel worker; with track=top only nesting level 0 qualifies */
     298             : 
     299             : #define record_gc_qtexts() \
     300             :     do { \
     301             :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; \
     302             :         SpinLockAcquire(&s->mutex); \
     303             :         s->gc_count++; \
     304             :         SpinLockRelease(&s->mutex); \
     305             :     } while(0)  /* increments pgss->gc_count while holding the pgss->mutex spinlock */
     306             : 
     307             : /*---- Function declarations ----*/
     308             : 
     309          10 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
     310          24 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_7);
     311           0 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
     312          10 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_3);
     313           8 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_8);
     314          10 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_9);
     315          28 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_10);
     316           0 : PG_FUNCTION_INFO_V1(pg_stat_statements);
     317          10 : PG_FUNCTION_INFO_V1(pg_stat_statements_info);
     318             : 
     319             : static void pgss_shmem_request(void);
     320             : static void pgss_shmem_startup(void);
     321             : static void pgss_shmem_shutdown(int code, Datum arg);
     322             : static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
     323             :                                     JumbleState *jstate);
     324             : static PlannedStmt *pgss_planner(Query *parse,
     325             :                                  const char *query_string,
     326             :                                  int cursorOptions,
     327             :                                  ParamListInfo boundParams);
     328             : static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
     329             : static void pgss_ExecutorRun(QueryDesc *queryDesc,
     330             :                              ScanDirection direction,
     331             :                              uint64 count, bool execute_once);
     332             : static void pgss_ExecutorFinish(QueryDesc *queryDesc);
     333             : static void pgss_ExecutorEnd(QueryDesc *queryDesc);
     334             : static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
     335             :                                 bool readOnlyTree,
     336             :                                 ProcessUtilityContext context, ParamListInfo params,
     337             :                                 QueryEnvironment *queryEnv,
     338             :                                 DestReceiver *dest, QueryCompletion *qc);
     339             : static void pgss_store(const char *query, uint64 queryId,
     340             :                        int query_location, int query_len,
     341             :                        pgssStoreKind kind,
     342             :                        double total_time, uint64 rows,
     343             :                        const BufferUsage *bufusage,
     344             :                        const WalUsage *walusage,
     345             :                        const struct JitInstrumentation *jitusage,
     346             :                        JumbleState *jstate);
     347             : static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
     348             :                                         pgssVersion api_version,
     349             :                                         bool showtext);
     350             : static Size pgss_memsize(void);
     351             : static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
     352             :                               int encoding, bool sticky);
     353             : static void entry_dealloc(void);
     354             : static bool qtext_store(const char *query, int query_len,
     355             :                         Size *query_offset, int *gc_count);
     356             : static char *qtext_load_file(Size *buffer_size);
     357             : static char *qtext_fetch(Size query_offset, int query_len,
     358             :                          char *buffer, Size buffer_size);
     359             : static bool need_gc_qtexts(void);
     360             : static void gc_qtexts(void);
     361             : static void entry_reset(Oid userid, Oid dbid, uint64 queryid);
     362             : static char *generate_normalized_query(JumbleState *jstate, const char *query,
     363             :                                        int query_loc, int *query_len_p);
     364             : static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
     365             :                                      int query_loc);
     366             : static int  comp_location(const void *a, const void *b);
     367             : 
     368             : 
     369             : /*
     370             :  * Module load callback: defines the GUCs and installs the hooks (preload only)
     371             :  */
     372             : void
     373           8 : _PG_init(void)
     374             : {
     375             :     /*
     376             :      * In order to create our shared memory area, we have to be loaded via
     377             :      * shared_preload_libraries.  If not, fall out without hooking into any of
     378             :      * the main system.  (We don't throw error here because it seems useful to
     379             :      * allow the pg_stat_statements functions to be created even when the
     380             :      * module isn't active.  The functions must protect themselves against
     381             :      * being called then, however.)
     382             :      */
     383           8 :     if (!process_shared_preload_libraries_in_progress)
     384           2 :         return;
     385             : 
     386             :     /*
     387             :      * Inform the postmaster that we want to enable query_id calculation if
     388             :      * compute_query_id is set to auto.
     389             :      */
     390           6 :     EnableQueryId();
     391             : 
     392             :     /*
     393             :      * Define (or redefine) custom GUC variables.
     394             :      */
     395           6 :     DefineCustomIntVariable("pg_stat_statements.max",
     396             :                             "Sets the maximum number of statements tracked by pg_stat_statements.",
     397             :                             NULL,
     398             :                             &pgss_max,
     399             :                             5000,
     400             :                             100,
     401             :                             INT_MAX / 2,
     402             :                             PGC_POSTMASTER,
     403             :                             0,
     404             :                             NULL,
     405             :                             NULL,
     406             :                             NULL);
     407             : 
     408           6 :     DefineCustomEnumVariable("pg_stat_statements.track",
     409             :                              "Selects which statements are tracked by pg_stat_statements.",
     410             :                              NULL,
     411             :                              &pgss_track,
     412             :                              PGSS_TRACK_TOP,
     413             :                              track_options,
     414             :                              PGC_SUSET,
     415             :                              0,
     416             :                              NULL,
     417             :                              NULL,
     418             :                              NULL);
     419             : 
     420           6 :     DefineCustomBoolVariable("pg_stat_statements.track_utility",
     421             :                              "Selects whether utility commands are tracked by pg_stat_statements.",
     422             :                              NULL,
     423             :                              &pgss_track_utility,
     424             :                              true,
     425             :                              PGC_SUSET,
     426             :                              0,
     427             :                              NULL,
     428             :                              NULL,
     429             :                              NULL);
     430             : 
     431           6 :     DefineCustomBoolVariable("pg_stat_statements.track_planning",
     432             :                              "Selects whether planning duration is tracked by pg_stat_statements.",
     433             :                              NULL,
     434             :                              &pgss_track_planning,
     435             :                              false,
     436             :                              PGC_SUSET,
     437             :                              0,
     438             :                              NULL,
     439             :                              NULL,
     440             :                              NULL);
     441             : 
     442           6 :     DefineCustomBoolVariable("pg_stat_statements.save",
     443             :                              "Save pg_stat_statements statistics across server shutdowns.",
     444             :                              NULL,
     445             :                              &pgss_save,
     446             :                              true,
     447             :                              PGC_SIGHUP,
     448             :                              0,
     449             :                              NULL,
     450             :                              NULL,
     451             :                              NULL);
     452             : 
     453           6 :     MarkGUCPrefixReserved("pg_stat_statements");
     454             : 
     455             :     /*
     456             :      * Install hooks, saving each previous value in the matching prev_* variable.
     457             :      */
     458           6 :     prev_shmem_request_hook = shmem_request_hook;
     459           6 :     shmem_request_hook = pgss_shmem_request;
     460           6 :     prev_shmem_startup_hook = shmem_startup_hook;
     461           6 :     shmem_startup_hook = pgss_shmem_startup;
     462           6 :     prev_post_parse_analyze_hook = post_parse_analyze_hook;
     463           6 :     post_parse_analyze_hook = pgss_post_parse_analyze;
     464           6 :     prev_planner_hook = planner_hook;
     465           6 :     planner_hook = pgss_planner;
     466           6 :     prev_ExecutorStart = ExecutorStart_hook;
     467           6 :     ExecutorStart_hook = pgss_ExecutorStart;
     468           6 :     prev_ExecutorRun = ExecutorRun_hook;
     469           6 :     ExecutorRun_hook = pgss_ExecutorRun;
     470           6 :     prev_ExecutorFinish = ExecutorFinish_hook;
     471           6 :     ExecutorFinish_hook = pgss_ExecutorFinish;
     472           6 :     prev_ExecutorEnd = ExecutorEnd_hook;
     473           6 :     ExecutorEnd_hook = pgss_ExecutorEnd;
     474           6 :     prev_ProcessUtility = ProcessUtility_hook;
     475           6 :     ProcessUtility_hook = pgss_ProcessUtility;
     476             : }
     477             : 
     478             : /*
     479             :  * shmem_request hook: request additional shared resources.  We'll allocate or
     480             :  * attach to the shared resources in pgss_shmem_startup().
     481             :  */
     482             : static void
     483           6 : pgss_shmem_request(void)
     484             : {
     485           6 :     if (prev_shmem_request_hook)
     486           0 :         prev_shmem_request_hook();  /* chain to the hook that was installed before ours */
     487             : 
     488           6 :     RequestAddinShmemSpace(pgss_memsize());
     489           6 :     RequestNamedLWLockTranche("pg_stat_statements", 1); /* one lock; looked up by this name for pgss->lock in pgss_shmem_startup */
     490           6 : }
     491             : 
     492             : /*
     493             :  * shmem_startup hook: allocate or attach to shared memory,
     494             :  * then load any pre-existing statistics from file.
     495             :  * Also create and load the query-texts file, which is expected to exist
     496             :  * (even if empty) while the module is enabled.
                     :  *
                     :  * Outline:
                     :  *  - run any previously installed shmem_startup hook;
                     :  *  - under AddinShmemInitLock, create or attach to the pgssSharedState
                     :  *    struct and the entry hash table, initializing the struct's fields
                     :  *    only if it did not exist yet;
                     :  *  - when not running under the postmaster, register
                     :  *    pgss_shmem_shutdown() as a shmem-exit callback;
                     :  *  - if the shared state already existed, stop here;
                     :  *  - otherwise recreate an empty PGSS_TEXT_FILE and, if pgss_save is set
                     :  *    and PGSS_DUMP_FILE exists, reload it: each stored pgssEntry is
                     :  *    followed by query_len + 1 bytes of query text.  Sticky entries are
                     :  *    skipped; every other entry has its text appended to PGSS_TEXT_FILE
                     :  *    and its counters copied into a newly allocated hash entry.  The
                     :  *    global statistics follow the entries.  On success the dump file is
                     :  *    removed.
                     :  *
                     :  * Problems with either file are only reported at LOG level; we then
                     :  * release whatever is still open, remove the dump file and return
                     :  * normally.  A dump file with a wrong header word, a wrong major version,
                     :  * an invalid encoding or a negative query length is treated as invalid
                     :  * data.
     497             :  */
     498             : static void
     499           6 : pgss_shmem_startup(void)
     500             : {
     501             :     bool        found;
     502             :     HASHCTL     info;
     503           6 :     FILE       *file = NULL;
     504           6 :     FILE       *qfile = NULL;
     505             :     uint32      header;
     506             :     int32       num;
     507             :     int32       pgver;
     508             :     int32       i;
     509             :     int         buffer_size;
     510           6 :     char       *buffer = NULL;
     511             : 
     512           6 :     if (prev_shmem_startup_hook)
     513           0 :         prev_shmem_startup_hook();
     514             : 
     515             :     /* reset in case this is a restart within the postmaster */
     516           6 :     pgss = NULL;
     517           6 :     pgss_hash = NULL;
     518             : 
     519             :     /*
     520             :      * Create or attach to the shared memory state, including hash table
     521             :      */
     522           6 :     LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
     523             : 
     524           6 :     pgss = ShmemInitStruct("pg_stat_statements",
     525             :                            sizeof(pgssSharedState),
     526             :                            &found);
     527             : 
     528           6 :     if (!found)
     529             :     {
     530             :         /* First time through ... */
     531           6 :         pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
     532           6 :         pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
     533           6 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
     534           6 :         SpinLockInit(&pgss->mutex);
     535           6 :         pgss->extent = 0;
     536           6 :         pgss->n_writers = 0;
     537           6 :         pgss->gc_count = 0;
     538           6 :         pgss->stats.dealloc = 0;
     539           6 :         pgss->stats.stats_reset = GetCurrentTimestamp();
     540             :     }
     541             : 
     542           6 :     info.keysize = sizeof(pgssHashKey);
     543           6 :     info.entrysize = sizeof(pgssEntry);
     544           6 :     pgss_hash = ShmemInitHash("pg_stat_statements hash",
     545             :                               pgss_max, pgss_max,
     546             :                               &info,
     547             :                               HASH_ELEM | HASH_BLOBS);
     548             : 
     549           6 :     LWLockRelease(AddinShmemInitLock);
     550             : 
     551             :     /*
     552             :      * If we're in the postmaster (or a standalone backend...), set up a shmem
     553             :      * exit hook to dump the statistics to disk.
     554             :      */
     555           6 :     if (!IsUnderPostmaster)
     556           6 :         on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
     557             : 
     558             :     /*
     559             :      * Done if some other process already completed our initialization.
     560             :      */
     561           6 :     if (found)
     562           6 :         return;
     563             : 
     564             :     /*
     565             :      * Note: we don't bother with locks here, because there should be no other
     566             :      * processes running when this code is reached.
     567             :      */
     568             : 
     569             :     /* Unlink query text file possibly left over from crash */
     570           6 :     unlink(PGSS_TEXT_FILE);
     571             : 
     572             :     /* Allocate new query text temp file */
     573           6 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
     574           6 :     if (qfile == NULL)
     575           0 :         goto write_error;
     576             : 
     577             :     /*
     578             :      * If we were told not to load old statistics, we're done.  (Note we do
     579             :      * not try to unlink any old dump file in this case.  This seems a bit
     580             :      * questionable but it's the historical behavior.)
     581             :      */
     582           6 :     if (!pgss_save)
     583             :     {
     584           0 :         FreeFile(qfile);
     585           0 :         return;
     586             :     }
     587             : 
     588             :     /*
     589             :      * Attempt to load old statistics from the dump file.
     590             :      */
     591           6 :     file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
     592           6 :     if (file == NULL)
     593             :     {
     594           6 :         if (errno != ENOENT)
     595           0 :             goto read_error;
     596             :         /* No existing persisted stats file, so we're done */
     597           6 :         FreeFile(qfile);
     598           6 :         return;
     599             :     }
     600             : 
     601           0 :     buffer_size = 2048;
     602           0 :     buffer = (char *) palloc(buffer_size);
     603             : 
     604           0 :     if (fread(&header, sizeof(uint32), 1, file) != 1 ||
     605           0 :         fread(&pgver, sizeof(uint32), 1, file) != 1 ||
     606           0 :         fread(&num, sizeof(int32), 1, file) != 1)
     607           0 :         goto read_error;
     608             : 
     609           0 :     if (header != PGSS_FILE_HEADER ||
     610           0 :         pgver != PGSS_PG_MAJOR_VERSION)
     611           0 :         goto data_error;
     612             : 
     613           0 :     for (i = 0; i < num; i++)
     614             :     {
     615             :         pgssEntry   temp;
     616             :         pgssEntry  *entry;
     617             :         Size        query_offset;
     618             : 
     619           0 :         if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
     620           0 :             goto read_error;
     621             : 
     622             :         /*
                     :          * Sanity-check what we easily can: the encoding must be valid, and
                     :          * the query length must not be negative, because it sizes the reads
                     :          * and writes below and is used as an index into the buffer.
                     :          */
     623           0 :         if (!PG_VALID_BE_ENCODING(temp.encoding) || temp.query_len < 0)
     624           0 :             goto data_error;
     625             : 
     626             :         /* Resize buffer as needed */
     627           0 :         if (temp.query_len >= buffer_size)
     628             :         {
     629           0 :             buffer_size = Max(buffer_size * 2, temp.query_len + 1);
     630           0 :             buffer = repalloc(buffer, buffer_size);
     631             :         }
     632             : 
     633           0 :         if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
     634           0 :             goto read_error;
     635             : 
     636             :         /* Should have a trailing null, but let's make sure */
     637           0 :         buffer[temp.query_len] = '\0';
     638             : 
     639             :         /* Skip loading "sticky" entries */
     640           0 :         if (IS_STICKY(temp.counters))
     641           0 :             continue;
     642             : 
     643             :         /* Store the query text */
     644           0 :         query_offset = pgss->extent;
     645           0 :         if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
     646           0 :             goto write_error;
     647           0 :         pgss->extent += temp.query_len + 1;
     648             : 
     649             :         /* make the hashtable entry (discards old entries if too many) */
     650           0 :         entry = entry_alloc(&temp.key, query_offset, temp.query_len,
     651             :                             temp.encoding,
     652             :                             false);
     653             : 
     654             :         /* copy in the actual stats */
     655           0 :         entry->counters = temp.counters;
     656             :     }
     657             : 
     658             :     /* Read global statistics for pg_stat_statements */
     659           0 :     if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
     660           0 :         goto read_error;
     661             : 
     662           0 :     pfree(buffer);
     663           0 :     FreeFile(file);
     664           0 :     FreeFile(qfile);
     665             : 
     666             :     /*
     667             :      * Remove the persisted stats file so it's not included in
     668             :      * backups/replication standbys, etc.  A new file will be written on next
     669             :      * shutdown.
     670             :      *
     671             :      * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
     672             :      * because we remove that file on startup; it acts inversely to
     673             :      * PGSS_DUMP_FILE, in that it is only supposed to be around when the
     674             :      * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
     675             :      * when the server is not running.  Leaving the file creates no danger of
     676             :      * a newly restored database having a spurious record of execution costs,
     677             :      * which is what we're really concerned about here.
     678             :      */
     679           0 :     unlink(PGSS_DUMP_FILE);
     680             : 
     681           0 :     return;
     682             : 
     683           0 : read_error:
     684           0 :     ereport(LOG,
     685             :             (errcode_for_file_access(),
     686             :              errmsg("could not read file \"%s\": %m",
     687             :                     PGSS_DUMP_FILE)));
     688           0 :     goto fail;
     689           0 : data_error:
     690           0 :     ereport(LOG,
     691             :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     692             :              errmsg("ignoring invalid data in file \"%s\"",
     693             :                     PGSS_DUMP_FILE)));
     694           0 :     goto fail;
     695           0 : write_error:
     696           0 :     ereport(LOG,
     697             :             (errcode_for_file_access(),
     698             :              errmsg("could not write file \"%s\": %m",
     699             :                     PGSS_TEXT_FILE)));
     700           0 : fail:
     701           0 :     if (buffer)
     702           0 :         pfree(buffer);
     703           0 :     if (file)
     704           0 :         FreeFile(file);
     705           0 :     if (qfile)
     706           0 :         FreeFile(qfile);
     707             :     /* If possible, throw away the bogus file; ignore any error */
     708           0 :     unlink(PGSS_DUMP_FILE);
     709             : 
     710             :     /*
     711             :      * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
     712             :      * server is running with pg_stat_statements enabled
     713             :      */
     714             : }
     715             : 
     716             : /*
     717             :  * shmem_shutdown hook: Dump statistics into file.
     718             :  *
     719             :  * Note: we don't bother with acquiring lock, because there should be no
     720             :  * other processes running when this is called.
                     :  *
                     :  * code: if nonzero we return at once without writing anything (treated
                     :  *       as a crash).
                     :  * arg:  not used.
                     :  *
                     :  * Nothing is written either if the shared state was never set up or if
                     :  * pgss_save is off.  Otherwise the dump is built in PGSS_DUMP_FILE ".tmp":
                     :  * the file header word, the major version, the number of hash entries,
                     :  * then each pgssEntry immediately followed by len + 1 bytes of its query
                     :  * text, and finally the global statistics.  The temporary file is then
                     :  * renamed to PGSS_DUMP_FILE with durable_rename() and PGSS_TEXT_FILE is
                     :  * removed.
                     :  *
                     :  * On failure we log at LOG level, free the text buffer, close the file if
                     :  * it is still open, and remove both the temporary file and
                     :  * PGSS_TEXT_FILE.
                     :  *
                     :  * NOTE(review): the entry count is written before the loop, but entries
                     :  * whose text cannot be fetched are skipped, so the stored count could
                     :  * exceed the number of entries actually written -- confirm whether the
                     :  * loader copes with that.
     721             :  */
     722             : static void
     723           6 : pgss_shmem_shutdown(int code, Datum arg)
     724             : {
     725             :     FILE       *file;
     726           6 :     char       *qbuffer = NULL;
     727           6 :     Size        qbuffer_size = 0;
     728             :     HASH_SEQ_STATUS hash_seq;
     729             :     int32       num_entries;
     730             :     pgssEntry  *entry;
     731             : 
     732             :     /* Don't try to dump during a crash. */
     733           6 :     if (code)
     734           6 :         return;
     735             : 
     736             :     /* Safety check ... shouldn't get here unless shmem is set up. */
     737           6 :     if (!pgss || !pgss_hash)
     738           0 :         return;
     739             : 
     740             :     /* Don't dump if told not to. */
     741           6 :     if (!pgss_save)
     742           0 :         return;
     743             : 
     744           6 :     file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
     745           6 :     if (file == NULL)
     746           0 :         goto error;
     747             : 
     748           6 :     if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
     749           0 :         goto error;
     750           6 :     if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
     751           0 :         goto error;
     752           6 :     num_entries = hash_get_num_entries(pgss_hash);
     753           6 :     if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
     754           0 :         goto error;
     755             : 
     756           6 :     qbuffer = qtext_load_file(&qbuffer_size);
     757           6 :     if (qbuffer == NULL)
     758           0 :         goto error;
     759             : 
     760             :     /*
     761             :      * When serializing to disk, we store query texts immediately after their
     762             :      * entry data.  Any orphaned query texts are thereby excluded.
     763             :      */
     764           6 :     hash_seq_init(&hash_seq, pgss_hash);
     765       48798 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
     766             :     {
     767       48792 :         int         len = entry->query_len;
     768       48792 :         char       *qstr = qtext_fetch(entry->query_offset, len,
     769             :                                        qbuffer, qbuffer_size);
     770             : 
     771       48792 :         if (qstr == NULL)
     772           0 :             continue;           /* Ignore any entries with bogus texts */
     773             : 
     774       48792 :         if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
     775       48792 :             fwrite(qstr, 1, len + 1, file) != len + 1)
     776             :         {
     777             :             /* note: we assume hash_seq_term won't change errno */
     778           0 :             hash_seq_term(&hash_seq);
     779           0 :             goto error;
     780             :         }
     781             :     }
     782             : 
     783             :     /* Dump global statistics for pg_stat_statements */
     784           6 :     if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
     785           0 :         goto error;
     786             : 
     787           6 :     free(qbuffer);
     788           6 :     qbuffer = NULL;
     789             : 
     790           6 :     if (FreeFile(file))
     791             :     {
     792           0 :         file = NULL;            /* so the error path does not close it again */
     793           0 :         goto error;
     794             :     }
     795             : 
     796             :     /*
     797             :      * Rename file into place, so we atomically replace any old one.
     798             :      */
     799           6 :     (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
     800             : 
     801             :     /* Unlink query-texts file; it's not needed while shutdown */
     802           6 :     unlink(PGSS_TEXT_FILE);
     803             : 
     804           6 :     return;
     805             : 
     806           0 : error:
     807           0 :     ereport(LOG,
     808             :             (errcode_for_file_access(),
     809             :              errmsg("could not write file \"%s\": %m",
     810             :                     PGSS_DUMP_FILE ".tmp")));
     811           0 :     free(qbuffer);
     812           0 :     if (file)
     813           0 :         FreeFile(file);
     814           0 :     unlink(PGSS_DUMP_FILE ".tmp");
     815           0 :     unlink(PGSS_TEXT_FILE);
     816             : }
     817             : 
     818             : /*
     819             :  * Post-parse-analysis hook: mark query with a queryId
                     :  *
                     :  * pstate: parse state; only its p_sourcetext is read here.
                     :  * query:  the analyzed query; its queryId may be reset to zero here.
                     :  * jstate: jumble state, may be NULL; only clocations_count is inspected
                     :  *         here before it is handed on to pgss_store().
                     :  *
                     :  * Any previously installed hook runs first.  Nothing further happens
                     :  * unless the shared state is set up and pgss_enabled(exec_nested_level)
                     :  * holds.
     820             :  */
     821             : static void
     822      120838 : pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
     823             : {
     824      120838 :     if (prev_post_parse_analyze_hook)
     825           0 :         prev_post_parse_analyze_hook(pstate, query, jstate);
     826             : 
     827             :     /* Safety check... */
     828      120838 :     if (!pgss || !pgss_hash || !pgss_enabled(exec_nested_level))
     829       22930 :         return;
     830             : 
     831             :     /*
     832             :      * Clear queryId for prepared statements related utility, as those will
     833             :      * inherit from the underlying statement's one (except DEALLOCATE which is
     834             :      * entirely untracked).
                     :      *
                     :      * Concretely: with pgss_track_utility on, a utility statement for which
                     :      * PGSS_HANDLED_UTILITY() is false gets queryId zero and we return.
     835             :      */
     836       97908 :     if (query->utilityStmt)
     837             :     {
     838       45922 :         if (pgss_track_utility && !PGSS_HANDLED_UTILITY(query->utilityStmt))
     839             :         {
     840        2968 :             query->queryId = UINT64CONST(0);
     841        2968 :             return;
     842             :         }
     843             :     }
     844             : 
     845             :     /*
     846             :      * If query jumbling were able to identify any ignorable constants, we
     847             :      * immediately create a hash table entry for the query, so that we can
     848             :      * record the normalized form of the query string.  If there were no such
     849             :      * constants, the normalized string would be the same as the query text
     850             :      * anyway, so there's no need for an early entry.
                     :      *
                     :      * This call passes PGSS_INVALID, zeroes for the two numeric arguments
                     :      * and NULL for the three usage pointers; jstate is the last argument.
     851             :      */
     852       94940 :     if (jstate && jstate->clocations_count > 0)
     853       46846 :         pgss_store(pstate->p_sourcetext,
     854             :                    query->queryId,
     855             :                    query->stmt_location,
     856             :                    query->stmt_len,
     857             :                    PGSS_INVALID,
     858             :                    0,
     859             :                    0,
     860             :                    NULL,
     861             :                    NULL,
     862             :                    NULL,
     863             :                    jstate);
     864             : }
     865             : 
     866             : /*
     867             :  * Planner hook: forward to regular planner, but measure planning time
     868             :  * if needed.
                     :  *
                     :  * parse, query_string, cursorOptions and boundParams are handed unchanged
                     :  * to the previous planner hook if one is installed, else to
                     :  * standard_planner(); the PlannedStmt that call produces is returned
                     :  * as is.
                     :  *
                     :  * When measurement applies (see the condition below), we snapshot
                     :  * pgBufferUsage, pgWalUsage and the current time before planning, then
                     :  * pass the elapsed time in milliseconds and the buffer/WAL usage deltas
                     :  * to pgss_store() together with PGSS_PLAN.  plan_nested_level is raised
                     :  * around the planner call and lowered again inside PG_FINALLY.
     869             :  */
     870             : static PlannedStmt *
     871       73524 : pgss_planner(Query *parse,
     872             :              const char *query_string,
     873             :              int cursorOptions,
     874             :              ParamListInfo boundParams)
     875             : {
     876             :     PlannedStmt *result;
     877             : 
     878             :     /*
     879             :      * We can't process the query if no query_string is provided, as
     880             :      * pgss_store needs it.  We also ignore query without queryid, as it would
     881             :      * be treated as a utility statement, which may not be the case.
     882             :      *
     883             :      * Note that planner_hook can be called from the planner itself, so we
     884             :      * have a specific nesting level for the planner.  However, utility
     885             :      * commands containing optimizable statements can also call the planner,
     886             :      * same for regular DML (for instance for underlying foreign key queries).
     887             :      * So testing the planner nesting level only is not enough to detect real
     888             :      * top level planner call.
     889             :      */
     890       73524 :     if (pgss_enabled(plan_nested_level + exec_nested_level)
     891       52296 :         && pgss_track_planning && query_string
     892         118 :         && parse->queryId != UINT64CONST(0))
     893         116 :     {
     894             :         instr_time  start;
     895             :         instr_time  duration;
     896             :         BufferUsage bufusage_start,
     897             :                     bufusage;
     898             :         WalUsage    walusage_start,
     899             :                     walusage;
     900             : 
     901             :         /* We need to track buffer usage as the planner can access them. */
     902         116 :         bufusage_start = pgBufferUsage;
     903             : 
     904             :         /*
     905             :          * Similarly the planner could write some WAL records in some cases
     906             :          * (e.g. setting a hint bit with those being WAL-logged)
     907             :          */
     908         116 :         walusage_start = pgWalUsage;
     909         116 :         INSTR_TIME_SET_CURRENT(start);
     910             : 
     911         116 :         plan_nested_level++;
     912         116 :         PG_TRY();
     913             :         {
     914         116 :             if (prev_planner_hook)
     915           0 :                 result = prev_planner_hook(parse, query_string, cursorOptions,
     916             :                                            boundParams);
     917             :             else
     918         116 :                 result = standard_planner(parse, query_string, cursorOptions,
     919             :                                           boundParams);
     920             :         }
     921           0 :         PG_FINALLY();
     922             :         {
     923         116 :             plan_nested_level--;
     924             :         }
     925         116 :         PG_END_TRY();
     926             : 
     927         116 :         INSTR_TIME_SET_CURRENT(duration);
     928         116 :         INSTR_TIME_SUBTRACT(duration, start);
     929             : 
     930             :         /* calc differences of buffer counters. */
     931         116 :         memset(&bufusage, 0, sizeof(BufferUsage));
     932         116 :         BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
     933             : 
     934             :         /* calc differences of WAL counters. */
     935         116 :         memset(&walusage, 0, sizeof(WalUsage));
     936         116 :         WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
     937             : 
     938         116 :         pgss_store(query_string,
     939             :                    parse->queryId,
     940             :                    parse->stmt_location,
     941             :                    parse->stmt_len,
     942             :                    PGSS_PLAN,
     943         116 :                    INSTR_TIME_GET_MILLISEC(duration),
     944             :                    0,
     945             :                    &bufusage,
     946             :                    &walusage,
     947             :                    NULL,
     948             :                    NULL);
     949             :     }
     950             :     else
     951             :     {
                     :         /* Not measuring: just forward the call, leaving nesting untouched. */
     952       73408 :         if (prev_planner_hook)
     953           0 :             result = prev_planner_hook(parse, query_string, cursorOptions,
     954             :                                        boundParams);
     955             :         else
     956       73408 :             result = standard_planner(parse, query_string, cursorOptions,
     957             :                                       boundParams);
     958             :     }
     959             : 
     960       72502 :     return result;
     961             : }
     962             : 
     963             : /*
     964             :  * ExecutorStart hook: start up tracking if needed
                     :  *
                     :  * queryDesc and eflags are first passed to the previous ExecutorStart
                     :  * hook if one is installed, else to standard_ExecutorStart().  After
                     :  * that, if tracking is enabled at the current executor nesting level and
                     :  * the planned statement has a nonzero queryId, we make sure
                     :  * queryDesc->totaltime is allocated.
     965             :  */
     966             : static void
     967       86926 : pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
     968             : {
     969       86926 :     if (prev_ExecutorStart)
     970           0 :         prev_ExecutorStart(queryDesc, eflags);
     971             :     else
     972       86926 :         standard_ExecutorStart(queryDesc, eflags);
     973             : 
     974             :     /*
     975             :      * If query has queryId zero, don't track it.  This prevents double
     976             :      * counting of optimizable statements that are directly contained in
     977             :      * utility statements.
     978             :      */
     979       86400 :     if (pgss_enabled(exec_nested_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
     980             :     {
     981             :         /*
     982             :          * Set up to track total elapsed time in ExecutorRun.  Make sure the
     983             :          * space is allocated in the per-query context so it will go away at
     984             :          * ExecutorEnd.
                     :          *
                     :          * An already-set totaltime is left as it is.
     985             :          */
     986       52900 :         if (queryDesc->totaltime == NULL)
     987             :         {
     988             :             MemoryContext oldcxt;
     989             : 
     990       52900 :             oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
     991       52900 :             queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
     992       52900 :             MemoryContextSwitchTo(oldcxt);
     993             :         }
     994             :     }
     995       86400 : }
     996             : 
     997             : /*
     998             :  * ExecutorRun hook: all we need do is track nesting depth
                     :  *
                     :  * exec_nested_level is incremented before the call is forwarded (to the
                     :  * previous ExecutorRun hook if any, else standard_ExecutorRun()) with
                     :  * all four arguments unchanged, and decremented again inside PG_FINALLY.
     999             :  */
    1000             : static void
    1001       84882 : pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
    1002             :                  bool execute_once)
    1003             : {
    1004       84882 :     exec_nested_level++;
    1005       84882 :     PG_TRY();
    1006             :     {
    1007       84882 :         if (prev_ExecutorRun)
    1008           0 :             prev_ExecutorRun(queryDesc, direction, count, execute_once);
    1009             :         else
    1010       84882 :             standard_ExecutorRun(queryDesc, direction, count, execute_once);
    1011             :     }
    1012        6222 :     PG_FINALLY();
    1013             :     {
    1014       84882 :         exec_nested_level--;
    1015             :     }
    1016       84882 :     PG_END_TRY();
    1017       78660 : }
    1018             : 
    1019             : /*
    1020             :  * ExecutorFinish hook: all we need do is track nesting depth
                     :  *
                     :  * exec_nested_level is incremented before queryDesc is forwarded (to the
                     :  * previous ExecutorFinish hook if any, else standard_ExecutorFinish()),
                     :  * and decremented again inside PG_FINALLY.
    1021             :  */
    1022             : static void
    1023       75402 : pgss_ExecutorFinish(QueryDesc *queryDesc)
    1024             : {
    1025       75402 :     exec_nested_level++;
    1026       75402 :     PG_TRY();
    1027             :     {
    1028       75402 :         if (prev_ExecutorFinish)
    1029           0 :             prev_ExecutorFinish(queryDesc);
    1030             :         else
    1031       75402 :             standard_ExecutorFinish(queryDesc);
    1032             :     }
    1033         262 :     PG_FINALLY();
    1034             :     {
    1035       75402 :         exec_nested_level--;
    1036             :     }
    1037       75402 :     PG_END_TRY();
    1038       75140 : }
    1039             : 
    1040             : /*
    1041             :  * ExecutorEnd hook: store results if needed
                     :  *
                     :  * Results are stored only when the planned statement's queryId is
                     :  * nonzero, queryDesc->totaltime is set, and tracking is enabled at the
                     :  * current executor nesting level.  In that case we pass pgss_store() the
                     :  * source text, queryId, statement location and length, PGSS_EXEC, the
                     :  * total time in milliseconds, es_total_processed, the buffer and WAL
                     :  * usage from totaltime, and the JIT instrumentation if es_jit is set
                     :  * (NULL otherwise).
                     :  *
                     :  * Storing happens before queryDesc is forwarded to the previous
                     :  * ExecutorEnd hook if any, else standard_ExecutorEnd(); that forwarding
                     :  * is unconditional.
    1042             :  */
    1043             : static void
    1044       79836 : pgss_ExecutorEnd(QueryDesc *queryDesc)
    1045             : {
    1046       79836 :     uint64      queryId = queryDesc->plannedstmt->queryId;
    1047             : 
    1048       79836 :     if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
    1049       50734 :         pgss_enabled(exec_nested_level))
    1050             :     {
    1051             :         /*
    1052             :          * Make sure stats accumulation is done.  (Note: it's okay if several
    1053             :          * levels of hook all do this.)
    1054             :          */
    1055       50734 :         InstrEndLoop(queryDesc->totaltime);
    1056             : 
    1057       50542 :         pgss_store(queryDesc->sourceText,
    1058             :                    queryId,
    1059       50734 :                    queryDesc->plannedstmt->stmt_location,
    1060       50734 :                    queryDesc->plannedstmt->stmt_len,
    1061             :                    PGSS_EXEC,
    1062       50734 :                    queryDesc->totaltime->total * 1000.0,  /* convert to msec */
    1063       50734 :                    queryDesc->estate->es_total_processed,
    1064       50734 :                    &queryDesc->totaltime->bufusage,
    1065       50734 :                    &queryDesc->totaltime->walusage,
    1066       50734 :                    queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
    1067             :                    NULL);
    1068             :     }
    1069             : 
    1070       79836 :     if (prev_ExecutorEnd)
    1071           0 :         prev_ExecutorEnd(queryDesc);
    1072             :     else
    1073       79836 :         standard_ExecutorEnd(queryDesc);
    1074       79836 : }
    1075             : 
    1076             : /*
    1077             :  * ProcessUtility hook
                     :  *
                     :  * Runs the utility statement by calling prev_ProcessUtility if one is
                     :  * installed, otherwise standard_ProcessUtility; all arguments are passed
                     :  * on unchanged.
                     :  *
                     :  * When tracking is enabled at the current nesting level,
                     :  * pgss_track_utility is set, and the statement satisfies
                     :  * PGSS_HANDLED_UTILITY(), the call is timed, buffer and WAL usage deltas
                     :  * are computed around it, and the result is recorded through pgss_store()
                     :  * as a PGSS_EXEC sample.  Otherwise the statement is only passed through.
                     :  *
                     :  * The only field of *pstmt written here is queryId (see below).
    1078             :  */
    1079             : static void
    1080       54504 : pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
    1081             :                     bool readOnlyTree,
    1082             :                     ProcessUtilityContext context,
    1083             :                     ParamListInfo params, QueryEnvironment *queryEnv,
    1084             :                     DestReceiver *dest, QueryCompletion *qc)
    1085             : {
    1086       54504 :     Node       *parsetree = pstmt->utilityStmt;
    1087       54504 :     uint64      saved_queryId = pstmt->queryId;  /* copied now: *pstmt is off-limits after the call */
    1088       54504 :     int         saved_stmt_location = pstmt->stmt_location;
    1089       54504 :     int         saved_stmt_len = pstmt->stmt_len;
    1090             : 
    1091             :     /*
    1092             :      * Force utility statements to get queryId zero.  We do this even in cases
    1093             :      * where the statement contains an optimizable statement for which a
    1094             :      * queryId could be derived (such as EXPLAIN or DECLARE CURSOR).  For such
    1095             :      * cases, runtime control will first go through ProcessUtility and then
    1096             :      * the executor, and we don't want the executor hooks to do anything,
    1097             :      * since we are already measuring the statement's costs at the utility
    1098             :      * level.
    1099             :      *
    1100             :      * Note that this is only done if pg_stat_statements is enabled and
    1101             :      * configured to track utility statements, in the unlikely possibility
    1102             :      * that the user configured another extension to handle utility
    1103             :      * statements only.
    1104             :      */
    1105       54504 :     if (pgss_enabled(exec_nested_level) && pgss_track_utility)
    1106       45842 :         pstmt->queryId = UINT64CONST(0);
    1107             : 
    1108             :     /*
    1109             :      * If it's an EXECUTE statement, we don't track it and don't increment the
    1110             :      * nesting level.  This allows the cycles to be charged to the underlying
    1111             :      * PREPARE instead (by the Executor hooks), which is much more useful.
    1112             :      *
    1113             :      * We also don't track execution of PREPARE.  If we did, we would get one
    1114             :      * hash table entry for the PREPARE (with hash calculated from the query
    1115             :      * string), and then a different one with the same query string (but hash
    1116             :      * calculated from the query tree) would be used to accumulate costs of
    1117             :      * ensuing EXECUTEs.  This would be confusing, and inconsistent with other
    1118             :      * cases where planning time is not included at all.
    1119             :      *
    1120             :      * Likewise, we don't track execution of DEALLOCATE.
    1121             :      */
    1122       54504 :     if (pgss_track_utility && pgss_enabled(exec_nested_level) &&
    1123       45842 :         PGSS_HANDLED_UTILITY(parsetree))
    1124       38884 :     {
    1125             :         instr_time  start;
    1126             :         instr_time  duration;
    1127             :         uint64      rows;
    1128             :         BufferUsage bufusage_start,
    1129             :                     bufusage;
    1130             :         WalUsage    walusage_start,
    1131             :                     walusage;
    1132             : 
    1133       42876 :         bufusage_start = pgBufferUsage;  /* snapshots taken before the call, diffed afterwards */
    1134       42876 :         walusage_start = pgWalUsage;
    1135       42876 :         INSTR_TIME_SET_CURRENT(start);
    1136             : 
    1137       42876 :         exec_nested_level++;  /* pgss_store() treats a non-zero level as "not top level" */
    1138       42876 :         PG_TRY();
    1139             :         {
    1140       42876 :             if (prev_ProcessUtility)
    1141           0 :                 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
    1142             :                                     context, params, queryEnv,
    1143             :                                     dest, qc);
    1144             :             else
    1145       42876 :                 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
    1146             :                                         context, params, queryEnv,
    1147             :                                         dest, qc);
    1148             :         }
    1149        3992 :         PG_FINALLY();
    1150             :         {
    1151       42876 :             exec_nested_level--;  /* restore the level on error exit as well as normal exit */
    1152             :         }
    1153       42876 :         PG_END_TRY();
    1154             : 
    1155             :         /*
    1156             :          * CAUTION: do not access the *pstmt data structure again below here.
    1157             :          * If it was a ROLLBACK or similar, that data structure may have been
    1158             :          * freed.  We must copy everything we still need into local variables,
    1159             :          * which we did above.
    1160             :          *
    1161             :          * For the same reason, we can't risk restoring pstmt->queryId to its
    1162             :          * former value, which'd otherwise be a good idea.
    1163             :          */
    1164             : 
    1165       38884 :         INSTR_TIME_SET_CURRENT(duration);
    1166       38884 :         INSTR_TIME_SUBTRACT(duration, start);  /* duration now holds elapsed time */
    1167             : 
    1168             :         /*
    1169             :          * Track the total number of rows retrieved or affected by the utility
    1170             :          * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
    1171             :          * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.  rows is 0 when qc
                     :          * is NULL or its command tag is not one of the four tested below.
    1172             :          */
    1173       38884 :         rows = (qc && (qc->commandTag == CMDTAG_COPY ||
    1174       36338 :                        qc->commandTag == CMDTAG_FETCH ||
    1175       35824 :                        qc->commandTag == CMDTAG_SELECT ||
    1176       35480 :                        qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
    1177       77768 :             qc->nprocessed : 0;
    1178             : 
    1179             :         /* Buffer usage of this call: current counters minus the earlier snapshot. */
    1180       38884 :         memset(&bufusage, 0, sizeof(BufferUsage));
    1181       38884 :         BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
    1182             : 
    1183             :         /* WAL usage of this call, computed the same way. */
    1184       38884 :         memset(&walusage, 0, sizeof(WalUsage));
    1185       38884 :         WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
    1186             : 
    1187       38884 :         pgss_store(queryString,
    1188             :                    saved_queryId,
    1189             :                    saved_stmt_location,
    1190             :                    saved_stmt_len,
    1191             :                    PGSS_EXEC,
    1192       38884 :                    INSTR_TIME_GET_MILLISEC(duration),
    1193             :                    rows,
    1194             :                    &bufusage,
    1195             :                    &walusage,
    1196             :                    NULL,  /* jitusage */
    1197             :                    NULL);  /* jstate */
    1198             :     }
    1199             :     else
    1200             :     {
    1201       11628 :         if (prev_ProcessUtility)  /* untracked: just chain to the next hook */
    1202           0 :             prev_ProcessUtility(pstmt, queryString, readOnlyTree,
    1203             :                                 context, params, queryEnv,
    1204             :                                 dest, qc);
    1205             :         else
    1206       11628 :             standard_ProcessUtility(pstmt, queryString, readOnlyTree,
    1207             :                                     context, params, queryEnv,
    1208             :                                     dest, qc);
    1209             :     }
    1210       50320 : }
    1211             : 
    1212             : /*
    1213             :  * Store some statistics for a statement.
    1214             :  *
    1215             :  * If jstate is not NULL then we're trying to create an entry for which
    1216             :  * we have no statistics as yet; we just want to record the normalized
    1217             :  * query string.  total_time, rows, bufusage and walusage are ignored in this
    1218             :  * case.
    1219             :  *
    1220             :  * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
    1221             :  * for the arrays in the Counters field.
                     :  *
                     :  * query must not be NULL (asserted).  query_location and query_len are
                     :  * handed to CleanQuerytext(), which may adjust them along with the query
                     :  * pointer.
                     :  *
                     :  * When jstate is NULL, bufusage and walusage are dereferenced
                     :  * unconditionally, so they must be valid pointers; jitusage may be NULL,
                     :  * in which case the JIT counters are left alone.
                     :  *
                     :  * The function returns without doing anything if pgss or pgss_hash is not
                     :  * set, or if queryId is zero.  It also skips the counter update if the
                     :  * query text for a new entry could not be written.
    1222             :  */
    1223             : static void
    1224      136580 : pgss_store(const char *query, uint64 queryId,
    1225             :            int query_location, int query_len,
    1226             :            pgssStoreKind kind,
    1227             :            double total_time, uint64 rows,
    1228             :            const BufferUsage *bufusage,
    1229             :            const WalUsage *walusage,
    1230             :            const struct JitInstrumentation *jitusage,
    1231             :            JumbleState *jstate)
    1232             : {
    1233             :     pgssHashKey key;
    1234             :     pgssEntry  *entry;
    1235      136580 :     char       *norm_query = NULL;  /* set only if a normalized text is generated below */
    1236      136580 :     int         encoding = GetDatabaseEncoding();
    1237             : 
    1238             :     Assert(query != NULL);
    1239             : 
    1240             :     /* Safety check... */
    1241      136580 :     if (!pgss || !pgss_hash)
    1242           0 :         return;
    1243             : 
    1244             :     /*
    1245             :      * Nothing to do if compute_query_id isn't enabled and no other module
    1246             :      * computed a query identifier.
    1247             :      */
    1248      136580 :     if (queryId == UINT64CONST(0))
    1249           0 :         return;
    1250             : 
    1251             :     /*
    1252             :      * Confine our attention to the relevant part of the string, if the query
    1253             :      * is a portion of a multi-statement source string, and update query
    1254             :      * location and length if needed.
    1255             :      */
    1256      136580 :     query = CleanQuerytext(query, &query_location, &query_len);
    1257             : 
    1258             :     /* Set up key for hashtable search */
    1259             : 
    1260             :     /* Zero the whole key before filling it in.  NOTE(review): the earlier wording of this comment was ambiguous; presumably the point is that any padding bytes in pgssHashKey must be zeroed because the key is hashed bytewise -- confirm against the struct definition. */
    1261      136580 :     memset(&key, 0, sizeof(pgssHashKey));
    1262             : 
    1263      136580 :     key.userid = GetUserId();
    1264      136580 :     key.dbid = MyDatabaseId;
    1265      136580 :     key.queryid = queryId;
    1266      136580 :     key.toplevel = (exec_nested_level == 0);
    1267             : 
    1268             :     /* Lookup the hash table entry with shared lock. */
    1269      136580 :     LWLockAcquire(pgss->lock, LW_SHARED);
    1270             : 
    1271      136580 :     entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
    1272             : 
    1273             :     /* Create new entry, if not present */
    1274      136580 :     if (!entry)
    1275             :     {
    1276             :         Size        query_offset;
    1277             :         int         gc_count;
    1278             :         bool        stored;
    1279             :         bool        do_gc;
    1280             : 
    1281             :         /*
    1282             :          * Create a new, normalized query string if caller asked.  We don't
    1283             :          * need to hold the lock while doing this work.  (Note: in any case,
    1284             :          * it's possible that someone else creates a duplicate hashtable entry
    1285             :          * in the interval where we don't hold the lock below.  That case is
    1286             :          * handled by entry_alloc.)
    1287             :          */
    1288       49400 :         if (jstate)
    1289             :         {
    1290       17604 :             LWLockRelease(pgss->lock);
    1291       17604 :             norm_query = generate_normalized_query(jstate, query,
    1292             :                                                    query_location,
    1293             :                                                    &query_len);  /* query_len may be updated */
    1294       17604 :             LWLockAcquire(pgss->lock, LW_SHARED);
    1295             :         }
    1296             : 
    1297             :         /* Append new query text to file with only shared lock held */
    1298       49400 :         stored = qtext_store(norm_query ? norm_query : query, query_len,
    1299             :                              &query_offset, &gc_count);
    1300             : 
    1301             :         /*
    1302             :          * Determine whether we need to garbage collect external query texts
    1303             :          * while the shared lock is still held.  This micro-optimization
    1304             :          * avoids taking the time to decide this while holding exclusive lock.
    1305             :          */
    1306       49400 :         do_gc = need_gc_qtexts();
    1307             : 
    1308             :         /* Need exclusive lock to make a new hashtable entry - promote */
    1309       49400 :         LWLockRelease(pgss->lock);  /* released then re-acquired: the lock is briefly not held */
    1310       49400 :         LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
    1311             : 
    1312             :         /*
    1313             :          * A garbage collection may have occurred while we weren't holding the
    1314             :          * lock.  In the unlikely event that this happens, the query text we
    1315             :          * stored above will have been garbage collected, so write it again.
    1316             :          * This should be infrequent enough that doing it while holding
    1317             :          * exclusive lock isn't a performance problem.
    1318             :          */
    1319       49400 :         if (!stored || pgss->gc_count != gc_count)
    1320           0 :             stored = qtext_store(norm_query ? norm_query : query, query_len,
    1321             :                                  &query_offset, NULL);
    1322             : 
    1323             :         /* If we failed to write to the text file, give up */
    1324       49400 :         if (!stored)
    1325           0 :             goto done;  /* entry is still NULL here; the counter update is skipped */
    1326             : 
    1327             :         /* OK to create a new hashtable entry */
    1328       49400 :         entry = entry_alloc(&key, query_offset, query_len, encoding,
    1329             :                             jstate != NULL);
    1330             : 
    1331             :         /* If needed, perform garbage collection while exclusive lock held */
    1332       49400 :         if (do_gc)
    1333           0 :             gc_qtexts();
    1334             :     }
    1335             : 
    1336             :     /* Increment the counts, except when jstate is not NULL */
    1337      136580 :     if (!jstate)
    1338             :     {
    1339             :         /*
    1340             :          * Grab the spinlock while updating the counters (see comment about
    1341             :          * locking rules at the head of the file)
    1342             :          */
    1343       89734 :         volatile pgssEntry *e = (volatile pgssEntry *) entry;
    1344             : 
    1345             :         Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
    1346             : 
    1347       89734 :         SpinLockAcquire(&e->mutex);
    1348             : 
    1349             :         /* "Unstick" entry if it was previously sticky */
    1350       89734 :         if (IS_STICKY(e->counters))
    1351       48278 :             e->counters.usage = USAGE_INIT;
    1352             : 
    1353       89734 :         e->counters.calls[kind] += 1;
    1354       89734 :         e->counters.total_time[kind] += total_time;
    1355             : 
    1356       89734 :         if (e->counters.calls[kind] == 1)  /* first sample of this kind seeds min, max and mean */
    1357             :         {
    1358       48362 :             e->counters.min_time[kind] = total_time;
    1359       48362 :             e->counters.max_time[kind] = total_time;
    1360       48362 :             e->counters.mean_time[kind] = total_time;
    1361             :         }
    1362             :         else
    1363             :         {
    1364             :             /*
    1365             :              * Welford's method for accurately computing variance. See
    1366             :              * <http://www.johndcook.com/blog/standard_deviation/>
    1367             :              */
    1368       41372 :             double      old_mean = e->counters.mean_time[kind];
    1369             : 
    1370       41372 :             e->counters.mean_time[kind] +=
    1371       41372 :                 (total_time - old_mean) / e->counters.calls[kind];
    1372       41372 :             e->counters.sum_var_time[kind] +=
    1373       41372 :                 (total_time - old_mean) * (total_time - e->counters.mean_time[kind]);  /* uses old and new mean */
    1374             : 
    1375             :             /* calculate min and max time */
    1376       41372 :             if (e->counters.min_time[kind] > total_time)
    1377       11144 :                 e->counters.min_time[kind] = total_time;
    1378       41372 :             if (e->counters.max_time[kind] < total_time)
    1379        4766 :                 e->counters.max_time[kind] = total_time;
    1380             :         }
    1381       89734 :         e->counters.rows += rows;
    1382       89734 :         e->counters.shared_blks_hit += bufusage->shared_blks_hit;
    1383       89734 :         e->counters.shared_blks_read += bufusage->shared_blks_read;
    1384       89734 :         e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
    1385       89734 :         e->counters.shared_blks_written += bufusage->shared_blks_written;
    1386       89734 :         e->counters.local_blks_hit += bufusage->local_blks_hit;
    1387       89734 :         e->counters.local_blks_read += bufusage->local_blks_read;
    1388       89734 :         e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
    1389       89734 :         e->counters.local_blks_written += bufusage->local_blks_written;
    1390       89734 :         e->counters.temp_blks_read += bufusage->temp_blks_read;
    1391       89734 :         e->counters.temp_blks_written += bufusage->temp_blks_written;
    1392       89734 :         e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
    1393       89734 :         e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
    1394       89734 :         e->counters.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time);
    1395       89734 :         e->counters.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time);
    1396       89734 :         e->counters.usage += USAGE_EXEC(total_time);
    1397       89734 :         e->counters.wal_records += walusage->wal_records;
    1398       89734 :         e->counters.wal_fpi += walusage->wal_fpi;
    1399       89734 :         e->counters.wal_bytes += walusage->wal_bytes;
    1400       89734 :         if (jitusage)  /* JIT figures are optional */
    1401             :         {
    1402         192 :             e->counters.jit_functions += jitusage->created_functions;
    1403         192 :             e->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
    1404             : 
    1405         192 :             if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))  /* count bumped only for non-zero time */
    1406         132 :                 e->counters.jit_inlining_count++;
    1407         192 :             e->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
    1408             : 
    1409         192 :             if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
    1410         188 :                 e->counters.jit_optimization_count++;
    1411         192 :             e->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
    1412             : 
    1413         192 :             if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
    1414         188 :                 e->counters.jit_emission_count++;
    1415         192 :             e->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
    1416             :         }
    1417             : 
    1418       89734 :         SpinLockRelease(&e->mutex);
    1419             :     }
    1420             : 
    1421       46846 : done:
    1422      136580 :     LWLockRelease(pgss->lock);  /* held in shared or exclusive mode, depending on the path taken */
    1423             : 
    1424             :     /* We postpone this clean-up until we're out of the lock */
    1425      136580 :     if (norm_query)
    1426       17604 :         pfree(norm_query);
    1427             : }
    1428             : 
    1429             : /*
    1430             :  * Reset statement statistics corresponding to userid, dbid, and queryid.
                     :  *
                     :  * SQL-callable entry point.  Reads three arguments (two OIDs and one
                     :  * 64-bit integer), passes them to entry_reset(), and returns void.
    1431             :  */
    1432             : Datum
    1433          80 : pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
    1434             : {
    1435             :     Oid         userid;
    1436             :     Oid         dbid;
    1437             :     uint64      queryid;
    1438             : 
    1439          80 :     userid = PG_GETARG_OID(0);
    1440          80 :     dbid = PG_GETARG_OID(1);
    1441          80 :     queryid = (uint64) PG_GETARG_INT64(2);  /* fetched as signed int64, cast to uint64 */
    1442             : 
    1443          80 :     entry_reset(userid, dbid, queryid);
    1444             : 
    1445          80 :     PG_RETURN_VOID();
    1446             : }
    1447             : 
    1448             : /*
    1449             :  * Reset statement statistics.
                     :  *
                     :  * SQL-callable entry point taking no arguments.  Calls entry_reset() with
                     :  * userid, dbid and queryid all zero and returns void.
                     :  *
                     :  * NOTE(review): presumably zero means "no filter", so this resets every
                     :  * entry; entry_reset() is not visible from here -- confirm.
    1450             :  */
    1451             : Datum
    1452           2 : pg_stat_statements_reset(PG_FUNCTION_ARGS)
    1453             : {
    1454           2 :     entry_reset(0, 0, 0);
    1455             : 
    1456           2 :     PG_RETURN_VOID();
    1457             : }
    1458             : 
    1459             : /*
                     :  * Number of output arguments (columns) for various API versions.
                     :  *
                     :  * pg_stat_statements_internal() switches on the result tuple descriptor's
                     :  * natts using these values, to check that the column count agrees with
                     :  * the API version it was called with and to detect version 1.1.
                     :  */
    1460             : #define PG_STAT_STATEMENTS_COLS_V1_0    14
    1461             : #define PG_STAT_STATEMENTS_COLS_V1_1    18
    1462             : #define PG_STAT_STATEMENTS_COLS_V1_2    19
    1463             : #define PG_STAT_STATEMENTS_COLS_V1_3    23
    1464             : #define PG_STAT_STATEMENTS_COLS_V1_8    32
    1465             : #define PG_STAT_STATEMENTS_COLS_V1_9    33
    1466             : #define PG_STAT_STATEMENTS_COLS_V1_10   43
    1467             : #define PG_STAT_STATEMENTS_COLS         43  /* maximum of above; must be raised when a wider version is added */
    1468             : 
    1469             : /*
    1470             :  * Retrieve statement statistics.
    1471             :  *
    1472             :  * The SQL API of this function has changed multiple times, and will likely
    1473             :  * do so again in future.  To support the case where a newer version of this
    1474             :  * loadable module is being used with an old SQL declaration of the function,
    1475             :  * we continue to support the older API versions.  For 1.2 and later, the
    1476             :  * expected API version is identified by embedding it in the C name of the
    1477             :  * function.  Unfortunately we weren't bright enough to do that for 1.1.
                     :  *
                     :  * This is the entry point for API version 1.10.  It reads the boolean
                     :  * "showtext" argument and delegates to pg_stat_statements_internal() with
                     :  * PGSS_V1_10.
    1478             :  */
    1479             : Datum
    1480          82 : pg_stat_statements_1_10(PG_FUNCTION_ARGS)
    1481             : {
    1482          82 :     bool        showtext = PG_GETARG_BOOL(0);  /* when true, the internal routine loads the query text file */
    1483             : 
    1484          82 :     pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
    1485             : 
    1486          82 :     return (Datum) 0;  /* placeholder; presumably rows go back via the materialized SRF set up by the internal routine */
    1487             : }
    1488             : 
    1489             : Datum
    1490           2 : pg_stat_statements_1_9(PG_FUNCTION_ARGS)  /* entry point for API version 1.9 */
    1491             : {
    1492           2 :     bool        showtext = PG_GETARG_BOOL(0);  /* when true, the internal routine loads the query text file */
    1493             : 
    1494           2 :     pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
    1495             : 
    1496           2 :     return (Datum) 0;  /* placeholder return value */
    1497             : }
    1498             : 
    1499             : Datum
    1500           0 : pg_stat_statements_1_8(PG_FUNCTION_ARGS)  /* entry point for API version 1.8 */
    1501             : {
    1502           0 :     bool        showtext = PG_GETARG_BOOL(0);  /* when true, the internal routine loads the query text file */
    1503             : 
    1504           0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
    1505             : 
    1506           0 :     return (Datum) 0;  /* placeholder return value */
    1507             : }
    1508             : 
    1509             : Datum
    1510           2 : pg_stat_statements_1_3(PG_FUNCTION_ARGS)  /* entry point for API version 1.3 */
    1511             : {
    1512           2 :     bool        showtext = PG_GETARG_BOOL(0);  /* when true, the internal routine loads the query text file */
    1513             : 
    1514           2 :     pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
    1515             : 
    1516           2 :     return (Datum) 0;  /* placeholder return value */
    1517             : }
    1518             : 
    1519             : Datum
    1520           0 : pg_stat_statements_1_2(PG_FUNCTION_ARGS)  /* entry point for API version 1.2 */
    1521             : {
    1522           0 :     bool        showtext = PG_GETARG_BOOL(0);  /* when true, the internal routine loads the query text file */
    1523             : 
    1524           0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
    1525             : 
    1526           0 :     return (Datum) 0;  /* placeholder return value */
    1527             : }
    1528             : 
    1529             : /*
    1530             :  * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
    1531             :  * This can be removed someday, perhaps.
                     :  *
                     :  * No argument is read here; showtext is always passed as true.
    1532             :  */
    1533             : Datum
    1534           0 : pg_stat_statements(PG_FUNCTION_ARGS)
    1535             : {
    1536             :     /* We always claim 1.0; pg_stat_statements_internal() switches to 1.1 itself when the column count matches that version */
    1537           0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
    1538             : 
    1539           0 :     return (Datum) 0;  /* placeholder return value */
    1540             : }
    1541             : 
    1542             : /* Common code for all versions of pg_stat_statements() */
    1543             : static void
    1544          86 : pg_stat_statements_internal(FunctionCallInfo fcinfo,
    1545             :                             pgssVersion api_version,
    1546             :                             bool showtext)
    1547             : {
    1548          86 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    1549          86 :     Oid         userid = GetUserId();
    1550          86 :     bool        is_allowed_role = false;
    1551          86 :     char       *qbuffer = NULL;
    1552          86 :     Size        qbuffer_size = 0;
    1553          86 :     Size        extent = 0;
    1554          86 :     int         gc_count = 0;
    1555             :     HASH_SEQ_STATUS hash_seq;
    1556             :     pgssEntry  *entry;
    1557             : 
    1558             :     /*
    1559             :      * Superusers or roles with the privileges of pg_read_all_stats members
    1560             :      * are allowed
    1561             :      */
    1562          86 :     is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
    1563             : 
    1564             :     /* hash table must exist already */
    1565          86 :     if (!pgss || !pgss_hash)
    1566           0 :         ereport(ERROR,
    1567             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1568             :                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
    1569             : 
    1570          86 :     InitMaterializedSRF(fcinfo, 0);
    1571             : 
    1572             :     /*
    1573             :      * Check we have the expected number of output arguments.  Aside from
    1574             :      * being a good safety check, we need a kluge here to detect API version
    1575             :      * 1.1, which was wedged into the code in an ill-considered way.
    1576             :      */
    1577          86 :     switch (rsinfo->setDesc->natts)
    1578             :     {
    1579           0 :         case PG_STAT_STATEMENTS_COLS_V1_0:
    1580           0 :             if (api_version != PGSS_V1_0)
    1581           0 :                 elog(ERROR, "incorrect number of output arguments");
    1582           0 :             break;
    1583           0 :         case PG_STAT_STATEMENTS_COLS_V1_1:
    1584             :             /* pg_stat_statements() should have told us 1.0 */
    1585           0 :             if (api_version != PGSS_V1_0)
    1586           0 :                 elog(ERROR, "incorrect number of output arguments");
    1587           0 :             api_version = PGSS_V1_1;
    1588           0 :             break;
    1589           0 :         case PG_STAT_STATEMENTS_COLS_V1_2:
    1590           0 :             if (api_version != PGSS_V1_2)
    1591           0 :                 elog(ERROR, "incorrect number of output arguments");
    1592           0 :             break;
    1593           2 :         case PG_STAT_STATEMENTS_COLS_V1_3:
    1594           2 :             if (api_version != PGSS_V1_3)
    1595           0 :                 elog(ERROR, "incorrect number of output arguments");
    1596           2 :             break;
    1597           0 :         case PG_STAT_STATEMENTS_COLS_V1_8:
    1598           0 :             if (api_version != PGSS_V1_8)
    1599           0 :                 elog(ERROR, "incorrect number of output arguments");
    1600           0 :             break;
    1601           2 :         case PG_STAT_STATEMENTS_COLS_V1_9:
    1602           2 :             if (api_version != PGSS_V1_9)
    1603           0 :                 elog(ERROR, "incorrect number of output arguments");
    1604           2 :             break;
    1605          82 :         case PG_STAT_STATEMENTS_COLS_V1_10:
    1606          82 :             if (api_version != PGSS_V1_10)
    1607           0 :                 elog(ERROR, "incorrect number of output arguments");
    1608          82 :             break;
    1609           0 :         default:
    1610           0 :             elog(ERROR, "incorrect number of output arguments");
    1611             :     }
    1612             : 
    1613             :     /*
    1614             :      * We'd like to load the query text file (if needed) while not holding any
    1615             :      * lock on pgss->lock.  In the worst case we'll have to do this again
    1616             :      * after we have the lock, but it's unlikely enough to make this a win
    1617             :      * despite occasional duplicated work.  We need to reload if anybody
    1618             :      * writes to the file (either a retail qtext_store(), or a garbage
    1619             :      * collection) between this point and where we've gotten shared lock.  If
    1620             :      * a qtext_store is actually in progress when we look, we might as well
    1621             :      * skip the speculative load entirely.
    1622             :      */
    1623          86 :     if (showtext)
    1624             :     {
    1625             :         int         n_writers;
    1626             : 
    1627             :         /* Take the mutex so we can examine variables */
    1628             :         {
    1629          86 :             volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    1630             : 
    1631          86 :             SpinLockAcquire(&s->mutex);
    1632          86 :             extent = s->extent;
    1633          86 :             n_writers = s->n_writers;
    1634          86 :             gc_count = s->gc_count;
    1635          86 :             SpinLockRelease(&s->mutex);
    1636             :         }
    1637             : 
    1638             :         /* No point in loading file now if there are active writers */
    1639          86 :         if (n_writers == 0)
    1640          86 :             qbuffer = qtext_load_file(&qbuffer_size);
    1641             :     }
    1642             : 
    1643             :     /*
    1644             :      * Get shared lock, load or reload the query text file if we must, and
    1645             :      * iterate over the hashtable entries.
    1646             :      *
    1647             :      * With a large hash table, we might be holding the lock rather longer
    1648             :      * than one could wish.  However, this only blocks creation of new hash
    1649             :      * table entries, and the larger the hash table the less likely that is to
    1650             :      * be needed.  So we can hope this is okay.  Perhaps someday we'll decide
    1651             :      * we need to partition the hash table to limit the time spent holding any
    1652             :      * one lock.
    1653             :      */
    1654          86 :     LWLockAcquire(pgss->lock, LW_SHARED);
    1655             : 
    1656          86 :     if (showtext)
    1657             :     {
    1658             :         /*
    1659             :          * Here it is safe to examine extent and gc_count without taking the
    1660             :          * mutex.  Note that although other processes might change
    1661             :          * pgss->extent just after we look at it, the strings they then write
    1662             :          * into the file cannot yet be referenced in the hashtable, so we
    1663             :          * don't care whether we see them or not.
    1664             :          *
    1665             :          * If qtext_load_file fails, we just press on; we'll return NULL for
    1666             :          * every query text.
    1667             :          */
    1668          86 :         if (qbuffer == NULL ||
    1669          86 :             pgss->extent != extent ||
    1670          86 :             pgss->gc_count != gc_count)
    1671             :         {
    1672           0 :             free(qbuffer);
    1673           0 :             qbuffer = qtext_load_file(&qbuffer_size);
    1674             :         }
    1675             :     }
    1676             : 
    1677          86 :     hash_seq_init(&hash_seq, pgss_hash);
    1678       48018 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    1679             :     {
    1680             :         Datum       values[PG_STAT_STATEMENTS_COLS];
    1681             :         bool        nulls[PG_STAT_STATEMENTS_COLS];
    1682       47932 :         int         i = 0;
    1683             :         Counters    tmp;
    1684             :         double      stddev;
    1685       47932 :         int64       queryid = entry->key.queryid;
    1686             : 
    1687       47932 :         memset(values, 0, sizeof(values));
    1688       47932 :         memset(nulls, 0, sizeof(nulls));
    1689             : 
    1690       47932 :         values[i++] = ObjectIdGetDatum(entry->key.userid);
    1691       47932 :         values[i++] = ObjectIdGetDatum(entry->key.dbid);
    1692       47932 :         if (api_version >= PGSS_V1_9)
    1693       47912 :             values[i++] = BoolGetDatum(entry->key.toplevel);
    1694             : 
    1695       47932 :         if (is_allowed_role || entry->key.userid == userid)
    1696             :         {
    1697       47932 :             if (api_version >= PGSS_V1_2)
    1698       47932 :                 values[i++] = Int64GetDatumFast(queryid);
    1699             : 
    1700       47932 :             if (showtext)
    1701             :             {
    1702       47932 :                 char       *qstr = qtext_fetch(entry->query_offset,
    1703             :                                                entry->query_len,
    1704             :                                                qbuffer,
    1705             :                                                qbuffer_size);
    1706             : 
    1707       47932 :                 if (qstr)
    1708             :                 {
    1709             :                     char       *enc;
    1710             : 
    1711       47932 :                     enc = pg_any_to_server(qstr,
    1712             :                                            entry->query_len,
    1713             :                                            entry->encoding);
    1714             : 
    1715       47932 :                     values[i++] = CStringGetTextDatum(enc);
    1716             : 
    1717       47932 :                     if (enc != qstr)
    1718           0 :                         pfree(enc);
    1719             :                 }
    1720             :                 else
    1721             :                 {
    1722             :                     /* Just return a null if we fail to find the text */
    1723           0 :                     nulls[i++] = true;
    1724             :                 }
    1725             :             }
    1726             :             else
    1727             :             {
    1728             :                 /* Query text not requested */
    1729           0 :                 nulls[i++] = true;
    1730             :             }
    1731             :         }
    1732             :         else
    1733             :         {
    1734             :             /* Don't show queryid */
    1735           0 :             if (api_version >= PGSS_V1_2)
    1736           0 :                 nulls[i++] = true;
    1737             : 
    1738             :             /*
    1739             :              * Don't show query text, but hint as to the reason for not doing
    1740             :              * so if it was requested
    1741             :              */
    1742           0 :             if (showtext)
    1743           0 :                 values[i++] = CStringGetTextDatum("<insufficient privilege>");
    1744             :             else
    1745           0 :                 nulls[i++] = true;
    1746             :         }
    1747             : 
    1748             :         /* copy counters to a local variable to keep locking time short */
    1749             :         {
    1750       47932 :             volatile pgssEntry *e = (volatile pgssEntry *) entry;
    1751             : 
    1752       47932 :             SpinLockAcquire(&e->mutex);
    1753       47932 :             tmp = e->counters;
    1754       47932 :             SpinLockRelease(&e->mutex);
    1755             :         }
    1756             : 
    1757             :         /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
    1758       47932 :         if (IS_STICKY(tmp))
    1759        1140 :             continue;
    1760             : 
    1761             :         /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
    1762      140376 :         for (int kind = 0; kind < PGSS_NUMKIND; kind++)
    1763             :         {
    1764       93584 :             if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
    1765             :             {
    1766       93566 :                 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
    1767       93566 :                 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
    1768             :             }
    1769             : 
    1770       93584 :             if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
    1771             :                 api_version >= PGSS_V1_8)
    1772             :             {
    1773       93566 :                 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
    1774       93566 :                 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
    1775       93566 :                 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
    1776             : 
    1777             :                 /*
    1778             :                  * Note we are calculating the population variance here, not
    1779             :                  * the sample variance, as we have data for the whole
    1780             :                  * population, so Bessel's correction is not used, and we
    1781             :                  * don't divide by tmp.calls - 1.
    1782             :                  */
    1783       93566 :                 if (tmp.calls[kind] > 1)
    1784        8228 :                     stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
    1785             :                 else
    1786       85338 :                     stddev = 0.0;
    1787       93566 :                 values[i++] = Float8GetDatumFast(stddev);
    1788             :             }
    1789             :         }
    1790       46792 :         values[i++] = Int64GetDatumFast(tmp.rows);
    1791       46792 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
    1792       46792 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
    1793       46792 :         if (api_version >= PGSS_V1_1)
    1794       46792 :             values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
    1795       46792 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
    1796       46792 :         values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
    1797       46792 :         values[i++] = Int64GetDatumFast(tmp.local_blks_read);
    1798       46792 :         if (api_version >= PGSS_V1_1)
    1799       46792 :             values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
    1800       46792 :         values[i++] = Int64GetDatumFast(tmp.local_blks_written);
    1801       46792 :         values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
    1802       46792 :         values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
    1803       46792 :         if (api_version >= PGSS_V1_1)
    1804             :         {
    1805       46792 :             values[i++] = Float8GetDatumFast(tmp.blk_read_time);
    1806       46792 :             values[i++] = Float8GetDatumFast(tmp.blk_write_time);
    1807             :         }
    1808       46792 :         if (api_version >= PGSS_V1_10)
    1809             :         {
    1810       46750 :             values[i++] = Float8GetDatumFast(tmp.temp_blk_read_time);
    1811       46750 :             values[i++] = Float8GetDatumFast(tmp.temp_blk_write_time);
    1812             :         }
    1813       46792 :         if (api_version >= PGSS_V1_8)
    1814             :         {
    1815             :             char        buf[256];
    1816             :             Datum       wal_bytes;
    1817             : 
    1818       46774 :             values[i++] = Int64GetDatumFast(tmp.wal_records);
    1819       46774 :             values[i++] = Int64GetDatumFast(tmp.wal_fpi);
    1820             : 
    1821       46774 :             snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
    1822             : 
    1823             :             /* Convert to numeric. */
    1824       46774 :             wal_bytes = DirectFunctionCall3(numeric_in,
    1825             :                                             CStringGetDatum(buf),
    1826             :                                             ObjectIdGetDatum(0),
    1827             :                                             Int32GetDatum(-1));
    1828       46774 :             values[i++] = wal_bytes;
    1829             :         }
    1830       46792 :         if (api_version >= PGSS_V1_10)
    1831             :         {
    1832       46750 :             values[i++] = Int64GetDatumFast(tmp.jit_functions);
    1833       46750 :             values[i++] = Float8GetDatumFast(tmp.jit_generation_time);
    1834       46750 :             values[i++] = Int64GetDatumFast(tmp.jit_inlining_count);
    1835       46750 :             values[i++] = Float8GetDatumFast(tmp.jit_inlining_time);
    1836       46750 :             values[i++] = Int64GetDatumFast(tmp.jit_optimization_count);
    1837       46750 :             values[i++] = Float8GetDatumFast(tmp.jit_optimization_time);
    1838       46750 :             values[i++] = Int64GetDatumFast(tmp.jit_emission_count);
    1839       46750 :             values[i++] = Float8GetDatumFast(tmp.jit_emission_time);
    1840             :         }
    1841             : 
    1842             :         Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
    1843             :                      api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
    1844             :                      api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
    1845             :                      api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
    1846             :                      api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
    1847             :                      api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
    1848             :                      api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
    1849             :                      -1 /* fail if you forget to update this assert */ ));
    1850             : 
    1851       46792 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    1852             :     }
    1853             : 
    1854          86 :     LWLockRelease(pgss->lock);
    1855             : 
    1856          86 :     free(qbuffer);
    1857          86 : }
    1858             : 
    1859             : /* Number of output arguments (columns) for pg_stat_statements_info */
    1860             : #define PG_STAT_STATEMENTS_INFO_COLS    2
    1861             : 
    1862             : /*
    1863             :  * Return statistics of pg_stat_statements.
    1864             :  */
    1865             : Datum
    1866           2 : pg_stat_statements_info(PG_FUNCTION_ARGS)
    1867             : {
    1868             :     pgssGlobalStats stats;
    1869             :     TupleDesc   tupdesc;
    1870           2 :     Datum       values[PG_STAT_STATEMENTS_INFO_COLS] = {0};
    1871           2 :     bool        nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
    1872             : 
    1873           2 :     if (!pgss || !pgss_hash)
    1874           0 :         ereport(ERROR,
    1875             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1876             :                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
    1877             : 
    1878             :     /* Build a tuple descriptor for our result type */
    1879           2 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    1880           0 :         elog(ERROR, "return type must be a row type");
    1881             : 
    1882             :     /* Read global statistics for pg_stat_statements */
    1883             :     {
    1884           2 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    1885             : 
    1886           2 :         SpinLockAcquire(&s->mutex);
    1887           2 :         stats = s->stats;
    1888           2 :         SpinLockRelease(&s->mutex);
    1889             :     }
    1890             : 
    1891           2 :     values[0] = Int64GetDatum(stats.dealloc);
    1892           2 :     values[1] = TimestampTzGetDatum(stats.stats_reset);
    1893             : 
    1894           2 :     PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
    1895             : }
    1896             : 
    1897             : /*
    1898             :  * Estimate shared memory space needed.
    1899             :  */
    1900             : static Size
    1901           6 : pgss_memsize(void)
    1902             : {
    1903             :     Size        size;
    1904             : 
    1905           6 :     size = MAXALIGN(sizeof(pgssSharedState));
    1906           6 :     size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
    1907             : 
    1908           6 :     return size;
    1909             : }
    1910             : 
    1911             : /*
    1912             :  * Allocate a new hashtable entry.
    1913             :  * caller must hold an exclusive lock on pgss->lock
    1914             :  *
    1915             :  * "query" need not be null-terminated; we rely on query_len instead
    1916             :  *
    1917             :  * If "sticky" is true, make the new entry artificially sticky so that it will
    1918             :  * probably still be there when the query finishes execution.  We do this by
    1919             :  * giving it a median usage value rather than the normal value.  (Strictly
    1920             :  * speaking, query strings are normalized on a best effort basis, though it
    1921             :  * would be difficult to demonstrate this even under artificial conditions.)
    1922             :  *
    1923             :  * Note: despite needing exclusive lock, it's not an error for the target
    1924             :  * entry to already exist.  This is because pgss_store releases and
    1925             :  * reacquires lock after failing to find a match; so someone else could
    1926             :  * have made the entry while we waited to get exclusive lock.
    1927             :  */
    1928             : static pgssEntry *
    1929       49400 : entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
    1930             :             bool sticky)
    1931             : {
    1932             :     pgssEntry  *entry;
    1933             :     bool        found;
    1934             : 
    1935             :     /* Make space if needed */
    1936       49400 :     while (hash_get_num_entries(pgss_hash) >= pgss_max)
    1937           0 :         entry_dealloc();
    1938             : 
    1939             :     /* Find or create an entry with desired hash code */
    1940       49400 :     entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
    1941             : 
    1942       49400 :     if (!found)
    1943             :     {
    1944             :         /* New entry, initialize it */
    1945             : 
    1946             :         /* reset the statistics */
    1947       49400 :         memset(&entry->counters, 0, sizeof(Counters));
    1948             :         /* set the appropriate initial usage count */
    1949       49400 :         entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
    1950             :         /* re-initialize the mutex each time ... we assume no one using it */
    1951       49400 :         SpinLockInit(&entry->mutex);
    1952             :         /* ... and don't forget the query text metadata */
    1953             :         Assert(query_len >= 0);
    1954       49400 :         entry->query_offset = query_offset;
    1955       49400 :         entry->query_len = query_len;
    1956       49400 :         entry->encoding = encoding;
    1957             :     }
    1958             : 
    1959       49400 :     return entry;
    1960             : }
    1961             : 
    1962             : /*
    1963             :  * qsort comparator for sorting into increasing usage order
    1964             :  */
    1965             : static int
    1966           0 : entry_cmp(const void *lhs, const void *rhs)
    1967             : {
    1968           0 :     double      l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
    1969           0 :     double      r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
    1970             : 
    1971           0 :     if (l_usage < r_usage)
    1972           0 :         return -1;
    1973           0 :     else if (l_usage > r_usage)
    1974           0 :         return +1;
    1975             :     else
    1976           0 :         return 0;
    1977             : }
    1978             : 
    1979             : /*
    1980             :  * Deallocate least-used entries.
    1981             :  *
    1982             :  * Caller must hold an exclusive lock on pgss->lock.
    1983             :  */
    1984             : static void
    1985           0 : entry_dealloc(void)
    1986             : {
    1987             :     HASH_SEQ_STATUS hash_seq;
    1988             :     pgssEntry **entries;
    1989             :     pgssEntry  *entry;
    1990             :     int         nvictims;
    1991             :     int         i;
    1992             :     Size        tottextlen;
    1993             :     int         nvalidtexts;
    1994             : 
    1995             :     /*
    1996             :      * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
    1997             :      * While we're scanning the table, apply the decay factor to the usage
    1998             :      * values, and update the mean query length.
    1999             :      *
    2000             :      * Note that the mean query length is almost immediately obsolete, since
    2001             :      * we compute it before not after discarding the least-used entries.
    2002             :      * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
    2003             :      * making two passes to get a more current result.  Likewise, the new
    2004             :      * cur_median_usage includes the entries we're about to zap.
    2005             :      */
    2006             : 
    2007           0 :     entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
    2008             : 
    2009           0 :     i = 0;
    2010           0 :     tottextlen = 0;
    2011           0 :     nvalidtexts = 0;
    2012             : 
    2013           0 :     hash_seq_init(&hash_seq, pgss_hash);
    2014           0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2015             :     {
    2016           0 :         entries[i++] = entry;
    2017             :         /* "Sticky" entries get a different usage decay rate. */
    2018           0 :         if (IS_STICKY(entry->counters))
    2019           0 :             entry->counters.usage *= STICKY_DECREASE_FACTOR;
    2020             :         else
    2021           0 :             entry->counters.usage *= USAGE_DECREASE_FACTOR;
    2022             :         /* In the mean length computation, ignore dropped texts. */
    2023           0 :         if (entry->query_len >= 0)
    2024             :         {
    2025           0 :             tottextlen += entry->query_len + 1;
    2026           0 :             nvalidtexts++;
    2027             :         }
    2028             :     }
    2029             : 
    2030             :     /* Sort into increasing order by usage */
    2031           0 :     qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
    2032             : 
    2033             :     /* Record the (approximate) median usage */
    2034           0 :     if (i > 0)
    2035           0 :         pgss->cur_median_usage = entries[i / 2]->counters.usage;
    2036             :     /* Record the mean query length */
    2037           0 :     if (nvalidtexts > 0)
    2038           0 :         pgss->mean_query_len = tottextlen / nvalidtexts;
    2039             :     else
    2040           0 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2041             : 
    2042             :     /* Now zap an appropriate fraction of lowest-usage entries */
    2043           0 :     nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
    2044           0 :     nvictims = Min(nvictims, i);
    2045             : 
    2046           0 :     for (i = 0; i < nvictims; i++)
    2047             :     {
    2048           0 :         hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
    2049             :     }
    2050             : 
    2051           0 :     pfree(entries);
    2052             : 
    2053             :     /* Increment the number of times entries are deallocated */
    2054             :     {
    2055           0 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2056             : 
    2057           0 :         SpinLockAcquire(&s->mutex);
    2058           0 :         s->stats.dealloc += 1;
    2059           0 :         SpinLockRelease(&s->mutex);
    2060             :     }
    2061           0 : }
    2062             : 
    2063             : /*
    2064             :  * Given a query string (not necessarily null-terminated), allocate a new
    2065             :  * entry in the external query text file and store the string there.
    2066             :  *
    2067             :  * If successful, returns true, and stores the new entry's offset in the file
    2068             :  * into *query_offset.  Also, if gc_count isn't NULL, *gc_count is set to the
    2069             :  * number of garbage collections that have occurred so far.
    2070             :  *
    2071             :  * On failure, returns false.
    2072             :  *
    2073             :  * At least a shared lock on pgss->lock must be held by the caller, so as
    2074             :  * to prevent a concurrent garbage collection.  Share-lock-holding callers
    2075             :  * should pass a gc_count pointer to obtain the number of garbage collections,
    2076             :  * so that they can recheck the count after obtaining exclusive lock to
    2077             :  * detect whether a garbage collection occurred (and removed this entry).
    2078             :  */
    2079             : static bool
    2080       49400 : qtext_store(const char *query, int query_len,
    2081             :             Size *query_offset, int *gc_count)
    2082             : {
    2083             :     Size        off;
    2084             :     int         fd;
    2085             : 
    2086             :     /*
    2087             :      * We use a spinlock to protect extent/n_writers/gc_count, so that
    2088             :      * multiple processes may execute this function concurrently.
    2089             :      */
    2090             :     {
    2091       49400 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2092             : 
    2093       49400 :         SpinLockAcquire(&s->mutex);
    2094       49400 :         off = s->extent;
    2095       49400 :         s->extent += query_len + 1;
    2096       49400 :         s->n_writers++;
    2097       49400 :         if (gc_count)
    2098       49400 :             *gc_count = s->gc_count;
    2099       49400 :         SpinLockRelease(&s->mutex);
    2100             :     }
    2101             : 
    2102       49400 :     *query_offset = off;
    2103             : 
    2104             :     /*
    2105             :      * Don't allow the file to grow larger than what qtext_load_file can
    2106             :      * (theoretically) handle.  This has been seen to be reachable on 32-bit
    2107             :      * platforms.
    2108             :      */
    2109       49400 :     if (unlikely(query_len >= MaxAllocHugeSize - off))
    2110             :     {
    2111           0 :         errno = EFBIG;          /* not quite right, but it'll do */
    2112           0 :         fd = -1;
    2113           0 :         goto error;
    2114             :     }
    2115             : 
    2116             :     /* Now write the data into the successfully-reserved part of the file */
    2117       49400 :     fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
    2118       49400 :     if (fd < 0)
    2119           0 :         goto error;
    2120             : 
    2121       49400 :     if (pg_pwrite(fd, query, query_len, off) != query_len)
    2122           0 :         goto error;
    2123       49400 :     if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
    2124           0 :         goto error;
    2125             : 
    2126       49400 :     CloseTransientFile(fd);
    2127             : 
    2128             :     /* Mark our write complete */
    2129             :     {
    2130       49400 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2131             : 
    2132       49400 :         SpinLockAcquire(&s->mutex);
    2133       49400 :         s->n_writers--;
    2134       49400 :         SpinLockRelease(&s->mutex);
    2135             :     }
    2136             : 
    2137       49400 :     return true;
    2138             : 
    2139           0 : error:
    2140           0 :     ereport(LOG,
    2141             :             (errcode_for_file_access(),
    2142             :              errmsg("could not write file \"%s\": %m",
    2143             :                     PGSS_TEXT_FILE)));
    2144             : 
    2145           0 :     if (fd >= 0)
    2146           0 :         CloseTransientFile(fd);
    2147             : 
    2148             :     /* Mark our write complete */
    2149             :     {
    2150           0 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2151             : 
    2152           0 :         SpinLockAcquire(&s->mutex);
    2153           0 :         s->n_writers--;
    2154           0 :         SpinLockRelease(&s->mutex);
    2155             :     }
    2156             : 
    2157           0 :     return false;
    2158             : }
    2159             : 
    2160             : /*
    2161             :  * Read the external query text file into a malloc'd buffer.
    2162             :  *
    2163             :  * Returns NULL (without throwing an error) if unable to read, eg
    2164             :  * file not there or insufficient memory.
    2165             :  *
    2166             :  * On success, the buffer size is also returned into *buffer_size.
    2167             :  *
    2168             :  * This can be called without any lock on pgss->lock, but in that case
    2169             :  * the caller is responsible for verifying that the result is sane.
    2170             :  */
    2171             : static char *
    2172          92 : qtext_load_file(Size *buffer_size)
    2173             : {
    2174             :     char       *buf;
    2175             :     int         fd;
    2176             :     struct stat stat;
    2177             :     Size        nread;
    2178             : 
    2179          92 :     fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
    2180          92 :     if (fd < 0)
    2181             :     {
    2182           0 :         if (errno != ENOENT)
    2183           0 :             ereport(LOG,
    2184             :                     (errcode_for_file_access(),
    2185             :                      errmsg("could not read file \"%s\": %m",
    2186             :                             PGSS_TEXT_FILE)));
    2187           0 :         return NULL;
    2188             :     }
    2189             : 
    2190             :     /* Get file length */
    2191          92 :     if (fstat(fd, &stat))
    2192             :     {
    2193           0 :         ereport(LOG,
    2194             :                 (errcode_for_file_access(),
    2195             :                  errmsg("could not stat file \"%s\": %m",
    2196             :                         PGSS_TEXT_FILE)));
    2197           0 :         CloseTransientFile(fd);
    2198           0 :         return NULL;
    2199             :     }
    2200             : 
    2201             :     /* Allocate buffer; beware that off_t might be wider than size_t */
    2202          92 :     if (stat.st_size <= MaxAllocHugeSize)
    2203          92 :         buf = (char *) malloc(stat.st_size);
    2204             :     else
    2205           0 :         buf = NULL;
    2206          92 :     if (buf == NULL)
    2207             :     {
    2208           0 :         ereport(LOG,
    2209             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
    2210             :                  errmsg("out of memory"),
    2211             :                  errdetail("Could not allocate enough memory to read file \"%s\".",
    2212             :                            PGSS_TEXT_FILE)));
    2213           0 :         CloseTransientFile(fd);
    2214           0 :         return NULL;
    2215             :     }
    2216             : 
    2217             :     /*
    2218             :      * OK, slurp in the file.  Windows fails if we try to read more than
    2219             :      * INT_MAX bytes at once, and other platforms might not like that either,
    2220             :      * so read a very large file in 1GB segments.
    2221             :      */
    2222          92 :     nread = 0;
    2223         182 :     while (nread < stat.st_size)
    2224             :     {
    2225          90 :         int         toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
    2226             : 
    2227             :         /*
    2228             :          * If we get a short read and errno doesn't get set, the reason is
    2229             :          * probably that garbage collection truncated the file since we did
    2230             :          * the fstat(), so we don't log a complaint --- but we don't return
    2231             :          * the data, either, since it's most likely corrupt due to concurrent
    2232             :          * writes from garbage collection.
    2233             :          */
    2234          90 :         errno = 0;
    2235          90 :         if (read(fd, buf + nread, toread) != toread)
    2236             :         {
    2237           0 :             if (errno)
    2238           0 :                 ereport(LOG,
    2239             :                         (errcode_for_file_access(),
    2240             :                          errmsg("could not read file \"%s\": %m",
    2241             :                                 PGSS_TEXT_FILE)));
    2242           0 :             free(buf);
    2243           0 :             CloseTransientFile(fd);
    2244           0 :             return NULL;
    2245             :         }
    2246          90 :         nread += toread;
    2247             :     }
    2248             : 
    2249          92 :     if (CloseTransientFile(fd) != 0)
    2250           0 :         ereport(LOG,
    2251             :                 (errcode_for_file_access(),
    2252             :                  errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
    2253             : 
    2254          92 :     *buffer_size = nread;
    2255          92 :     return buf;
    2256             : }
    2257             : 
    2258             : /*
    2259             :  * Locate a query text in the file image previously read by qtext_load_file().
    2260             :  *
    2261             :  * Returns NULL if buffer is NULL or the offset/length fail validation; else
    2262             :  * the result points to a null-terminated string within the buffer (no copy).
    2263             :  */
    2264             : static char *
    2265       96724 : qtext_fetch(Size query_offset, int query_len,
    2266             :             char *buffer, Size buffer_size)
    2267             : {
    2268             :     /* No file image to search (file read failed)? */
    2269       96724 :     if (buffer == NULL)
    2270           0 :         return NULL;
    2271             :     /* Bogus offset/length?  The terminator byte must lie inside the buffer */
    2272       96724 :     if (query_len < 0 ||
    2273       96724 :         query_offset + query_len >= buffer_size)
    2274           0 :         return NULL;
    2275             :     /* As a further sanity check, make sure there's a trailing null */
    2276       96724 :     if (buffer[query_offset + query_len] != '\0')
    2277           0 :         return NULL;
    2278             :     /* Looks OK: hand back a pointer into the caller's buffer */
    2279       96724 :     return buffer + query_offset;
    2280             : }
    2281             : 
    2282             : /*
    2283             :  * Do we need to garbage-collect the external query text file?
    2284             :  * True only if extent >= 512 * pgss_max and >= 2 * mean_query_len * pgss_max.
    2285             :  * Caller should hold at least a shared lock on pgss->lock.
    2286             :  */
    2287             : static bool
    2288       49400 : need_gc_qtexts(void)
    2289             : {
    2290             :     Size        extent;
    2291             : 
    2292             :     /* Read shared extent pointer while holding the pgss->mutex spinlock */
    2293             :     {
    2294       49400 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2295             : 
    2296       49400 :         SpinLockAcquire(&s->mutex);
    2297       49400 :         extent = s->extent;
    2298       49400 :         SpinLockRelease(&s->mutex);
    2299             :     }
    2300             : 
    2301             :     /*
    2302             :      * Don't proceed if file does not exceed 512 bytes per possible entry.
    2303             :      *
    2304             :      * Here and in the next test, 32-bit machines have overflow hazards if
    2305             :      * pgss_max and/or mean_query_len are large.  Force the multiplications
    2306             :      * and comparisons to be done in uint64 arithmetic to forestall trouble.
    2307             :      */
    2308       49400 :     if ((uint64) extent < (uint64) 512 * pgss_max)
    2309       49400 :         return false;
    2310             : 
    2311             :     /*
    2312             :      * Don't proceed if file is less than about 50% bloat.  Nothing can or
    2313             :      * should be done in the event of unusually large query texts accounting
    2314             :      * for file's large size.  We go to the trouble of maintaining the mean
    2315             :      * query length in order to prevent garbage collection from thrashing
    2316             :      * uselessly.
    2317             :      */
    2318           0 :     if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
    2319           0 :         return false;
    2320             : 
    2321           0 :     return true;
    2322             : }
    2323             : 
    2324             : /*
    2325             :  * Garbage-collect orphaned query texts in external file.
    2326             :  *
    2327             :  * This won't be called often in the typical case, since it's likely that
    2328             :  * there won't be too much churn, and besides, a similar compaction process
    2329             :  * occurs when serializing to disk at shutdown or as part of resetting.
    2330             :  * Despite this, it seems prudent to plan for the edge case where the file
    2331             :  * becomes unreasonably large, with no other method of compaction likely to
    2332             :  * occur in the foreseeable future.
    2333             :  *
    2334             :  * The caller must hold an exclusive lock on pgss->lock.
    2335             :  *
    2336             :  * At the first sign of trouble we unlink the query text file to get a clean
    2337             :  * slate (although existing statistics are retained), rather than risk
    2338             :  * thrashing by allowing the same problem case to recur indefinitely.
    2339             :  */
    2340             : static void
    2341           0 : gc_qtexts(void)
    2342             : {
    2343             :     char       *qbuffer;
    2344             :     Size        qbuffer_size;
    2345           0 :     FILE       *qfile = NULL;
    2346             :     HASH_SEQ_STATUS hash_seq;
    2347             :     pgssEntry  *entry;
    2348             :     Size        extent;
    2349             :     int         nentries;
    2350             : 
    2351             :     /*
    2352             :      * When called from pgss_store, some other session might have proceeded
    2353             :      * with garbage collection in the no-lock-held interim of lock strength
    2354             :      * escalation.  Check once more that this is actually necessary.
    2355             :      */
    2356           0 :     if (!need_gc_qtexts())
    2357           0 :         return;
    2358             : 
    2359             :     /*
    2360             :      * Load the old texts file.  If we fail (out of memory, for instance),
    2361             :      * invalidate query texts.  Hopefully this is rare.  It might seem better
    2362             :      * to leave things alone on an OOM failure, but the problem is that the
    2363             :      * file is only going to get bigger; hoping for a future non-OOM result is
    2364             :      * risky and can easily lead to complete denial of service.
    2365             :      */
    2366           0 :     qbuffer = qtext_load_file(&qbuffer_size);
    2367           0 :     if (qbuffer == NULL)
    2368           0 :         goto gc_fail;
    2369             : 
    2370             :     /*
    2371             :      * We overwrite the query texts file in place, so as to reduce the risk of
    2372             :      * an out-of-disk-space failure.  Since the file is guaranteed not to get
    2373             :      * larger, this should always work on traditional filesystems; though we
    2374             :      * could still lose on copy-on-write filesystems.
    2375             :      */
    2376           0 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2377           0 :     if (qfile == NULL)
    2378             :     {
    2379           0 :         ereport(LOG,
    2380             :                 (errcode_for_file_access(),
    2381             :                  errmsg("could not write file \"%s\": %m",
    2382             :                         PGSS_TEXT_FILE)));
    2383           0 :         goto gc_fail;
    2384             :     }
    2385             : 
    2386           0 :     extent = 0;
    2387           0 :     nentries = 0;
    2388             : 
    2389           0 :     hash_seq_init(&hash_seq, pgss_hash);
    2390           0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2391             :     {
    2392           0 :         int         query_len = entry->query_len;
    2393           0 :         char       *qry = qtext_fetch(entry->query_offset,
    2394             :                                       query_len,
    2395             :                                       qbuffer,
    2396             :                                       qbuffer_size);
    2397             : 
    2398           0 :         if (qry == NULL)
    2399             :         {
    2400             :             /* Trouble ... drop the text, but keep the entry itself */
    2401           0 :             entry->query_offset = 0;
    2402           0 :             entry->query_len = -1;
    2403             :             /* entry will not be counted in mean query length computation */
    2404           0 :             continue;
    2405             :         }
    2406             : 
    2407           0 :         if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
    2408             :         {
    2409           0 :             ereport(LOG,
    2410             :                     (errcode_for_file_access(),
    2411             :                      errmsg("could not write file \"%s\": %m",
    2412             :                             PGSS_TEXT_FILE)));
    2413           0 :             hash_seq_term(&hash_seq);   /* abandon the scan before bailing out */
    2414           0 :             goto gc_fail;
    2415             :         }
    2416             : 
    2417           0 :         entry->query_offset = extent;
    2418           0 :         extent += query_len + 1;    /* the text plus its trailing null */
    2419           0 :         nentries++;
    2420             :     }
    2421             : 
    2422             :     /*
    2423             :      * Truncate away any now-unused space.  If this fails for some odd reason,
    2424             :      * we log it, but there's no need to fail.
    2425             :      */
    2426           0 :     if (ftruncate(fileno(qfile), extent) != 0)
    2427           0 :         ereport(LOG,
    2428             :                 (errcode_for_file_access(),
    2429             :                  errmsg("could not truncate file \"%s\": %m",
    2430             :                         PGSS_TEXT_FILE)));
    2431             : 
    2432           0 :     if (FreeFile(qfile))
    2433             :     {
    2434           0 :         ereport(LOG,
    2435             :                 (errcode_for_file_access(),
    2436             :                  errmsg("could not write file \"%s\": %m",
    2437             :                         PGSS_TEXT_FILE)));
    2438           0 :         qfile = NULL;   /* so gc_fail does not call FreeFile on it again */
    2439           0 :         goto gc_fail;
    2440             :     }
    2441             : 
    2442           0 :     elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
    2443             :          pgss->extent, extent);
    2444             : 
    2445             :     /* Reset the shared extent pointer */
    2446           0 :     pgss->extent = extent;
    2447             : 
    2448             :     /*
    2449             :      * Also update the mean query length, to be sure that need_gc_qtexts()
    2450             :      * won't still think we have a problem.
    2451             :      */
    2452           0 :     if (nentries > 0)
    2453           0 :         pgss->mean_query_len = extent / nentries;
    2454             :     else
    2455           0 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2456             : 
    2457           0 :     free(qbuffer);
    2458             : 
    2459             :     /*
    2460             :      * OK, count a garbage collection cycle.  (Note: even though we have
    2461             :      * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
    2462             :      * other processes may examine gc_count while holding only the mutex.
    2463             :      * Also, we have to advance the count *after* we've rewritten the file,
    2464             :      * else other processes might not realize they read a stale file.)
    2465             :      */
    2466           0 :     record_gc_qtexts();
    2467             : 
    2468           0 :     return;
    2469             : 
    2470           0 : gc_fail:
    2471             :     /* clean up resources; qbuffer may be NULL here, which free() accepts */
    2472           0 :     if (qfile)
    2473           0 :         FreeFile(qfile);
    2474           0 :     free(qbuffer);
    2475             : 
    2476             :     /*
    2477             :      * Since the contents of the external file are now uncertain, mark all
    2478             :      * hashtable entries as having invalid texts.
    2479             :      */
    2480           0 :     hash_seq_init(&hash_seq, pgss_hash);
    2481           0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2482             :     {
    2483           0 :         entry->query_offset = 0;
    2484           0 :         entry->query_len = -1;
    2485             :     }
    2486             : 
    2487             :     /*
    2488             :      * Destroy the query text file and create a new, empty one
    2489             :      */
    2490           0 :     (void) unlink(PGSS_TEXT_FILE);
    2491           0 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2492           0 :     if (qfile == NULL)
    2493           0 :         ereport(LOG,
    2494             :                 (errcode_for_file_access(),
    2495             :                  errmsg("could not recreate file \"%s\": %m",
    2496             :                         PGSS_TEXT_FILE)));
    2497             :     else
    2498           0 :         FreeFile(qfile);
    2499             : 
    2500             :     /* Reset the shared extent pointer */
    2501           0 :     pgss->extent = 0;
    2502             : 
    2503             :     /* Reset mean_query_len to match the new state */
    2504           0 :     pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2505             : 
    2506             :     /*
    2507             :      * Bump the GC count even though we failed.
    2508             :      *
    2509             :      * This is needed to make concurrent readers of file without any lock on
    2510             :      * pgss->lock notice existence of new version of file.  Once readers
    2511             :      * subsequently observe a change in GC count with pgss->lock held, that
    2512             :      * forces a safe reopen of file.  Writers also require that we bump here,
    2513             :      * of course.  (As required by locking protocol, readers and writers don't
    2514             :      * trust earlier file contents until gc_count is found unchanged after
    2515             :      * pgss->lock acquired in shared or exclusive mode respectively.)
    2516             :      */
    2517           0 :     record_gc_qtexts();
    2518             : }
    2519             : 
    2520             : /*
    2521             :  * Release entries matching userid/dbid/queryid; a zero parameter matches any.
    2522             :  */
    2523             : static void
    2524          82 : entry_reset(Oid userid, Oid dbid, uint64 queryid)
    2525             : {
    2526             :     HASH_SEQ_STATUS hash_seq;
    2527             :     pgssEntry  *entry;
    2528             :     FILE       *qfile;
    2529             :     long        num_entries;
    2530          82 :     long        num_remove = 0;
    2531             :     pgssHashKey key;
    2532             : 
    2533          82 :     if (!pgss || !pgss_hash)
    2534           0 :         ereport(ERROR,
    2535             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    2536             :                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
    2537             : 
    2538          82 :     LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
    2539          82 :     num_entries = hash_get_num_entries(pgss_hash);
    2540             : 
    2541          82 :     if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
    2542             :     {
    2543             :         /* If all the parameters are available, use the fast path. */
    2544           2 :         memset(&key, 0, sizeof(pgssHashKey));
    2545           2 :         key.userid = userid;
    2546           2 :         key.dbid = dbid;
    2547           2 :         key.queryid = queryid;
    2548             : 
    2549             :         /* Remove the key if it exists, starting with the non-top-level entry */
    2550           2 :         key.toplevel = false;
    2551           2 :         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_REMOVE, NULL);
    2552           2 :         if (entry)              /* found */
    2553           0 :             num_remove++;
    2554             : 
    2555             :         /* Also remove entries for top level statements */
    2556           2 :         key.toplevel = true;
    2557             : 
    2558             :         /* Remove the key if exists */
    2559           2 :         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_REMOVE, NULL);
    2560           2 :         if (entry)              /* found */
    2561           2 :             num_remove++;
    2562             :     }
    2563          80 :     else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
    2564             :     {
    2565             :         /* Remove entries that match every nonzero parameter. */
    2566           6 :         hash_seq_init(&hash_seq, pgss_hash);
    2567          78 :         while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2568             :         {
    2569          72 :             if ((!userid || entry->key.userid == userid) &&
    2570          52 :                 (!dbid || entry->key.dbid == dbid) &&
    2571          48 :                 (!queryid || entry->key.queryid == queryid))
    2572             :             {
    2573           8 :                 hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
    2574           8 :                 num_remove++;
    2575             :             }
    2576             :         }
    2577             :     }
    2578             :     else
    2579             :     {
    2580             :         /* Remove all entries. */
    2581          74 :         hash_seq_init(&hash_seq, pgss_hash);
    2582         672 :         while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2583             :         {
    2584         598 :             hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
    2585         598 :             num_remove++;
    2586             :         }
    2587             :     }
    2588             : 
    2589             :     /* If any entries survive, leave the global stats and text file alone */
    2590          82 :     if (num_entries != num_remove)
    2591           8 :         goto release_lock;
    2592             : 
    2593             :     /*
    2594             :      * Reset global statistics for pg_stat_statements since all entries are
    2595             :      * removed.
    2596             :      */
    2597             :     {
    2598          74 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2599          74 :         TimestampTz stats_reset = GetCurrentTimestamp();
    2600             : 
    2601          74 :         SpinLockAcquire(&s->mutex);
    2602          74 :         s->stats.dealloc = 0;
    2603          74 :         s->stats.stats_reset = stats_reset;
    2604          74 :         SpinLockRelease(&s->mutex);
    2605             :     }
    2606             : 
    2607             :     /*
    2608             :      * Write new empty query file, perhaps even creating a new one to recover
    2609             :      * if the file was missing.
    2610             :      */
    2611          74 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2612          74 :     if (qfile == NULL)
    2613             :     {
    2614           0 :         ereport(LOG,
    2615             :                 (errcode_for_file_access(),
    2616             :                  errmsg("could not create file \"%s\": %m",
    2617             :                         PGSS_TEXT_FILE)));
    2618           0 :         goto done;      /* extent is still zeroed and a gc still recorded */
    2619             :     }
    2620             : 
    2621             :     /* If ftruncate fails, log it, but it's not a fatal problem */
    2622          74 :     if (ftruncate(fileno(qfile), 0) != 0)
    2623           0 :         ereport(LOG,
    2624             :                 (errcode_for_file_access(),
    2625             :                  errmsg("could not truncate file \"%s\": %m",
    2626             :                         PGSS_TEXT_FILE)));
    2627             : 
    2628          74 :     FreeFile(qfile);
    2629             : 
    2630          74 : done:
    2631          74 :     pgss->extent = 0;
    2632             :     /* This counts as a query text garbage collection for our purposes */
    2633          74 :     record_gc_qtexts();
    2634             : 
    2635          82 : release_lock:
    2636          82 :     LWLockRelease(pgss->lock);
    2637          82 : }
    2638             : 
    2639             : /*
    2640             :  * Generate a normalized version of the query string that will be used to
    2641             :  * represent all similar queries.
    2642             :  *
    2643             :  * Note that the normalized representation may well vary depending on
    2644             :  * just which "equivalent" query is used to create the hashtable entry.
    2645             :  * We assume this is OK.
    2646             :  *
    2647             :  * If query_loc > 0, then "query" has been advanced by that much compared to
    2648             :  * the original string start, so we need to translate the provided locations
    2649             :  * to compensate.  (This lets us avoid re-scanning statements before the one
    2650             :  * of interest, so it's worth doing.)
    2651             :  *
    2652             :  * *query_len_p contains the input string length, and is updated with
    2653             :  * the result string length on exit.  The resulting string might be longer
    2654             :  * or shorter depending on what happens with replacement of constants.
    2655             :  *
    2656             :  * Returns a palloc'd string.
    2657             :  */
    2658             : static char *
    2659       17604 : generate_normalized_query(JumbleState *jstate, const char *query,
    2660             :                           int query_loc, int *query_len_p)
    2661             : {
    2662             :     char       *norm_query;
    2663       17604 :     int         query_len = *query_len_p;
    2664             :     int         i,
    2665             :                 norm_query_buflen,  /* Space allowed for norm_query */
    2666             :                 len_to_wrt,     /* Length (in bytes) to write */
    2667       17604 :                 quer_loc = 0,   /* Source query byte location */
    2668       17604 :                 n_quer_loc = 0, /* Normalized query byte location */
    2669       17604 :                 last_off = 0,   /* Offset from start for previous tok */
    2670       17604 :                 last_tok_len = 0;   /* Length (in bytes) of that tok */
    2671             : 
    2672             :     /*
    2673             :      * Get constants' lengths (core system only gives us locations).  Note
    2674             :      * this also ensures the items are sorted by location.
    2675             :      */
    2676       17604 :     fill_in_constant_lengths(jstate, query, query_loc);
    2677             : 
    2678             :     /*
    2679             :      * Allow for $n symbols to be longer than the constants they replace.
    2680             :      * Constants must take at least one byte in text form, while a $n symbol
    2681             :      * certainly isn't more than 11 bytes, even if n reaches INT_MAX; hence
    2682             :      * the allowance of 10 extra bytes per constant.  We could refine that by
    2683             :      * the max value of n for the current query, but it hardly seems worth it.
    2684             :      */
    2685       17604 :     norm_query_buflen = query_len + jstate->clocations_count * 10;
    2686             : 
    2687             :     /* Allocate result buffer, plus one byte for the terminating null */
    2688       17604 :     norm_query = palloc(norm_query_buflen + 1);
    2689             : 
    2690       73258 :     for (i = 0; i < jstate->clocations_count; i++)
    2691             :     {
    2692             :         int         off,        /* Offset from start for cur tok */
    2693             :                     tok_len;    /* Length (in bytes) of that tok */
    2694             : 
    2695       55654 :         off = jstate->clocations[i].location;
    2696             :         /* Adjust recorded location if we're dealing with partial string */
    2697       55654 :         off -= query_loc;
    2698             : 
    2699       55654 :         tok_len = jstate->clocations[i].length;
    2700             : 
    2701       55654 :         if (tok_len < 0)
    2702         320 :             continue;           /* ignore any duplicates; their $n is skipped */
    2703             : 
    2704             :         /* Copy next chunk (what precedes the next constant) */
    2705       55334 :         len_to_wrt = off - last_off;
    2706       55334 :         len_to_wrt -= last_tok_len;
    2707             : 
    2708             :         Assert(len_to_wrt >= 0);
    2709       55334 :         memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
    2710       55334 :         n_quer_loc += len_to_wrt;
    2711             : 
    2712             :         /* Insert a param symbol, numbered past highest_extern_param_id */
    2713      110668 :         n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
    2714       55334 :                               i + 1 + jstate->highest_extern_param_id);
    2715             : 
    2716       55334 :         quer_loc = off + tok_len;
    2717       55334 :         last_off = off;
    2718       55334 :         last_tok_len = tok_len;
    2719             :     }
    2720             : 
    2721             :     /*
    2722             :      * We've copied up until the last ignorable constant.  Copy over the
    2723             :      * remaining bytes of the original query string.
    2724             :      */
    2725       17604 :     len_to_wrt = query_len - quer_loc;
    2726             : 
    2727             :     Assert(len_to_wrt >= 0);
    2728       17604 :     memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
    2729       17604 :     n_quer_loc += len_to_wrt;
    2730             : 
    2731             :     Assert(n_quer_loc <= norm_query_buflen);
    2732       17604 :     norm_query[n_quer_loc] = '\0';
    2733             : 
    2734       17604 :     *query_len_p = n_quer_loc;
    2735       17604 :     return norm_query;
    2736             : }
    2737             : 
    2738             : /*
    2739             :  * Given a valid SQL string and an array of constant-location records,
    2740             :  * fill in the textual lengths of those constants.
    2741             :  *
    2742             :  * The constants may use any allowed constant syntax, such as float literals,
    2743             :  * bit-strings, single-quoted strings and dollar-quoted strings.  This is
    2744             :  * accomplished by using the public API for the core scanner.
    2745             :  *
    2746             :  * It is the caller's job to ensure that the string is a valid SQL statement
    2747             :  * with constants at the indicated locations.  Since in practice the string
    2748             :  * has already been parsed, and the locations that the caller provides will
    2749             :  * have originated from within the authoritative parser, this should not be
    2750             :  * a problem.
    2751             :  *
    2752             :  * Duplicate constant pointers are possible, and will have their lengths
    2753             :  * marked as '-1', so that they are later ignored.  (Actually, we assume the
    2754             :  * lengths were initialized as -1 to start with, and don't change them here.)
    2755             :  *
    2756             :  * If query_loc > 0, then "query" has been advanced by that much compared to
    2757             :  * the original string start, so we need to translate the provided locations
    2758             :  * to compensate.  (This lets us avoid re-scanning statements before the one
    2759             :  * of interest, so it's worth doing.)
    2760             :  *
    2761             :  * N.B. There is an assumption that a '-' character at a Const location begins
    2762             :  * a negative numeric constant.  This precludes there ever being another
    2763             :  * reason for a constant to start with a '-'.
    2764             :  */
    2765             : static void
    2766       17604 : fill_in_constant_lengths(JumbleState *jstate, const char *query,
    2767             :                          int query_loc)
    2768             : {
    2769             :     LocationLen *locs;          /* jstate->clocations, once sorted */
    2770             :     core_yyscan_t yyscanner;
    2771             :     core_yy_extra_type yyextra;
    2772             :     core_YYSTYPE yylval;
    2773             :     YYLTYPE     yylloc;
    2774       17604 :     int         last_loc = -1;  /* last location handled; -1 = none yet */
    2775             :     int         i;
    2776             : 
    2777             :     /*
    2778             :      * Sort the records by location (unneeded for fewer than two) so that one
    2779             :      * forward pass over the query text can process them in order.
    2780             :      */
    2781       17604 :     if (jstate->clocations_count > 1)
    2782       11510 :         qsort(jstate->clocations, jstate->clocations_count,
    2783             :               sizeof(LocationLen), comp_location);
    2784       17604 :     locs = jstate->clocations;
    2785             : 
    2786             :     /* initialize the flex scanner --- should match raw_parser() */
    2787       17604 :     yyscanner = scanner_init(query,
    2788             :                              &yyextra,
    2789             :                              &ScanKeywords,
    2790             :                              ScanKeywordTokens);
    2791             : 
    2792             :     /* we don't want to re-emit any escape string warnings */
    2793       17604 :     yyextra.escape_string_warning = false;
    2794             : 
    2795             :     /* Search for each constant, in ascending location order */
    2796       73258 :     for (i = 0; i < jstate->clocations_count; i++)
    2797             :     {
    2798       55654 :         int         loc = locs[i].location;
    2799             :         int         tok;        /* last token code; 0 means end of input */
    2800             : 
    2801             :         /* Make the recorded location relative to the start of "query" */
    2802       55654 :         loc -= query_loc;
    2803             : 
    2804             :         Assert(loc >= 0);
    2805             : 
    2806       55654 :         if (loc <= last_loc)
    2807         320 :             continue;           /* Duplicate constant: length left as-is */
    2808             : 
    2809             :         /* Lex tokens until we reach the desired constant's location */
    2810             :         for (;;)
    2811             :         {
    2812      414506 :             tok = core_yylex(&yylval, &yylloc, yyscanner);
    2813             : 
    2814             :             /* We should not hit end-of-string, but if we do, behave sanely */
    2815      414506 :             if (tok == 0)
    2816           0 :                 break;          /* out of inner for-loop */
    2817             : 
    2818             :             /*
    2819             :              * We should find the token position exactly, but if we somehow
    2820             :              * run past it, work with that.
    2821             :              */
    2822      414506 :             if (yylloc >= loc)
    2823             :             {
    2824       55334 :                 if (query[loc] == '-')
    2825             :                 {
    2826             :                     /*
    2827             :                      * It's a negative value - this is the one and only case
    2828             :                      * where we replace more than a single token.  Lex the next
    2829             :                      * token so the length taken below also covers the number.
    2830             :                      *
    2831             :                      * The core system reports a negative constant at the
    2832             :                      * location of its leading '-' operator.  We don't undo
    2833             :                      * that, because starting from the minus symbol suits us
    2834             :                      * too: queries like "select * from foo where bar = 1"
    2835             :                      * and "select * from foo where bar = -2" will then have
    2836             :                      * identical normalized query strings.
    2837             :                      */
    2838         718 :                     tok = core_yylex(&yylval, &yylloc, yyscanner);
    2839         718 :                     if (tok == 0)
    2840           0 :                         break;  /* out of inner for-loop */
    2841             :                 }
    2842             : 
    2843             :                 /*
    2844             :                  * We assume flex has placed a zero byte after the text of the
    2845             :                  * current token in scanbuf, so strlen() from loc stops there.
    2846             :                  */
    2847       55334 :                 locs[i].length = strlen(yyextra.scanbuf + loc);
    2848       55334 :                 break;          /* out of inner for-loop */
    2849             :             }
    2850             :         }
    2851             : 
    2852             :         /* If we hit end-of-string, give up, leaving remaining lengths -1 */
    2853       55334 :         if (tok == 0)
    2854           0 :             break;
    2855             : 
    2856       55334 :         last_loc = loc;
    2857             :     }
    2858             : 
    2859       17604 :     scanner_finish(yyscanner);
    2860       17604 : }
    2861             : 
    2862             : /*
    2863             :  * comp_location: qsort comparator, ascending by LocationLen.location
    2864             :  */
    2865             : static int
    2866       65986 : comp_location(const void *a, const void *b)
    2867             : {
    2868       65986 :     int         l = ((const LocationLen *) a)->location;
    2869       65986 :     int         r = ((const LocationLen *) b)->location;
    2870             : 
    2871       65986 :     if (l < r)
    2872       44942 :         return -1;              /* a sorts before b */
    2873       21044 :     else if (l > r)
    2874       20710 :         return +1;              /* a sorts after b */
    2875             :     else
    2876         334 :         return 0;               /* same location recorded twice */
    2877             : }

Generated by: LCOV version 1.14