Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_stat_statements.c
4 : * Track statement planning and execution times as well as resource
5 : * usage across a whole database cluster.
6 : *
7 : * Execution costs are totaled for each distinct source query, and kept in
8 : * a shared hashtable. (We track only as many distinct queries as will fit
9 : * in the designated amount of shared memory.)
10 : *
11 : * Starting in Postgres 9.2, this module normalized query entries. As of
12 : * Postgres 14, the normalization is done by the core if compute_query_id is
13 : * enabled, or optionally by third-party modules.
14 : *
15 : * To facilitate presenting entries to users, we create "representative" query
16 : * strings in which constants are replaced with parameter symbols ($n), to
17 : * make it clearer what a normalized entry can represent. To save on shared
18 : * memory, and to avoid having to truncate oversized query strings, we store
19 : * these strings in a temporary external query-texts file. Offsets into this
20 : * file are kept in shared memory.
21 : *
22 : * Note about locking issues: to create or delete an entry in the shared
23 : * hashtable, one must hold pgss->lock exclusively. Modifying any field
24 : * in an entry except the counters requires the same. To look up an entry,
25 : * one must hold the lock shared. To read or update the counters within
26 : * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27 : * disappear!) and also take the entry's mutex spinlock.
28 : * The shared state variable pgss->extent (the next free spot in the external
29 : * query-text file) should be accessed only while holding either the
30 : * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31 : * allow reserving file space while holding only shared lock on pgss->lock.
32 : * Rewriting the entire external query-text file, eg for garbage collection,
33 : * requires holding pgss->lock exclusively; this allows individual entries
34 : * in the file to be read or written while holding only shared lock.
35 : *
36 : *
37 : * Copyright (c) 2008-2026, PostgreSQL Global Development Group
38 : *
39 : * IDENTIFICATION
40 : * contrib/pg_stat_statements/pg_stat_statements.c
41 : *
42 : *-------------------------------------------------------------------------
43 : */
44 : #include "postgres.h"
45 :
46 : #include <math.h>
47 : #include <sys/stat.h>
48 : #include <unistd.h>
49 :
50 : #include "access/htup_details.h"
51 : #include "access/parallel.h"
52 : #include "catalog/pg_authid.h"
53 : #include "common/int.h"
54 : #include "executor/instrument.h"
55 : #include "funcapi.h"
56 : #include "jit/jit.h"
57 : #include "mb/pg_wchar.h"
58 : #include "miscadmin.h"
59 : #include "nodes/queryjumble.h"
60 : #include "optimizer/planner.h"
61 : #include "parser/analyze.h"
62 : #include "parser/scanner.h"
63 : #include "pgstat.h"
64 : #include "storage/fd.h"
65 : #include "storage/ipc.h"
66 : #include "storage/lwlock.h"
67 : #include "storage/shmem.h"
68 : #include "storage/spin.h"
69 : #include "tcop/utility.h"
70 : #include "utils/acl.h"
71 : #include "utils/builtins.h"
72 : #include "utils/memutils.h"
73 : #include "utils/timestamp.h"
74 : #include "utils/tuplestore.h"
75 :
/* Identify this loadable module to the server (ABI check + metadata) */
PG_MODULE_MAGIC_EXT(
					.name = "pg_stat_statements",
					.version = PG_VERSION
);
80 :
/* Location of permanent stats file (valid when database is shut down) */
#define PGSS_DUMP_FILE	PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"

/*
 * Location of external query text file.  Unlike PGSS_DUMP_FILE, this lives
 * in the stats temp directory and exists only while the server is running.
 */
#define PGSS_TEXT_FILE	PG_STAT_TMP_DIR "/pgss_query_texts.stat"

/* Magic number identifying the stats file format */
static const uint32 PGSS_FILE_HEADER = 0x20250731;

/* PostgreSQL major version number, changes in which invalidate all entries */
static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;

/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
#define USAGE_EXEC(duration)	(1.0)
#define USAGE_INIT				(1.0)	/* including initial planning */
#define ASSUMED_MEDIAN_INIT		(10.0)	/* initial assumed median usage */
#define ASSUMED_LENGTH_INIT		1024	/* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR	(0.99)	/* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR	(0.50)	/* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT	5	/* free this % of entries at once */

/* A "sticky" entry is one that has never been planned or executed */
#define IS_STICKY(c)	((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
104 :
/*
 * Extension version number, for supporting older extension versions' objects
 */
typedef enum pgssVersion
{
	PGSS_V1_0 = 0,
	PGSS_V1_1,
	PGSS_V1_2,
	PGSS_V1_3,
	PGSS_V1_8,
	PGSS_V1_9,
	PGSS_V1_10,
	PGSS_V1_11,
	PGSS_V1_12,
	PGSS_V1_13,
} pgssVersion;

/*
 * Kind of statistics being stored for a statement: planning or execution.
 * PGSS_INVALID is used when storing a normalized query text before any
 * counters exist for it.
 */
typedef enum pgssStoreKind
{
	PGSS_INVALID = -1,

	/*
	 * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
	 * reference the underlying values in the arrays in the Counters struct,
	 * and this order is required in pg_stat_statements_internal().
	 */
	PGSS_PLAN = 0,
	PGSS_EXEC,
} pgssStoreKind;

/* Number of valid pgssStoreKind values (array length for per-kind counters) */
#define PGSS_NUMKIND	(PGSS_EXEC + 1)
136 :
/*
 * Hashtable key that defines the identity of a hashtable entry.  We separate
 * queries by user and by database even if they are otherwise identical.
 *
 * If you add a new key to this struct, make sure to teach pgss_store() to
 * zero the padding bytes.  Otherwise, things will break, because pgss_hash is
 * created using HASH_BLOBS, and thus tag_hash is used to hash this.
 */
typedef struct pgssHashKey
{
	Oid			userid;			/* user OID */
	Oid			dbid;			/* database OID */
	int64		queryid;		/* query identifier */
	bool		toplevel;		/* query executed at top level */
} pgssHashKey;
152 :
/*
 * The actual stats counters kept within pgssEntry.
 *
 * Per the locking rules in the file header, these fields are read and
 * updated under the owning entry's mutex spinlock (while also holding
 * pgss->lock at least in shared mode).
 */
typedef struct Counters
{
	int64		calls[PGSS_NUMKIND];	/* # of times planned/executed */
	double		total_time[PGSS_NUMKIND];	/* total planning/execution time,
											 * in msec */
	double		min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
										 * msec since min/max reset */
	double		max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
										 * msec since min/max reset */
	double		mean_time[PGSS_NUMKIND];	/* mean planning/execution time in
											 * msec */
	double		sum_var_time[PGSS_NUMKIND]; /* sum of variances in
											 * planning/execution time in msec */
	int64		rows;			/* total # of retrieved or affected rows */
	int64		shared_blks_hit;	/* # of shared buffer hits */
	int64		shared_blks_read;	/* # of shared disk blocks read */
	int64		shared_blks_dirtied;	/* # of shared disk blocks dirtied */
	int64		shared_blks_written;	/* # of shared disk blocks written */
	int64		local_blks_hit; /* # of local buffer hits */
	int64		local_blks_read;	/* # of local disk blocks read */
	int64		local_blks_dirtied; /* # of local disk blocks dirtied */
	int64		local_blks_written; /* # of local disk blocks written */
	int64		temp_blks_read; /* # of temp blocks read */
	int64		temp_blks_written;	/* # of temp blocks written */
	double		shared_blk_read_time;	/* time spent reading shared blocks,
										 * in msec */
	double		shared_blk_write_time;	/* time spent writing shared blocks,
										 * in msec */
	double		local_blk_read_time;	/* time spent reading local blocks, in
										 * msec */
	double		local_blk_write_time;	/* time spent writing local blocks, in
										 * msec */
	double		temp_blk_read_time; /* time spent reading temp blocks, in msec */
	double		temp_blk_write_time;	/* time spent writing temp blocks, in
										 * msec */
	double		usage;			/* usage factor */
	int64		wal_records;	/* # of WAL records generated */
	int64		wal_fpi;		/* # of WAL full page images generated */
	uint64		wal_bytes;		/* total amount of WAL generated in bytes */
	int64		wal_buffers_full;	/* # of times the WAL buffers became full */
	int64		jit_functions;	/* total number of JIT functions emitted */
	double		jit_generation_time;	/* total time to generate jit code */
	int64		jit_inlining_count; /* number of times inlining time has been
									 * > 0 */
	double		jit_deform_time;	/* total time to deform tuples in jit code */
	int64		jit_deform_count;	/* number of times deform time has been >
									 * 0 */

	double		jit_inlining_time;	/* total time to inline jit code */
	int64		jit_optimization_count; /* number of times optimization time
										 * has been > 0 */
	double		jit_optimization_time;	/* total time to optimize jit code */
	int64		jit_emission_count; /* number of times emission time has been
									 * > 0 */
	double		jit_emission_time;	/* total time to emit jit code */
	int64		parallel_workers_to_launch; /* # of parallel workers planned
											 * to be launched */
	int64		parallel_workers_launched;	/* # of parallel workers actually
											 * launched */
	int64		generic_plan_calls; /* number of calls using a generic plan */
	int64		custom_plan_calls;	/* number of calls using a custom plan */
} Counters;
218 :
/*
 * Global statistics for pg_stat_statements, exposed via
 * pg_stat_statements_info().
 */
typedef struct pgssGlobalStats
{
	int64		dealloc;		/* # of times entries were deallocated */
	TimestampTz stats_reset;	/* timestamp with all stats reset */
} pgssGlobalStats;
227 :
/*
 * Statistics per statement
 *
 * Note: in event of a failure in garbage collection of the query text file,
 * we reset query_offset to zero and query_len to -1.  This will be seen as
 * an invalid state by qtext_fetch().
 */
typedef struct pgssEntry
{
	pgssHashKey key;			/* hash key of entry - MUST BE FIRST */
	Counters	counters;		/* the statistics for this query */
	Size		query_offset;	/* query text offset in external file */
	int			query_len;		/* # of valid bytes in query string, or -1 */
	int			encoding;		/* query text encoding */
	TimestampTz stats_since;	/* timestamp of entry allocation */
	TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
	slock_t		mutex;			/* protects the counters only */
} pgssEntry;
246 :
/*
 * Global shared state.  See the locking discussion in the file header
 * comment for which fields are protected by 'lock' vs. 'mutex'.
 */
typedef struct pgssSharedState
{
	LWLock	   *lock;			/* protects hashtable search/modification */
	double		cur_median_usage;	/* current median usage in hashtable */
	Size		mean_query_len; /* current mean entry text length */
	slock_t		mutex;			/* protects following fields only: */
	Size		extent;			/* current extent of query file */
	int			n_writers;		/* number of active writers to query file */
	int			gc_count;		/* query file garbage collection cycle count */
	pgssGlobalStats stats;		/* global statistics for pgss */
} pgssSharedState;
261 :
/*---- Local variables ----*/

/* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
static int	nesting_level = 0;

/* Saved hook values (so each of our hooks can chain to its predecessor) */
static shmem_request_hook_type prev_shmem_request_hook = NULL;
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
static planner_hook_type prev_planner_hook = NULL;
static ExecutorStart_hook_type prev_ExecutorStart = NULL;
static ExecutorRun_hook_type prev_ExecutorRun = NULL;
static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;

/* Links to shared memory state (NULL if module not fully activated) */
static pgssSharedState *pgss = NULL;
static HTAB *pgss_hash = NULL;
281 :
/*---- GUC variables ----*/

typedef enum
{
	PGSS_TRACK_NONE,			/* track no statements */
	PGSS_TRACK_TOP,				/* only top level statements */
	PGSS_TRACK_ALL,				/* all statements, including nested ones */
} PGSSTrackLevel;

static const struct config_enum_entry track_options[] =
{
	{"none", PGSS_TRACK_NONE, false},
	{"top", PGSS_TRACK_TOP, false},
	{"all", PGSS_TRACK_ALL, false},
	{NULL, 0, false}
};

static int	pgss_max = 5000;	/* max # statements to track */
static int	pgss_track = PGSS_TRACK_TOP;	/* tracking level */
static bool pgss_track_utility = true;	/* whether to track utility commands */
static bool pgss_track_planning = false;	/* whether to track planning
											 * duration */
static bool pgss_save = true;	/* whether to save stats across shutdown */

/*
 * True if we should track a statement at the given nesting level.
 * Parallel workers are never tracked directly (the leader accounts for
 * the whole query).
 */
#define pgss_enabled(level) \
	(!IsParallelWorker() && \
	(pgss_track == PGSS_TRACK_ALL || \
	(pgss_track == PGSS_TRACK_TOP && (level) == 0)))

/*
 * Bump the query-text-file GC cycle counter, under the spinlock that
 * protects it.
 */
#define record_gc_qtexts() \
	do { \
		SpinLockAcquire(&pgss->mutex); \
		pgss->gc_count++; \
		SpinLockRelease(&pgss->mutex); \
	} while(0)
317 :
/*---- Function declarations ----*/

/*
 * SQL-callable entry points.  A separate version-suffixed function exists
 * for each extension version whose SQL signature differed.
 */
PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_7);
PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_11);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_3);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_8);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_9);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_10);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_11);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_12);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_13);
PG_FUNCTION_INFO_V1(pg_stat_statements);
PG_FUNCTION_INFO_V1(pg_stat_statements_info);

/* Hook implementations */
static void pgss_shmem_request(void);
static void pgss_shmem_startup(void);
static void pgss_shmem_shutdown(int code, Datum arg);
static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
									JumbleState *jstate);
static PlannedStmt *pgss_planner(Query *parse,
								 const char *query_string,
								 int cursorOptions,
								 ParamListInfo boundParams,
								 ExplainState *es);
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
static void pgss_ExecutorRun(QueryDesc *queryDesc,
							 ScanDirection direction,
							 uint64 count);
static void pgss_ExecutorFinish(QueryDesc *queryDesc);
static void pgss_ExecutorEnd(QueryDesc *queryDesc);
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
								bool readOnlyTree,
								ProcessUtilityContext context, ParamListInfo params,
								QueryEnvironment *queryEnv,
								DestReceiver *dest, QueryCompletion *qc);

/* Internal helpers */
static void pgss_store(const char *query, int64 queryId,
					   int query_location, int query_len,
					   pgssStoreKind kind,
					   double total_time, uint64 rows,
					   const BufferUsage *bufusage,
					   const WalUsage *walusage,
					   const struct JitInstrumentation *jitusage,
					   JumbleState *jstate,
					   int parallel_workers_to_launch,
					   int parallel_workers_launched,
					   PlannedStmtOrigin planOrigin);
static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
										pgssVersion api_version,
										bool showtext);
static Size pgss_memsize(void);
static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
							  int encoding, bool sticky);
static void entry_dealloc(void);
static bool qtext_store(const char *query, int query_len,
						Size *query_offset, int *gc_count);
static char *qtext_load_file(Size *buffer_size);
static char *qtext_fetch(Size query_offset, int query_len,
						 char *buffer, Size buffer_size);
static bool need_gc_qtexts(void);
static void gc_qtexts(void);
static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
static char *generate_normalized_query(JumbleState *jstate, const char *query,
									   int query_loc, int *query_len_p);
static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
									 int query_loc);
static int	comp_location(const void *a, const void *b);
386 :

/*
 * Module load callback: define GUCs and install all hooks.  Must run in the
 * postmaster (via shared_preload_libraries) to be effective.
 */
void
_PG_init(void)
{
	/*
	 * In order to create our shared memory area, we have to be loaded via
	 * shared_preload_libraries.  If not, fall out without hooking into any of
	 * the main system.  (We don't throw error here because it seems useful to
	 * allow the pg_stat_statements functions to be created even when the
	 * module isn't active.  The functions must protect themselves against
	 * being called then, however.)
	 */
	if (!process_shared_preload_libraries_in_progress)
		return;

	/*
	 * Inform the postmaster that we want to enable query_id calculation if
	 * compute_query_id is set to auto.
	 */
	EnableQueryId();

	/*
	 * Define (or redefine) custom GUC variables.
	 */
	DefineCustomIntVariable("pg_stat_statements.max",
							"Sets the maximum number of statements tracked by pg_stat_statements.",
							NULL,
							&pgss_max,
							5000,
							100,
							INT_MAX / 2,
							PGC_POSTMASTER,
							0,
							NULL,
							NULL,
							NULL);

	DefineCustomEnumVariable("pg_stat_statements.track",
							 "Selects which statements are tracked by pg_stat_statements.",
							 NULL,
							 &pgss_track,
							 PGSS_TRACK_TOP,
							 track_options,
							 PGC_SUSET,
							 0,
							 NULL,
							 NULL,
							 NULL);

	DefineCustomBoolVariable("pg_stat_statements.track_utility",
							 "Selects whether utility commands are tracked by pg_stat_statements.",
							 NULL,
							 &pgss_track_utility,
							 true,
							 PGC_SUSET,
							 0,
							 NULL,
							 NULL,
							 NULL);

	DefineCustomBoolVariable("pg_stat_statements.track_planning",
							 "Selects whether planning duration is tracked by pg_stat_statements.",
							 NULL,
							 &pgss_track_planning,
							 false,
							 PGC_SUSET,
							 0,
							 NULL,
							 NULL,
							 NULL);

	DefineCustomBoolVariable("pg_stat_statements.save",
							 "Save pg_stat_statements statistics across server shutdowns.",
							 NULL,
							 &pgss_save,
							 true,
							 PGC_SIGHUP,
							 0,
							 NULL,
							 NULL,
							 NULL);

	/* Reject any other pg_stat_statements.* settings as unknown */
	MarkGUCPrefixReserved("pg_stat_statements");

	/*
	 * Install hooks.  Each previous hook value is saved first so that our
	 * hook implementations can chain to it.
	 */
	prev_shmem_request_hook = shmem_request_hook;
	shmem_request_hook = pgss_shmem_request;
	prev_shmem_startup_hook = shmem_startup_hook;
	shmem_startup_hook = pgss_shmem_startup;
	prev_post_parse_analyze_hook = post_parse_analyze_hook;
	post_parse_analyze_hook = pgss_post_parse_analyze;
	prev_planner_hook = planner_hook;
	planner_hook = pgss_planner;
	prev_ExecutorStart = ExecutorStart_hook;
	ExecutorStart_hook = pgss_ExecutorStart;
	prev_ExecutorRun = ExecutorRun_hook;
	ExecutorRun_hook = pgss_ExecutorRun;
	prev_ExecutorFinish = ExecutorFinish_hook;
	ExecutorFinish_hook = pgss_ExecutorFinish;
	prev_ExecutorEnd = ExecutorEnd_hook;
	ExecutorEnd_hook = pgss_ExecutorEnd;
	prev_ProcessUtility = ProcessUtility_hook;
	ProcessUtility_hook = pgss_ProcessUtility;
}
496 :
/*
 * shmem_request hook: request additional shared resources.  We'll allocate or
 * attach to the shared resources in pgss_shmem_startup().
 *
 * Requests space for the fixed shared state plus the hash table (sized by
 * pgss_memsize), and one LWLock in our named tranche.
 */
static void
pgss_shmem_request(void)
{
	if (prev_shmem_request_hook)
		prev_shmem_request_hook();

	RequestAddinShmemSpace(pgss_memsize());
	RequestNamedLWLockTranche("pg_stat_statements", 1);
}
510 :
/*
 * shmem_startup hook: allocate or attach to shared memory,
 * then load any pre-existing statistics from file.
 * Also create and load the query-texts file, which is expected to exist
 * (even if empty) while the module is enabled.
 *
 * Failures while reading or writing the persisted files are reported at
 * LOG level and the saved statistics are discarded; they never abort
 * startup.
 */
static void
pgss_shmem_startup(void)
{
	bool		found;
	HASHCTL		info;
	FILE	   *file = NULL;
	FILE	   *qfile = NULL;
	uint32		header;
	int32		num;
	int32		pgver;
	int32		i;
	int			buffer_size;
	char	   *buffer = NULL;

	if (prev_shmem_startup_hook)
		prev_shmem_startup_hook();

	/* reset in case this is a restart within the postmaster */
	pgss = NULL;
	pgss_hash = NULL;

	/*
	 * Create or attach to the shared memory state, including hash table
	 */
	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

	pgss = ShmemInitStruct("pg_stat_statements",
						   sizeof(pgssSharedState),
						   &found);

	if (!found)
	{
		/* First time through ... initialize all shared fields */
		pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
		pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
		pgss->mean_query_len = ASSUMED_LENGTH_INIT;
		SpinLockInit(&pgss->mutex);
		pgss->extent = 0;
		pgss->n_writers = 0;
		pgss->gc_count = 0;
		pgss->stats.dealloc = 0;
		pgss->stats.stats_reset = GetCurrentTimestamp();
	}

	info.keysize = sizeof(pgssHashKey);
	info.entrysize = sizeof(pgssEntry);
	pgss_hash = ShmemInitHash("pg_stat_statements hash",
							  pgss_max, pgss_max,
							  &info,
							  HASH_ELEM | HASH_BLOBS);

	LWLockRelease(AddinShmemInitLock);

	/*
	 * If we're in the postmaster (or a standalone backend...), set up a shmem
	 * exit hook to dump the statistics to disk.
	 */
	if (!IsUnderPostmaster)
		on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);

	/*
	 * Done if some other process already completed our initialization.
	 */
	if (found)
		return;

	/*
	 * Note: we don't bother with locks here, because there should be no other
	 * processes running when this code is reached.
	 */

	/* Unlink query text file possibly left over from crash */
	unlink(PGSS_TEXT_FILE);

	/* Allocate new query text temp file */
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
		goto write_error;

	/*
	 * If we were told not to load old statistics, we're done.  (Note we do
	 * not try to unlink any old dump file in this case.  This seems a bit
	 * questionable but it's the historical behavior.)
	 */
	if (!pgss_save)
	{
		FreeFile(qfile);
		return;
	}

	/*
	 * Attempt to load old statistics from the dump file.
	 */
	file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
	if (file == NULL)
	{
		if (errno != ENOENT)
			goto read_error;
		/* No existing persisted stats file, so we're done */
		FreeFile(qfile);
		return;
	}

	buffer_size = 2048;
	buffer = (char *) palloc(buffer_size);

	/* File layout: header magic, PG major version, then entry count */
	if (fread(&header, sizeof(uint32), 1, file) != 1 ||
		fread(&pgver, sizeof(uint32), 1, file) != 1 ||
		fread(&num, sizeof(int32), 1, file) != 1)
		goto read_error;

	if (header != PGSS_FILE_HEADER ||
		pgver != PGSS_PG_MAJOR_VERSION)
		goto data_error;

	/* Each entry is a raw pgssEntry followed by its NUL-terminated text */
	for (i = 0; i < num; i++)
	{
		pgssEntry	temp;
		pgssEntry  *entry;
		Size		query_offset;

		if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
			goto read_error;

		/* Encoding is the only field we can easily sanity-check */
		if (!PG_VALID_BE_ENCODING(temp.encoding))
			goto data_error;

		/* Resize buffer as needed */
		if (temp.query_len >= buffer_size)
		{
			buffer_size = Max(buffer_size * 2, temp.query_len + 1);
			buffer = repalloc(buffer, buffer_size);
		}

		if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
			goto read_error;

		/* Should have a trailing null, but let's make sure */
		buffer[temp.query_len] = '\0';

		/* Skip loading "sticky" entries */
		if (IS_STICKY(temp.counters))
			continue;

		/* Store the query text */
		query_offset = pgss->extent;
		if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
			goto write_error;
		pgss->extent += temp.query_len + 1;

		/* make the hashtable entry (discards old entries if too many) */
		entry = entry_alloc(&temp.key, query_offset, temp.query_len,
							temp.encoding,
							false);

		/* copy in the actual stats */
		entry->counters = temp.counters;
		entry->stats_since = temp.stats_since;
		entry->minmax_stats_since = temp.minmax_stats_since;
	}

	/* Read global statistics for pg_stat_statements */
	if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
		goto read_error;

	pfree(buffer);
	FreeFile(file);
	FreeFile(qfile);

	/*
	 * Remove the persisted stats file so it's not included in
	 * backups/replication standbys, etc.  A new file will be written on next
	 * shutdown.
	 *
	 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
	 * because we remove that file on startup; it acts inversely to
	 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
	 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
	 * when the server is not running.  Leaving the file creates no danger of
	 * a newly restored database having a spurious record of execution costs,
	 * which is what we're really concerned about here.
	 */
	unlink(PGSS_DUMP_FILE);

	return;

	/* Error exits: log, release whatever was acquired, remove bogus file */
read_error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not read file \"%s\": %m",
					PGSS_DUMP_FILE)));
	goto fail;
data_error:
	ereport(LOG,
			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			 errmsg("ignoring invalid data in file \"%s\"",
					PGSS_DUMP_FILE)));
	goto fail;
write_error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not write file \"%s\": %m",
					PGSS_TEXT_FILE)));
fail:
	if (buffer)
		pfree(buffer);
	if (file)
		FreeFile(file);
	if (qfile)
		FreeFile(qfile);
	/* If possible, throw away the bogus file; ignore any error */
	unlink(PGSS_DUMP_FILE);

	/*
	 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
	 * server is running with pg_stat_statements enabled
	 */
}
736 :
/*
 * shmem_shutdown hook: Dump statistics into file.
 *
 * Note: we don't bother with acquiring lock, because there should be no
 * other processes running when this is called.
 *
 * The dump is written to PGSS_DUMP_FILE ".tmp" and atomically renamed into
 * place; errors are logged and leave no partial dump file behind.
 */
static void
pgss_shmem_shutdown(int code, Datum arg)
{
	FILE	   *file;
	char	   *qbuffer = NULL;
	Size		qbuffer_size = 0;
	HASH_SEQ_STATUS hash_seq;
	int32		num_entries;
	pgssEntry  *entry;

	/* Don't try to dump during a crash. */
	if (code)
		return;

	/* Safety check ... shouldn't get here unless shmem is set up. */
	if (!pgss || !pgss_hash)
		return;

	/* Don't dump if told not to. */
	if (!pgss_save)
		return;

	file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
	if (file == NULL)
		goto error;

	/* File layout mirrors what pgss_shmem_startup() reads back */
	if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
		goto error;
	if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
		goto error;
	num_entries = hash_get_num_entries(pgss_hash);
	if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
		goto error;

	qbuffer = qtext_load_file(&qbuffer_size);
	if (qbuffer == NULL)
		goto error;

	/*
	 * When serializing to disk, we store query texts immediately after their
	 * entry data.  Any orphaned query texts are thereby excluded.
	 */
	hash_seq_init(&hash_seq, pgss_hash);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		int			len = entry->query_len;
		char	   *qstr = qtext_fetch(entry->query_offset, len,
									   qbuffer, qbuffer_size);

		if (qstr == NULL)
			continue;			/* Ignore any entries with bogus texts */

		if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
			fwrite(qstr, 1, len + 1, file) != len + 1)
		{
			/* note: we assume hash_seq_term won't change errno */
			hash_seq_term(&hash_seq);
			goto error;
		}
	}

	/* Dump global statistics for pg_stat_statements */
	if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
		goto error;

	pfree(qbuffer);
	qbuffer = NULL;

	/* FreeFile flushes; treat a failure there like any other write error */
	if (FreeFile(file))
	{
		file = NULL;
		goto error;
	}

	/*
	 * Rename file into place, so we atomically replace any old one.
	 */
	(void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);

	/* Unlink query-texts file; it's not needed while shutdown */
	unlink(PGSS_TEXT_FILE);

	return;

error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not write file \"%s\": %m",
					PGSS_DUMP_FILE ".tmp")));
	if (qbuffer)
		pfree(qbuffer);
	if (file)
		FreeFile(file);
	unlink(PGSS_DUMP_FILE ".tmp");
	unlink(PGSS_TEXT_FILE);
}
839 :
840 : /*
841 : * Post-parse-analysis hook: mark query with a queryId
842 : */
843 : static void
844 82691 : pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
845 : {
846 82691 : if (prev_post_parse_analyze_hook)
847 0 : prev_post_parse_analyze_hook(pstate, query, jstate);
848 :
849 : /* Safety check... */
850 82691 : if (!pgss || !pgss_hash || !pgss_enabled(nesting_level))
851 12780 : return;
852 :
853 : /*
854 : * If it's EXECUTE, clear the queryId so that stats will accumulate for
855 : * the underlying PREPARE. But don't do this if we're not tracking
856 : * utility statements, to avoid messing up another extension that might be
857 : * tracking them.
858 : */
859 69911 : if (query->utilityStmt)
860 : {
861 31374 : if (pgss_track_utility && IsA(query->utilityStmt, ExecuteStmt))
862 : {
863 3340 : query->queryId = INT64CONST(0);
864 3340 : return;
865 : }
866 : }
867 :
868 : /*
869 : * If query jumbling were able to identify any ignorable constants, we
870 : * immediately create a hash table entry for the query, so that we can
871 : * record the normalized form of the query string. If there were no such
872 : * constants, the normalized string would be the same as the query text
873 : * anyway, so there's no need for an early entry.
874 : */
875 66571 : if (jstate && jstate->clocations_count > 0)
876 38383 : pgss_store(pstate->p_sourcetext,
877 : query->queryId,
878 : query->stmt_location,
879 : query->stmt_len,
880 : PGSS_INVALID,
881 : 0,
882 : 0,
883 : NULL,
884 : NULL,
885 : NULL,
886 : jstate,
887 : 0,
888 : 0,
889 : PLAN_STMT_UNKNOWN);
890 : }
891 :
/*
 * Planner hook: forward to regular planner, but measure planning time
 * if needed.
 *
 * In either branch the nesting level is bumped around the planner call, so
 * that anything evaluated during planning is not mistaken for a top-level
 * statement; PG_FINALLY guarantees the level is restored even on error.
 */
static PlannedStmt *
pgss_planner(Query *parse,
			 const char *query_string,
			 int cursorOptions,
			 ParamListInfo boundParams,
			 ExplainState *es)
{
	PlannedStmt *result;

	/*
	 * We can't process the query if no query_string is provided, as
	 * pgss_store needs it.  We also ignore query without queryid, as it would
	 * be treated as a utility statement, which may not be the case.
	 */
	if (pgss_enabled(nesting_level)
		&& pgss_track_planning && query_string
		&& parse->queryId != INT64CONST(0))
	{
		instr_time	start;
		instr_time	duration;
		BufferUsage bufusage_start,
					bufusage;
		WalUsage	walusage_start,
					walusage;

		/* We need to track buffer usage as the planner can access them. */
		bufusage_start = pgBufferUsage;

		/*
		 * Similarly the planner could write some WAL records in some cases
		 * (e.g. setting a hint bit with those being WAL-logged)
		 */
		walusage_start = pgWalUsage;
		INSTR_TIME_SET_CURRENT(start);

		nesting_level++;
		PG_TRY();
		{
			if (prev_planner_hook)
				result = prev_planner_hook(parse, query_string, cursorOptions,
										   boundParams, es);
			else
				result = standard_planner(parse, query_string, cursorOptions,
										  boundParams, es);
		}
		PG_FINALLY();
		{
			nesting_level--;
		}
		PG_END_TRY();

		INSTR_TIME_SET_CURRENT(duration);
		INSTR_TIME_SUBTRACT(duration, start);

		/* calc differences of buffer counters. */
		memset(&bufusage, 0, sizeof(BufferUsage));
		BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);

		/* calc differences of WAL counters. */
		memset(&walusage, 0, sizeof(WalUsage));
		WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);

		pgss_store(query_string,
				   parse->queryId,
				   parse->stmt_location,
				   parse->stmt_len,
				   PGSS_PLAN,
				   INSTR_TIME_GET_MILLISEC(duration),
				   0,			/* no row count at plan time */
				   &bufusage,
				   &walusage,
				   NULL,		/* no JIT usage at plan time */
				   NULL,		/* jstate NULL: record stats, not just text */
				   0,
				   0,
				   result->planOrigin);
	}
	else
	{
		/*
		 * Even though we're not tracking plan time for this statement, we
		 * must still increment the nesting level, to ensure that functions
		 * evaluated during planning are not seen as top-level calls.
		 */
		nesting_level++;
		PG_TRY();
		{
			if (prev_planner_hook)
				result = prev_planner_hook(parse, query_string, cursorOptions,
										   boundParams, es);
			else
				result = standard_planner(parse, query_string, cursorOptions,
										  boundParams, es);
		}
		PG_FINALLY();
		{
			nesting_level--;
		}
		PG_END_TRY();
	}

	return result;
}
999 :
/*
 * ExecutorStart hook: start up tracking if needed
 *
 * Attaches an Instrumentation struct to the QueryDesc so that
 * pgss_ExecutorEnd can read total time, buffer/WAL usage etc. afterwards.
 */
static void
pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	if (prev_ExecutorStart)
		prev_ExecutorStart(queryDesc, eflags);
	else
		standard_ExecutorStart(queryDesc, eflags);

	/*
	 * If query has queryId zero, don't track it.  This prevents double
	 * counting of optimizable statements that are directly contained in
	 * utility statements.
	 */
	if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
	{
		/*
		 * Set up to track total elapsed time in ExecutorRun.  Make sure the
		 * space is allocated in the per-query context so it will go away at
		 * ExecutorEnd.
		 */
		if (queryDesc->totaltime == NULL)
		{
			MemoryContext oldcxt;

			/* allocate in es_query_cxt so the instrumentation is freed with the query */
			oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
			queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
			MemoryContextSwitchTo(oldcxt);
		}
	}
}
1033 :
/*
 * ExecutorRun hook: all we need do is track nesting depth
 *
 * Statements executed underneath this one must not be mistaken for
 * top-level calls, hence the nesting_level bump around the real work.
 */
static void
pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
{
	nesting_level++;
	PG_TRY();
	{
		if (prev_ExecutorRun)
			prev_ExecutorRun(queryDesc, direction, count);
		else
			standard_ExecutorRun(queryDesc, direction, count);
	}
	PG_FINALLY();
	{
		/* restore the depth even if execution raised an error */
		nesting_level--;
	}
	PG_END_TRY();
}
1054 :
/*
 * ExecutorFinish hook: all we need do is track nesting depth
 *
 * Same pattern as pgss_ExecutorRun: bump the nesting level around the
 * standard (or previously-hooked) ExecutorFinish, restoring it on error.
 */
static void
pgss_ExecutorFinish(QueryDesc *queryDesc)
{
	nesting_level++;
	PG_TRY();
	{
		if (prev_ExecutorFinish)
			prev_ExecutorFinish(queryDesc);
		else
			standard_ExecutorFinish(queryDesc);
	}
	PG_FINALLY();
	{
		/* restore the depth even if execution raised an error */
		nesting_level--;
	}
	PG_END_TRY();
}
1075 :
/*
 * ExecutorEnd hook: store results if needed
 *
 * Harvests the instrumentation attached in pgss_ExecutorStart and records
 * the execution-phase statistics, then hands off to the standard (or
 * previously-hooked) ExecutorEnd.
 */
static void
pgss_ExecutorEnd(QueryDesc *queryDesc)
{
	int64		queryId = queryDesc->plannedstmt->queryId;

	/* only statements we instrumented at ExecutorStart time are recorded */
	if (queryId != INT64CONST(0) && queryDesc->totaltime &&
		pgss_enabled(nesting_level))
	{
		/*
		 * Make sure stats accumulation is done.  (Note: it's okay if several
		 * levels of hook all do this.)
		 */
		InstrEndLoop(queryDesc->totaltime);

		pgss_store(queryDesc->sourceText,
				   queryId,
				   queryDesc->plannedstmt->stmt_location,
				   queryDesc->plannedstmt->stmt_len,
				   PGSS_EXEC,
				   INSTR_TIME_GET_MILLISEC(queryDesc->totaltime->total),
				   queryDesc->estate->es_total_processed,
				   &queryDesc->totaltime->bufusage,
				   &queryDesc->totaltime->walusage,
				   queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
				   NULL,		/* jstate NULL: record stats, not just text */
				   queryDesc->estate->es_parallel_workers_to_launch,
				   queryDesc->estate->es_parallel_workers_launched,
				   queryDesc->plannedstmt->planOrigin);
	}

	if (prev_ExecutorEnd)
		prev_ExecutorEnd(queryDesc);
	else
		standard_ExecutorEnd(queryDesc);
}
1114 :
1115 : /*
1116 : * ProcessUtility hook
1117 : */
1118 : static void
1119 36863 : pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1120 : bool readOnlyTree,
1121 : ProcessUtilityContext context,
1122 : ParamListInfo params, QueryEnvironment *queryEnv,
1123 : DestReceiver *dest, QueryCompletion *qc)
1124 : {
1125 36863 : Node *parsetree = pstmt->utilityStmt;
1126 36863 : int64 saved_queryId = pstmt->queryId;
1127 36863 : int saved_stmt_location = pstmt->stmt_location;
1128 36863 : int saved_stmt_len = pstmt->stmt_len;
1129 36863 : bool enabled = pgss_track_utility && pgss_enabled(nesting_level);
1130 :
1131 : /*
1132 : * Force utility statements to get queryId zero. We do this even in cases
1133 : * where the statement contains an optimizable statement for which a
1134 : * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1135 : * cases, runtime control will first go through ProcessUtility and then
1136 : * the executor, and we don't want the executor hooks to do anything,
1137 : * since we are already measuring the statement's costs at the utility
1138 : * level.
1139 : *
1140 : * Note that this is only done if pg_stat_statements is enabled and
1141 : * configured to track utility statements, in the unlikely possibility
1142 : * that user configured another extension to handle utility statements
1143 : * only.
1144 : */
1145 36863 : if (enabled)
1146 31259 : pstmt->queryId = INT64CONST(0);
1147 :
1148 : /*
1149 : * If it's an EXECUTE statement, we don't track it and don't increment the
1150 : * nesting level. This allows the cycles to be charged to the underlying
1151 : * PREPARE instead (by the Executor hooks), which is much more useful.
1152 : *
1153 : * We also don't track execution of PREPARE. If we did, we would get one
1154 : * hash table entry for the PREPARE (with hash calculated from the query
1155 : * string), and then a different one with the same query string (but hash
1156 : * calculated from the query tree) would be used to accumulate costs of
1157 : * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1158 : * actually run the planner (only parse+rewrite), its costs are generally
1159 : * pretty negligible and it seems okay to just ignore it.
1160 : */
1161 36863 : if (enabled &&
1162 31259 : !IsA(parsetree, ExecuteStmt) &&
1163 27925 : !IsA(parsetree, PrepareStmt))
1164 25142 : {
1165 : instr_time start;
1166 : instr_time duration;
1167 : uint64 rows;
1168 : BufferUsage bufusage_start,
1169 : bufusage;
1170 : WalUsage walusage_start,
1171 : walusage;
1172 :
1173 27797 : bufusage_start = pgBufferUsage;
1174 27797 : walusage_start = pgWalUsage;
1175 27797 : INSTR_TIME_SET_CURRENT(start);
1176 :
1177 27797 : nesting_level++;
1178 27797 : PG_TRY();
1179 : {
1180 27797 : if (prev_ProcessUtility)
1181 0 : prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1182 : context, params, queryEnv,
1183 : dest, qc);
1184 : else
1185 27797 : standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1186 : context, params, queryEnv,
1187 : dest, qc);
1188 : }
1189 2655 : PG_FINALLY();
1190 : {
1191 27797 : nesting_level--;
1192 : }
1193 27797 : PG_END_TRY();
1194 :
1195 : /*
1196 : * CAUTION: do not access the *pstmt data structure again below here.
1197 : * If it was a ROLLBACK or similar, that data structure may have been
1198 : * freed. We must copy everything we still need into local variables,
1199 : * which we did above.
1200 : *
1201 : * For the same reason, we can't risk restoring pstmt->queryId to its
1202 : * former value, which'd otherwise be a good idea.
1203 : */
1204 :
1205 25142 : INSTR_TIME_SET_CURRENT(duration);
1206 25142 : INSTR_TIME_SUBTRACT(duration, start);
1207 :
1208 : /*
1209 : * Track the total number of rows retrieved or affected by the utility
1210 : * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1211 : * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1212 : */
1213 25139 : rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1214 23340 : qc->commandTag == CMDTAG_FETCH ||
1215 23080 : qc->commandTag == CMDTAG_SELECT ||
1216 22887 : qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
1217 50281 : qc->nprocessed : 0;
1218 :
1219 : /* calc differences of buffer counters. */
1220 25142 : memset(&bufusage, 0, sizeof(BufferUsage));
1221 25142 : BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1222 :
1223 : /* calc differences of WAL counters. */
1224 25142 : memset(&walusage, 0, sizeof(WalUsage));
1225 25142 : WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1226 :
1227 25142 : pgss_store(queryString,
1228 : saved_queryId,
1229 : saved_stmt_location,
1230 : saved_stmt_len,
1231 : PGSS_EXEC,
1232 25142 : INSTR_TIME_GET_MILLISEC(duration),
1233 : rows,
1234 : &bufusage,
1235 : &walusage,
1236 : NULL,
1237 : NULL,
1238 : 0,
1239 : 0,
1240 : pstmt->planOrigin);
1241 : }
1242 : else
1243 : {
1244 : /*
1245 : * Even though we're not tracking execution time for this statement,
1246 : * we must still increment the nesting level, to ensure that functions
1247 : * evaluated within it are not seen as top-level calls. But don't do
1248 : * so for EXECUTE; that way, when control reaches pgss_planner or
1249 : * pgss_ExecutorStart, we will treat the costs as top-level if
1250 : * appropriate. Likewise, don't bump for PREPARE, so that parse
1251 : * analysis will treat the statement as top-level if appropriate.
1252 : *
1253 : * To be absolutely certain we don't mess up the nesting level,
1254 : * evaluate the bump_level condition just once.
1255 : */
1256 9066 : bool bump_level =
1257 14797 : !IsA(parsetree, ExecuteStmt) &&
1258 5731 : !IsA(parsetree, PrepareStmt);
1259 :
1260 9066 : if (bump_level)
1261 5602 : nesting_level++;
1262 9066 : PG_TRY();
1263 : {
1264 9066 : if (prev_ProcessUtility)
1265 0 : prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1266 : context, params, queryEnv,
1267 : dest, qc);
1268 : else
1269 9066 : standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1270 : context, params, queryEnv,
1271 : dest, qc);
1272 : }
1273 138 : PG_FINALLY();
1274 : {
1275 9066 : if (bump_level)
1276 5602 : nesting_level--;
1277 : }
1278 9066 : PG_END_TRY();
1279 : }
1280 34070 : }
1281 :
/*
 * Store some statistics for a statement.
 *
 * If jstate is not NULL then we're trying to create an entry for which
 * we have no statistics as yet; we just want to record the normalized
 * query string.  total_time, rows, bufusage and walusage are ignored in this
 * case.
 *
 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
 * for the arrays in the Counters field.
 *
 * Locking follows the rules at the head of the file: shared lock on
 * pgss->lock for lookup and counter updates (plus the entry's spinlock),
 * promoted to exclusive only when a new hashtable entry must be created.
 */
static void
pgss_store(const char *query, int64 queryId,
		   int query_location, int query_len,
		   pgssStoreKind kind,
		   double total_time, uint64 rows,
		   const BufferUsage *bufusage,
		   const WalUsage *walusage,
		   const struct JitInstrumentation *jitusage,
		   JumbleState *jstate,
		   int parallel_workers_to_launch,
		   int parallel_workers_launched,
		   PlannedStmtOrigin planOrigin)
{
	pgssHashKey key;
	pgssEntry  *entry;
	char	   *norm_query = NULL;
	int			encoding = GetDatabaseEncoding();

	Assert(query != NULL);

	/* Safety check... */
	if (!pgss || !pgss_hash)
		return;

	/*
	 * Nothing to do if compute_query_id isn't enabled and no other module
	 * computed a query identifier.
	 */
	if (queryId == INT64CONST(0))
		return;

	/*
	 * Confine our attention to the relevant part of the string, if the query
	 * is a portion of a multi-statement source string, and update query
	 * location and length if needed.
	 */
	query = CleanQuerytext(query, &query_location, &query_len);

	/* Set up key for hashtable search */

	/* clear padding */
	memset(&key, 0, sizeof(pgssHashKey));

	key.userid = GetUserId();
	key.dbid = MyDatabaseId;
	key.queryid = queryId;
	key.toplevel = (nesting_level == 0);

	/* Lookup the hash table entry with shared lock. */
	LWLockAcquire(pgss->lock, LW_SHARED);

	entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);

	/* Create new entry, if not present */
	if (!entry)
	{
		Size		query_offset;
		int			gc_count;
		bool		stored;
		bool		do_gc;

		/*
		 * Create a new, normalized query string if caller asked.  We don't
		 * need to hold the lock while doing this work.  (Note: in any case,
		 * it's possible that someone else creates a duplicate hashtable entry
		 * in the interval where we don't hold the lock below.  That case is
		 * handled by entry_alloc.)
		 */
		if (jstate)
		{
			LWLockRelease(pgss->lock);
			norm_query = generate_normalized_query(jstate, query,
												   query_location,
												   &query_len);
			LWLockAcquire(pgss->lock, LW_SHARED);
		}

		/* Append new query text to file with only shared lock held */
		stored = qtext_store(norm_query ? norm_query : query, query_len,
							 &query_offset, &gc_count);

		/*
		 * Determine whether we need to garbage collect external query texts
		 * while the shared lock is still held.  This micro-optimization
		 * avoids taking the time to decide this while holding exclusive lock.
		 */
		do_gc = need_gc_qtexts();

		/* Need exclusive lock to make a new hashtable entry - promote */
		LWLockRelease(pgss->lock);
		LWLockAcquire(pgss->lock, LW_EXCLUSIVE);

		/*
		 * A garbage collection may have occurred while we weren't holding the
		 * lock.  In the unlikely event that this happens, the query text we
		 * stored above will have been garbage collected, so write it again.
		 * This should be infrequent enough that doing it while holding
		 * exclusive lock isn't a performance problem.
		 */
		if (!stored || pgss->gc_count != gc_count)
			stored = qtext_store(norm_query ? norm_query : query, query_len,
								 &query_offset, NULL);

		/* If we failed to write to the text file, give up */
		if (!stored)
			goto done;

		/* OK to create a new hashtable entry */
		entry = entry_alloc(&key, query_offset, query_len, encoding,
							jstate != NULL);

		/* If needed, perform garbage collection while exclusive lock held */
		if (do_gc)
			gc_qtexts();
	}

	/* Increment the counts, except when jstate is not NULL */
	if (!jstate)
	{
		Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);

		/*
		 * Grab the spinlock while updating the counters (see comment about
		 * locking rules at the head of the file)
		 */
		SpinLockAcquire(&entry->mutex);

		/* "Unstick" entry if it was previously sticky */
		if (IS_STICKY(entry->counters))
			entry->counters.usage = USAGE_INIT;

		entry->counters.calls[kind] += 1;
		entry->counters.total_time[kind] += total_time;

		if (entry->counters.calls[kind] == 1)
		{
			/* first call of this kind: all timing aggregates start here */
			entry->counters.min_time[kind] = total_time;
			entry->counters.max_time[kind] = total_time;
			entry->counters.mean_time[kind] = total_time;
		}
		else
		{
			/*
			 * Welford's method for accurately computing variance.  See
			 * <http://www.johndcook.com/blog/standard_deviation/>
			 */
			double		old_mean = entry->counters.mean_time[kind];

			entry->counters.mean_time[kind] +=
				(total_time - old_mean) / entry->counters.calls[kind];
			entry->counters.sum_var_time[kind] +=
				(total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);

			/*
			 * Calculate min and max time.  min = 0 and max = 0 means that the
			 * min/max statistics were reset
			 */
			if (entry->counters.min_time[kind] == 0
				&& entry->counters.max_time[kind] == 0)
			{
				entry->counters.min_time[kind] = total_time;
				entry->counters.max_time[kind] = total_time;
			}
			else
			{
				if (entry->counters.min_time[kind] > total_time)
					entry->counters.min_time[kind] = total_time;
				if (entry->counters.max_time[kind] < total_time)
					entry->counters.max_time[kind] = total_time;
			}
		}
		entry->counters.rows += rows;
		entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
		entry->counters.shared_blks_read += bufusage->shared_blks_read;
		entry->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
		entry->counters.shared_blks_written += bufusage->shared_blks_written;
		entry->counters.local_blks_hit += bufusage->local_blks_hit;
		entry->counters.local_blks_read += bufusage->local_blks_read;
		entry->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
		entry->counters.local_blks_written += bufusage->local_blks_written;
		entry->counters.temp_blks_read += bufusage->temp_blks_read;
		entry->counters.temp_blks_written += bufusage->temp_blks_written;
		entry->counters.shared_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_read_time);
		entry->counters.shared_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_write_time);
		entry->counters.local_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_read_time);
		entry->counters.local_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_write_time);
		entry->counters.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time);
		entry->counters.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time);
		entry->counters.usage += USAGE_EXEC(total_time);
		entry->counters.wal_records += walusage->wal_records;
		entry->counters.wal_fpi += walusage->wal_fpi;
		entry->counters.wal_bytes += walusage->wal_bytes;
		entry->counters.wal_buffers_full += walusage->wal_buffers_full;
		if (jitusage)
		{
			entry->counters.jit_functions += jitusage->created_functions;
			entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);

			/* each nonzero timing also bumps the corresponding event count */
			if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
				entry->counters.jit_deform_count++;
			entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);

			if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
				entry->counters.jit_inlining_count++;
			entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);

			if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
				entry->counters.jit_optimization_count++;
			entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);

			if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
				entry->counters.jit_emission_count++;
			entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
		}

		/* parallel worker counters */
		entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
		entry->counters.parallel_workers_launched += parallel_workers_launched;

		/* plan cache counters */
		if (planOrigin == PLAN_STMT_CACHE_GENERIC)
			entry->counters.generic_plan_calls++;
		else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
			entry->counters.custom_plan_calls++;

		SpinLockRelease(&entry->mutex);
	}

done:
	LWLockRelease(pgss->lock);

	/* We postpone this clean-up until we're out of the lock */
	if (norm_query)
		pfree(norm_query);
}
1528 :
1529 : /*
1530 : * Reset statement statistics corresponding to userid, dbid, and queryid.
1531 : */
1532 : Datum
1533 1 : pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
1534 : {
1535 : Oid userid;
1536 : Oid dbid;
1537 : int64 queryid;
1538 :
1539 1 : userid = PG_GETARG_OID(0);
1540 1 : dbid = PG_GETARG_OID(1);
1541 1 : queryid = PG_GETARG_INT64(2);
1542 :
1543 1 : entry_reset(userid, dbid, queryid, false);
1544 :
1545 1 : PG_RETURN_VOID();
1546 : }
1547 :
1548 : Datum
1549 119 : pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
1550 : {
1551 : Oid userid;
1552 : Oid dbid;
1553 : int64 queryid;
1554 : bool minmax_only;
1555 :
1556 119 : userid = PG_GETARG_OID(0);
1557 119 : dbid = PG_GETARG_OID(1);
1558 119 : queryid = PG_GETARG_INT64(2);
1559 119 : minmax_only = PG_GETARG_BOOL(3);
1560 :
1561 119 : PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1562 : }
1563 :
1564 : /*
1565 : * Reset statement statistics.
1566 : */
1567 : Datum
1568 1 : pg_stat_statements_reset(PG_FUNCTION_ARGS)
1569 : {
1570 1 : entry_reset(0, 0, 0, false);
1571 :
1572 1 : PG_RETURN_VOID();
1573 : }
1574 :
1575 : /* Number of output arguments (columns) for various API versions */
1576 : #define PG_STAT_STATEMENTS_COLS_V1_0 14
1577 : #define PG_STAT_STATEMENTS_COLS_V1_1 18
1578 : #define PG_STAT_STATEMENTS_COLS_V1_2 19
1579 : #define PG_STAT_STATEMENTS_COLS_V1_3 23
1580 : #define PG_STAT_STATEMENTS_COLS_V1_8 32
1581 : #define PG_STAT_STATEMENTS_COLS_V1_9 33
1582 : #define PG_STAT_STATEMENTS_COLS_V1_10 43
1583 : #define PG_STAT_STATEMENTS_COLS_V1_11 49
1584 : #define PG_STAT_STATEMENTS_COLS_V1_12 52
1585 : #define PG_STAT_STATEMENTS_COLS_V1_13 54
1586 : #define PG_STAT_STATEMENTS_COLS 54 /* maximum of above */
1587 :
1588 : /*
1589 : * Retrieve statement statistics.
1590 : *
1591 : * The SQL API of this function has changed multiple times, and will likely
1592 : * do so again in future. To support the case where a newer version of this
1593 : * loadable module is being used with an old SQL declaration of the function,
1594 : * we continue to support the older API versions. For 1.2 and later, the
1595 : * expected API version is identified by embedding it in the C name of the
1596 : * function. Unfortunately we weren't bright enough to do that for 1.1.
1597 : */
1598 : Datum
1599 129 : pg_stat_statements_1_13(PG_FUNCTION_ARGS)
1600 : {
1601 129 : bool showtext = PG_GETARG_BOOL(0);
1602 :
1603 129 : pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext);
1604 :
1605 129 : return (Datum) 0;
1606 : }
1607 :
1608 : Datum
1609 1 : pg_stat_statements_1_12(PG_FUNCTION_ARGS)
1610 : {
1611 1 : bool showtext = PG_GETARG_BOOL(0);
1612 :
1613 1 : pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
1614 :
1615 1 : return (Datum) 0;
1616 : }
1617 :
1618 : Datum
1619 1 : pg_stat_statements_1_11(PG_FUNCTION_ARGS)
1620 : {
1621 1 : bool showtext = PG_GETARG_BOOL(0);
1622 :
1623 1 : pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1624 :
1625 1 : return (Datum) 0;
1626 : }
1627 :
1628 : Datum
1629 1 : pg_stat_statements_1_10(PG_FUNCTION_ARGS)
1630 : {
1631 1 : bool showtext = PG_GETARG_BOOL(0);
1632 :
1633 1 : pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1634 :
1635 1 : return (Datum) 0;
1636 : }
1637 :
1638 : Datum
1639 1 : pg_stat_statements_1_9(PG_FUNCTION_ARGS)
1640 : {
1641 1 : bool showtext = PG_GETARG_BOOL(0);
1642 :
1643 1 : pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1644 :
1645 1 : return (Datum) 0;
1646 : }
1647 :
1648 : Datum
1649 1 : pg_stat_statements_1_8(PG_FUNCTION_ARGS)
1650 : {
1651 1 : bool showtext = PG_GETARG_BOOL(0);
1652 :
1653 1 : pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1654 :
1655 1 : return (Datum) 0;
1656 : }
1657 :
1658 : Datum
1659 1 : pg_stat_statements_1_3(PG_FUNCTION_ARGS)
1660 : {
1661 1 : bool showtext = PG_GETARG_BOOL(0);
1662 :
1663 1 : pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1664 :
1665 1 : return (Datum) 0;
1666 : }
1667 :
1668 : Datum
1669 0 : pg_stat_statements_1_2(PG_FUNCTION_ARGS)
1670 : {
1671 0 : bool showtext = PG_GETARG_BOOL(0);
1672 :
1673 0 : pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1674 :
1675 0 : return (Datum) 0;
1676 : }
1677 :
1678 : /*
1679 : * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1680 : * This can be removed someday, perhaps.
1681 : */
1682 : Datum
1683 0 : pg_stat_statements(PG_FUNCTION_ARGS)
1684 : {
1685 : /* If it's really API 1.1, we'll figure that out below */
1686 0 : pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1687 :
1688 0 : return (Datum) 0;
1689 : }
1690 :
1691 : /* Common code for all versions of pg_stat_statements() */
1692 : static void
1693 135 : pg_stat_statements_internal(FunctionCallInfo fcinfo,
1694 : pgssVersion api_version,
1695 : bool showtext)
1696 : {
1697 135 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1698 135 : Oid userid = GetUserId();
1699 135 : bool is_allowed_role = false;
1700 135 : char *qbuffer = NULL;
1701 135 : Size qbuffer_size = 0;
1702 135 : Size extent = 0;
1703 135 : int gc_count = 0;
1704 : HASH_SEQ_STATUS hash_seq;
1705 : pgssEntry *entry;
1706 :
1707 : /*
1708 : * Superusers or roles with the privileges of pg_read_all_stats members
1709 : * are allowed
1710 : */
1711 135 : is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
1712 :
1713 : /* hash table must exist already */
1714 135 : if (!pgss || !pgss_hash)
1715 0 : ereport(ERROR,
1716 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1717 : errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1718 :
1719 135 : InitMaterializedSRF(fcinfo, 0);
1720 :
1721 : /*
1722 : * Check we have the expected number of output arguments. Aside from
1723 : * being a good safety check, we need a kluge here to detect API version
1724 : * 1.1, which was wedged into the code in an ill-considered way.
1725 : */
1726 135 : switch (rsinfo->setDesc->natts)
1727 : {
1728 0 : case PG_STAT_STATEMENTS_COLS_V1_0:
1729 0 : if (api_version != PGSS_V1_0)
1730 0 : elog(ERROR, "incorrect number of output arguments");
1731 0 : break;
1732 0 : case PG_STAT_STATEMENTS_COLS_V1_1:
1733 : /* pg_stat_statements() should have told us 1.0 */
1734 0 : if (api_version != PGSS_V1_0)
1735 0 : elog(ERROR, "incorrect number of output arguments");
1736 0 : api_version = PGSS_V1_1;
1737 0 : break;
1738 0 : case PG_STAT_STATEMENTS_COLS_V1_2:
1739 0 : if (api_version != PGSS_V1_2)
1740 0 : elog(ERROR, "incorrect number of output arguments");
1741 0 : break;
1742 1 : case PG_STAT_STATEMENTS_COLS_V1_3:
1743 1 : if (api_version != PGSS_V1_3)
1744 0 : elog(ERROR, "incorrect number of output arguments");
1745 1 : break;
1746 1 : case PG_STAT_STATEMENTS_COLS_V1_8:
1747 1 : if (api_version != PGSS_V1_8)
1748 0 : elog(ERROR, "incorrect number of output arguments");
1749 1 : break;
1750 1 : case PG_STAT_STATEMENTS_COLS_V1_9:
1751 1 : if (api_version != PGSS_V1_9)
1752 0 : elog(ERROR, "incorrect number of output arguments");
1753 1 : break;
1754 1 : case PG_STAT_STATEMENTS_COLS_V1_10:
1755 1 : if (api_version != PGSS_V1_10)
1756 0 : elog(ERROR, "incorrect number of output arguments");
1757 1 : break;
1758 1 : case PG_STAT_STATEMENTS_COLS_V1_11:
1759 1 : if (api_version != PGSS_V1_11)
1760 0 : elog(ERROR, "incorrect number of output arguments");
1761 1 : break;
1762 1 : case PG_STAT_STATEMENTS_COLS_V1_12:
1763 1 : if (api_version != PGSS_V1_12)
1764 0 : elog(ERROR, "incorrect number of output arguments");
1765 1 : break;
1766 129 : case PG_STAT_STATEMENTS_COLS_V1_13:
1767 129 : if (api_version != PGSS_V1_13)
1768 0 : elog(ERROR, "incorrect number of output arguments");
1769 129 : break;
1770 0 : default:
1771 0 : elog(ERROR, "incorrect number of output arguments");
1772 : }
1773 :
1774 : /*
1775 : * We'd like to load the query text file (if needed) while not holding any
1776 : * lock on pgss->lock. In the worst case we'll have to do this again
1777 : * after we have the lock, but it's unlikely enough to make this a win
1778 : * despite occasional duplicated work. We need to reload if anybody
1779 : * writes to the file (either a retail qtext_store(), or a garbage
1780 : * collection) between this point and where we've gotten shared lock. If
1781 : * a qtext_store is actually in progress when we look, we might as well
1782 : * skip the speculative load entirely.
1783 : */
1784 135 : if (showtext)
1785 : {
1786 : int n_writers;
1787 :
1788 : /* Take the mutex so we can examine variables */
1789 135 : SpinLockAcquire(&pgss->mutex);
1790 135 : extent = pgss->extent;
1791 135 : n_writers = pgss->n_writers;
1792 135 : gc_count = pgss->gc_count;
1793 135 : SpinLockRelease(&pgss->mutex);
1794 :
1795 : /* No point in loading file now if there are active writers */
1796 135 : if (n_writers == 0)
1797 135 : qbuffer = qtext_load_file(&qbuffer_size);
1798 : }
1799 :
1800 : /*
1801 : * Get shared lock, load or reload the query text file if we must, and
1802 : * iterate over the hashtable entries.
1803 : *
1804 : * With a large hash table, we might be holding the lock rather longer
1805 : * than one could wish. However, this only blocks creation of new hash
1806 : * table entries, and the larger the hash table the less likely that is to
1807 : * be needed. So we can hope this is okay. Perhaps someday we'll decide
1808 : * we need to partition the hash table to limit the time spent holding any
1809 : * one lock.
1810 : */
1811 135 : LWLockAcquire(pgss->lock, LW_SHARED);
1812 :
1813 135 : if (showtext)
1814 : {
1815 : /*
1816 : * Here it is safe to examine extent and gc_count without taking the
1817 : * mutex. Note that although other processes might change
1818 : * pgss->extent just after we look at it, the strings they then write
1819 : * into the file cannot yet be referenced in the hashtable, so we
1820 : * don't care whether we see them or not.
1821 : *
1822 : * If qtext_load_file fails, we just press on; we'll return NULL for
1823 : * every query text.
1824 : */
1825 135 : if (qbuffer == NULL ||
1826 135 : pgss->extent != extent ||
1827 135 : pgss->gc_count != gc_count)
1828 : {
1829 0 : if (qbuffer)
1830 0 : pfree(qbuffer);
1831 0 : qbuffer = qtext_load_file(&qbuffer_size);
1832 : }
1833 : }
1834 :
1835 135 : hash_seq_init(&hash_seq, pgss_hash);
1836 28684 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
1837 : {
1838 : Datum values[PG_STAT_STATEMENTS_COLS];
1839 : bool nulls[PG_STAT_STATEMENTS_COLS];
1840 28549 : int i = 0;
1841 : Counters tmp;
1842 : double stddev;
1843 28549 : int64 queryid = entry->key.queryid;
1844 : TimestampTz stats_since;
1845 : TimestampTz minmax_stats_since;
1846 :
1847 28549 : memset(values, 0, sizeof(values));
1848 28549 : memset(nulls, 0, sizeof(nulls));
1849 :
1850 28549 : values[i++] = ObjectIdGetDatum(entry->key.userid);
1851 28549 : values[i++] = ObjectIdGetDatum(entry->key.dbid);
1852 28549 : if (api_version >= PGSS_V1_9)
1853 28537 : values[i++] = BoolGetDatum(entry->key.toplevel);
1854 :
1855 28549 : if (is_allowed_role || entry->key.userid == userid)
1856 : {
1857 28545 : if (api_version >= PGSS_V1_2)
1858 28545 : values[i++] = Int64GetDatumFast(queryid);
1859 :
1860 28545 : if (showtext)
1861 : {
1862 28545 : char *qstr = qtext_fetch(entry->query_offset,
1863 : entry->query_len,
1864 : qbuffer,
1865 : qbuffer_size);
1866 :
1867 28545 : if (qstr)
1868 : {
1869 : char *enc;
1870 :
1871 28545 : enc = pg_any_to_server(qstr,
1872 : entry->query_len,
1873 : entry->encoding);
1874 :
1875 28545 : values[i++] = CStringGetTextDatum(enc);
1876 :
1877 28545 : if (enc != qstr)
1878 0 : pfree(enc);
1879 : }
1880 : else
1881 : {
1882 : /* Just return a null if we fail to find the text */
1883 0 : nulls[i++] = true;
1884 : }
1885 : }
1886 : else
1887 : {
1888 : /* Query text not requested */
1889 0 : nulls[i++] = true;
1890 : }
1891 : }
1892 : else
1893 : {
1894 : /* Don't show queryid */
1895 4 : if (api_version >= PGSS_V1_2)
1896 4 : nulls[i++] = true;
1897 :
1898 : /*
1899 : * Don't show query text, but hint as to the reason for not doing
1900 : * so if it was requested
1901 : */
1902 4 : if (showtext)
1903 4 : values[i++] = CStringGetTextDatum("<insufficient privilege>");
1904 : else
1905 0 : nulls[i++] = true;
1906 : }
1907 :
1908 : /* copy counters to a local variable to keep locking time short */
1909 28549 : SpinLockAcquire(&entry->mutex);
1910 28549 : tmp = entry->counters;
1911 28549 : SpinLockRelease(&entry->mutex);
1912 :
1913 : /*
1914 : * The spinlock is not required when reading these two as they are
1915 : * always updated when holding pgss->lock exclusively.
1916 : */
1917 28549 : stats_since = entry->stats_since;
1918 28549 : minmax_stats_since = entry->minmax_stats_since;
1919 :
1920 : /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1921 28549 : if (IS_STICKY(tmp))
1922 45 : continue;
1923 :
1924 : /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1925 85512 : for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1926 : {
1927 57008 : if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1928 : {
1929 57004 : values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1930 57004 : values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1931 : }
1932 :
1933 57008 : if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1934 : api_version >= PGSS_V1_8)
1935 : {
1936 57004 : values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1937 57004 : values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1938 57004 : values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1939 :
1940 : /*
1941 : * Note we are calculating the population variance here, not
1942 : * the sample variance, as we have data for the whole
1943 : * population, so Bessel's correction is not used, and we
1944 : * don't divide by tmp.calls - 1.
1945 : */
1946 57004 : if (tmp.calls[kind] > 1)
1947 5317 : stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1948 : else
1949 51687 : stddev = 0.0;
1950 57004 : values[i++] = Float8GetDatumFast(stddev);
1951 : }
1952 : }
1953 28504 : values[i++] = Int64GetDatumFast(tmp.rows);
1954 28504 : values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1955 28504 : values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1956 28504 : if (api_version >= PGSS_V1_1)
1957 28504 : values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1958 28504 : values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1959 28504 : values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1960 28504 : values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1961 28504 : if (api_version >= PGSS_V1_1)
1962 28504 : values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1963 28504 : values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1964 28504 : values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1965 28504 : values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1966 28504 : if (api_version >= PGSS_V1_1)
1967 : {
1968 28504 : values[i++] = Float8GetDatumFast(tmp.shared_blk_read_time);
1969 28504 : values[i++] = Float8GetDatumFast(tmp.shared_blk_write_time);
1970 : }
1971 28504 : if (api_version >= PGSS_V1_11)
1972 : {
1973 28476 : values[i++] = Float8GetDatumFast(tmp.local_blk_read_time);
1974 28476 : values[i++] = Float8GetDatumFast(tmp.local_blk_write_time);
1975 : }
1976 28504 : if (api_version >= PGSS_V1_10)
1977 : {
1978 28485 : values[i++] = Float8GetDatumFast(tmp.temp_blk_read_time);
1979 28485 : values[i++] = Float8GetDatumFast(tmp.temp_blk_write_time);
1980 : }
1981 28504 : if (api_version >= PGSS_V1_8)
1982 : {
1983 : char buf[256];
1984 : Datum wal_bytes;
1985 :
1986 28500 : values[i++] = Int64GetDatumFast(tmp.wal_records);
1987 28500 : values[i++] = Int64GetDatumFast(tmp.wal_fpi);
1988 :
1989 28500 : snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1990 :
1991 : /* Convert to numeric. */
1992 28500 : wal_bytes = DirectFunctionCall3(numeric_in,
1993 : CStringGetDatum(buf),
1994 : ObjectIdGetDatum(0),
1995 : Int32GetDatum(-1));
1996 28500 : values[i++] = wal_bytes;
1997 : }
1998 28504 : if (api_version >= PGSS_V1_12)
1999 : {
2000 28466 : values[i++] = Int64GetDatumFast(tmp.wal_buffers_full);
2001 : }
2002 28504 : if (api_version >= PGSS_V1_10)
2003 : {
2004 28485 : values[i++] = Int64GetDatumFast(tmp.jit_functions);
2005 28485 : values[i++] = Float8GetDatumFast(tmp.jit_generation_time);
2006 28485 : values[i++] = Int64GetDatumFast(tmp.jit_inlining_count);
2007 28485 : values[i++] = Float8GetDatumFast(tmp.jit_inlining_time);
2008 28485 : values[i++] = Int64GetDatumFast(tmp.jit_optimization_count);
2009 28485 : values[i++] = Float8GetDatumFast(tmp.jit_optimization_time);
2010 28485 : values[i++] = Int64GetDatumFast(tmp.jit_emission_count);
2011 28485 : values[i++] = Float8GetDatumFast(tmp.jit_emission_time);
2012 : }
2013 28504 : if (api_version >= PGSS_V1_11)
2014 : {
2015 28476 : values[i++] = Int64GetDatumFast(tmp.jit_deform_count);
2016 28476 : values[i++] = Float8GetDatumFast(tmp.jit_deform_time);
2017 : }
2018 28504 : if (api_version >= PGSS_V1_12)
2019 : {
2020 28466 : values[i++] = Int64GetDatumFast(tmp.parallel_workers_to_launch);
2021 28466 : values[i++] = Int64GetDatumFast(tmp.parallel_workers_launched);
2022 : }
2023 28504 : if (api_version >= PGSS_V1_13)
2024 : {
2025 28461 : values[i++] = Int64GetDatumFast(tmp.generic_plan_calls);
2026 28461 : values[i++] = Int64GetDatumFast(tmp.custom_plan_calls);
2027 : }
2028 28504 : if (api_version >= PGSS_V1_11)
2029 : {
2030 28476 : values[i++] = TimestampTzGetDatum(stats_since);
2031 28476 : values[i++] = TimestampTzGetDatum(minmax_stats_since);
2032 : }
2033 :
2034 : Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2035 : api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2036 : api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2037 : api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2038 : api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2039 : api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2040 : api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2041 : api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2042 : api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2043 : api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2044 : -1 /* fail if you forget to update this assert */ ));
2045 :
2046 28504 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2047 : }
2048 :
2049 135 : LWLockRelease(pgss->lock);
2050 :
2051 135 : if (qbuffer)
2052 135 : pfree(qbuffer);
2053 135 : }
2054 :
2055 : /* Number of output arguments (columns) for pg_stat_statements_info */
2056 : #define PG_STAT_STATEMENTS_INFO_COLS 2
2057 :
2058 : /*
2059 : * Return statistics of pg_stat_statements.
2060 : */
2061 : Datum
2062 2 : pg_stat_statements_info(PG_FUNCTION_ARGS)
2063 : {
2064 : pgssGlobalStats stats;
2065 : TupleDesc tupdesc;
2066 2 : Datum values[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2067 2 : bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2068 :
2069 2 : if (!pgss || !pgss_hash)
2070 0 : ereport(ERROR,
2071 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2072 : errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2073 :
2074 : /* Build a tuple descriptor for our result type */
2075 2 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2076 0 : elog(ERROR, "return type must be a row type");
2077 :
2078 : /* Read global statistics for pg_stat_statements */
2079 2 : SpinLockAcquire(&pgss->mutex);
2080 2 : stats = pgss->stats;
2081 2 : SpinLockRelease(&pgss->mutex);
2082 :
2083 2 : values[0] = Int64GetDatum(stats.dealloc);
2084 2 : values[1] = TimestampTzGetDatum(stats.stats_reset);
2085 :
2086 2 : PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
2087 : }
2088 :
2089 : /*
2090 : * Estimate shared memory space needed.
2091 : */
2092 : static Size
2093 7 : pgss_memsize(void)
2094 : {
2095 : Size size;
2096 :
2097 7 : size = MAXALIGN(sizeof(pgssSharedState));
2098 7 : size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
2099 :
2100 7 : return size;
2101 : }
2102 :
2103 : /*
2104 : * Allocate a new hashtable entry.
2105 : * caller must hold an exclusive lock on pgss->lock
2106 : *
2107 : * "query" need not be null-terminated; we rely on query_len instead
2108 : *
2109 : * If "sticky" is true, make the new entry artificially sticky so that it will
2110 : * probably still be there when the query finishes execution. We do this by
2111 : * giving it a median usage value rather than the normal value. (Strictly
2112 : * speaking, query strings are normalized on a best effort basis, though it
2113 : * would be difficult to demonstrate this even under artificial conditions.)
2114 : *
2115 : * Note: despite needing exclusive lock, it's not an error for the target
2116 : * entry to already exist. This is because pgss_store releases and
2117 : * reacquires lock after failing to find a match; so someone else could
2118 : * have made the entry while we waited to get exclusive lock.
2119 : */
2120 : static pgssEntry *
2121 57809 : entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2122 : bool sticky)
2123 : {
2124 : pgssEntry *entry;
2125 : bool found;
2126 :
2127 : /* Make space if needed */
2128 57809 : while (hash_get_num_entries(pgss_hash) >= pgss_max)
2129 0 : entry_dealloc();
2130 :
2131 : /* Find or create an entry with desired hash code */
2132 57809 : entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2133 :
2134 57809 : if (!found)
2135 : {
2136 : /* New entry, initialize it */
2137 :
2138 : /* reset the statistics */
2139 57809 : memset(&entry->counters, 0, sizeof(Counters));
2140 : /* set the appropriate initial usage count */
2141 57809 : entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
2142 : /* re-initialize the mutex each time ... we assume no one using it */
2143 57809 : SpinLockInit(&entry->mutex);
2144 : /* ... and don't forget the query text metadata */
2145 : Assert(query_len >= 0);
2146 57809 : entry->query_offset = query_offset;
2147 57809 : entry->query_len = query_len;
2148 57809 : entry->encoding = encoding;
2149 57809 : entry->stats_since = GetCurrentTimestamp();
2150 57809 : entry->minmax_stats_since = entry->stats_since;
2151 : }
2152 :
2153 57809 : return entry;
2154 : }
2155 :
2156 : /*
2157 : * qsort comparator for sorting into increasing usage order
2158 : */
2159 : static int
2160 0 : entry_cmp(const void *lhs, const void *rhs)
2161 : {
2162 0 : double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2163 0 : double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2164 :
2165 0 : if (l_usage < r_usage)
2166 0 : return -1;
2167 0 : else if (l_usage > r_usage)
2168 0 : return +1;
2169 : else
2170 0 : return 0;
2171 : }
2172 :
2173 : /*
2174 : * Deallocate least-used entries.
2175 : *
2176 : * Caller must hold an exclusive lock on pgss->lock.
2177 : */
static void
entry_dealloc(void)
{
	HASH_SEQ_STATUS hash_seq;
	pgssEntry **entries;		/* array of pointers into the hash table */
	pgssEntry  *entry;
	int			nvictims;
	int			i;
	Size		tottextlen;		/* total bytes of valid query texts seen */
	int			nvalidtexts;	/* number of entries with a valid text */

	/*
	 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
	 * While we're scanning the table, apply the decay factor to the usage
	 * values, and update the mean query length.
	 *
	 * Note that the mean query length is almost immediately obsolete, since
	 * we compute it before not after discarding the least-used entries.
	 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
	 * making two passes to get a more current result.  Likewise, the new
	 * cur_median_usage includes the entries we're about to zap.
	 */

	/* One pointer per current hash entry; caller holds exclusive lock */
	entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));

	i = 0;
	tottextlen = 0;
	nvalidtexts = 0;

	hash_seq_init(&hash_seq, pgss_hash);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		entries[i++] = entry;
		/* "Sticky" entries get a different usage decay rate. */
		if (IS_STICKY(entry->counters))
			entry->counters.usage *= STICKY_DECREASE_FACTOR;
		else
			entry->counters.usage *= USAGE_DECREASE_FACTOR;
		/* In the mean length computation, ignore dropped texts. */
		/* (query_len < 0 marks a text lost to a failed garbage collection) */
		if (entry->query_len >= 0)
		{
			tottextlen += entry->query_len + 1; /* +1 for the trailing NUL */
			nvalidtexts++;
		}
	}

	/* Sort into increasing order by usage */
	qsort(entries, i, sizeof(pgssEntry *), entry_cmp);

	/* Record the (approximate) median usage */
	if (i > 0)
		pgss->cur_median_usage = entries[i / 2]->counters.usage;
	/* Record the mean query length */
	if (nvalidtexts > 0)
		pgss->mean_query_len = tottextlen / nvalidtexts;
	else
		pgss->mean_query_len = ASSUMED_LENGTH_INIT;

	/* Now zap an appropriate fraction of lowest-usage entries */
	/* (always at least 10, so repeated insertions make progress) */
	nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
	nvictims = Min(nvictims, i);

	for (i = 0; i < nvictims; i++)
	{
		hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
	}

	pfree(entries);

	/* Increment the number of times entries are deallocated */
	/* (mutex needed: readers may examine stats holding only the spinlock) */
	SpinLockAcquire(&pgss->mutex);
	pgss->stats.dealloc += 1;
	SpinLockRelease(&pgss->mutex);
}
2252 :
2253 : /*
2254 : * Given a query string (not necessarily null-terminated), allocate a new
2255 : * entry in the external query text file and store the string there.
2256 : *
2257 : * If successful, returns true, and stores the new entry's offset in the file
2258 : * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2259 : * number of garbage collections that have occurred so far.
2260 : *
2261 : * On failure, returns false.
2262 : *
2263 : * At least a shared lock on pgss->lock must be held by the caller, so as
2264 : * to prevent a concurrent garbage collection. Share-lock-holding callers
2265 : * should pass a gc_count pointer to obtain the number of garbage collections,
2266 : * so that they can recheck the count after obtaining exclusive lock to
2267 : * detect whether a garbage collection occurred (and removed this entry).
2268 : */
static bool
qtext_store(const char *query, int query_len,
			Size *query_offset, int *gc_count)
{
	Size		off;
	int			fd;

	/*
	 * We use a spinlock to protect extent/n_writers/gc_count, so that
	 * multiple processes may execute this function concurrently.
	 *
	 * Note: the file space is reserved here (extent is advanced) even if the
	 * write below subsequently fails; the reserved bytes are simply wasted
	 * until the next garbage collection rewrites the file.
	 */
	SpinLockAcquire(&pgss->mutex);
	off = pgss->extent;
	pgss->extent += query_len + 1;	/* +1 for the terminating NUL */
	pgss->n_writers++;			/* advertise an in-progress write */
	if (gc_count)
		*gc_count = pgss->gc_count;
	SpinLockRelease(&pgss->mutex);

	*query_offset = off;

	/*
	 * Don't allow the file to grow larger than what qtext_load_file can
	 * (theoretically) handle.  This has been seen to be reachable on 32-bit
	 * platforms.
	 */
	if (unlikely(query_len >= MaxAllocHugeSize - off))
	{
		errno = EFBIG;			/* not quite right, but it'll do */
		fd = -1;				/* tell the error path there's no fd to close */
		goto error;
	}

	/* Now write the data into the successfully-reserved part of the file */
	fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
	if (fd < 0)
		goto error;

	/* pg_pwrite lets concurrent writers target disjoint regions safely */
	if (pg_pwrite(fd, query, query_len, off) != query_len)
		goto error;
	if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
		goto error;

	CloseTransientFile(fd);

	/* Mark our write complete */
	SpinLockAcquire(&pgss->mutex);
	pgss->n_writers--;
	SpinLockRelease(&pgss->mutex);

	return true;

error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not write file \"%s\": %m",
					PGSS_TEXT_FILE)));

	if (fd >= 0)
		CloseTransientFile(fd);

	/* Mark our write complete */
	/* (must happen even on failure, else readers would wait forever) */
	SpinLockAcquire(&pgss->mutex);
	pgss->n_writers--;
	SpinLockRelease(&pgss->mutex);

	return false;
}
2337 :
2338 : /*
2339 : * Read the external query text file into a palloc'd buffer.
2340 : *
2341 : * Returns NULL (without throwing an error) if unable to read, eg
2342 : * file not there or insufficient memory.
2343 : *
2344 : * On success, the buffer size is also returned into *buffer_size.
2345 : *
2346 : * This can be called without any lock on pgss->lock, but in that case
2347 : * the caller is responsible for verifying that the result is sane.
2348 : */
static char *
qtext_load_file(Size *buffer_size)
{
	char	   *buf;
	int			fd;
	struct stat stat;
	Size		nread;

	fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
	if (fd < 0)
	{
		/* ENOENT is expected before the first qtext_store; stay silent */
		if (errno != ENOENT)
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not read file \"%s\": %m",
							PGSS_TEXT_FILE)));
		return NULL;
	}

	/* Get file length */
	if (fstat(fd, &stat))
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not stat file \"%s\": %m",
						PGSS_TEXT_FILE)));
		CloseTransientFile(fd);
		return NULL;
	}

	/* Allocate buffer; beware that off_t might be wider than size_t */
	if (stat.st_size <= MaxAllocHugeSize)
		buf = (char *) palloc_extended(stat.st_size, MCXT_ALLOC_HUGE | MCXT_ALLOC_NO_OOM);
	else
		buf = NULL;
	if (buf == NULL)
	{
		ereport(LOG,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory"),
				 errdetail("Could not allocate enough memory to read file \"%s\".",
						   PGSS_TEXT_FILE)));
		CloseTransientFile(fd);
		return NULL;
	}

	/*
	 * OK, slurp in the file.  Windows fails if we try to read more than
	 * INT_MAX bytes at once, and other platforms might not like that either,
	 * so read a very large file in 1GB segments.
	 */
	nread = 0;
	while (nread < stat.st_size)
	{
		int			toread = Min(1024 * 1024 * 1024, stat.st_size - nread);

		/*
		 * If we get a short read and errno doesn't get set, the reason is
		 * probably that garbage collection truncated the file since we did
		 * the fstat(), so we don't log a complaint --- but we don't return
		 * the data, either, since it's most likely corrupt due to concurrent
		 * writes from garbage collection.
		 */
		errno = 0;
		if (read(fd, buf + nread, toread) != toread)
		{
			if (errno)
				ereport(LOG,
						(errcode_for_file_access(),
						 errmsg("could not read file \"%s\": %m",
								PGSS_TEXT_FILE)));
			pfree(buf);
			CloseTransientFile(fd);
			return NULL;
		}
		nread += toread;
	}

	/* A close failure is worth logging but doesn't invalidate what we read */
	if (CloseTransientFile(fd) != 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));

	*buffer_size = nread;
	return buf;
}
2435 :
2436 : /*
2437 : * Locate a query text in the file image previously read by qtext_load_file().
2438 : *
2439 : * We validate the given offset/length, and return NULL if bogus. Otherwise,
2440 : * the result points to a null-terminated string within the buffer.
2441 : */
2442 : static char *
2443 85436 : qtext_fetch(Size query_offset, int query_len,
2444 : char *buffer, Size buffer_size)
2445 : {
2446 : /* File read failed? */
2447 85436 : if (buffer == NULL)
2448 0 : return NULL;
2449 : /* Bogus offset/length? */
2450 85436 : if (query_len < 0 ||
2451 85436 : query_offset + query_len >= buffer_size)
2452 0 : return NULL;
2453 : /* As a further sanity check, make sure there's a trailing null */
2454 85436 : if (buffer[query_offset + query_len] != '\0')
2455 0 : return NULL;
2456 : /* Looks OK */
2457 85436 : return buffer + query_offset;
2458 : }
2459 :
2460 : /*
2461 : * Do we need to garbage-collect the external query text file?
2462 : *
2463 : * Caller should hold at least a shared lock on pgss->lock.
2464 : */
2465 : static bool
2466 30282 : need_gc_qtexts(void)
2467 : {
2468 : Size extent;
2469 :
2470 : /* Read shared extent pointer */
2471 30282 : SpinLockAcquire(&pgss->mutex);
2472 30282 : extent = pgss->extent;
2473 30282 : SpinLockRelease(&pgss->mutex);
2474 :
2475 : /*
2476 : * Don't proceed if file does not exceed 512 bytes per possible entry.
2477 : *
2478 : * Here and in the next test, 32-bit machines have overflow hazards if
2479 : * pgss_max and/or mean_query_len are large. Force the multiplications
2480 : * and comparisons to be done in uint64 arithmetic to forestall trouble.
2481 : */
2482 30282 : if ((uint64) extent < (uint64) 512 * pgss_max)
2483 30282 : return false;
2484 :
2485 : /*
2486 : * Don't proceed if file is less than about 50% bloat. Nothing can or
2487 : * should be done in the event of unusually large query texts accounting
2488 : * for file's large size. We go to the trouble of maintaining the mean
2489 : * query length in order to prevent garbage collection from thrashing
2490 : * uselessly.
2491 : */
2492 0 : if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2493 0 : return false;
2494 :
2495 0 : return true;
2496 : }
2497 :
2498 : /*
2499 : * Garbage-collect orphaned query texts in external file.
2500 : *
2501 : * This won't be called often in the typical case, since it's likely that
2502 : * there won't be too much churn, and besides, a similar compaction process
2503 : * occurs when serializing to disk at shutdown or as part of resetting.
2504 : * Despite this, it seems prudent to plan for the edge case where the file
2505 : * becomes unreasonably large, with no other method of compaction likely to
2506 : * occur in the foreseeable future.
2507 : *
2508 : * The caller must hold an exclusive lock on pgss->lock.
2509 : *
2510 : * At the first sign of trouble we unlink the query text file to get a clean
2511 : * slate (although existing statistics are retained), rather than risk
2512 : * thrashing by allowing the same problem case to recur indefinitely.
2513 : */
static void
gc_qtexts(void)
{
	char	   *qbuffer;
	Size		qbuffer_size;
	FILE	   *qfile = NULL;
	HASH_SEQ_STATUS hash_seq;
	pgssEntry  *entry;
	Size		extent;			/* write position in the rewritten file */
	int			nentries;		/* number of texts successfully copied */

	/*
	 * When called from pgss_store, some other session might have proceeded
	 * with garbage collection in the no-lock-held interim of lock strength
	 * escalation.  Check once more that this is actually necessary.
	 */
	if (!need_gc_qtexts())
		return;

	/*
	 * Load the old texts file.  If we fail (out of memory, for instance),
	 * invalidate query texts.  Hopefully this is rare.  It might seem better
	 * to leave things alone on an OOM failure, but the problem is that the
	 * file is only going to get bigger; hoping for a future non-OOM result is
	 * risky and can easily lead to complete denial of service.
	 */
	qbuffer = qtext_load_file(&qbuffer_size);
	if (qbuffer == NULL)
		goto gc_fail;

	/*
	 * We overwrite the query texts file in place, so as to reduce the risk of
	 * an out-of-disk-space failure.  Since the file is guaranteed not to get
	 * larger, this should always work on traditional filesystems; though we
	 * could still lose on copy-on-write filesystems.
	 */
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write file \"%s\": %m",
						PGSS_TEXT_FILE)));
		goto gc_fail;
	}

	extent = 0;
	nentries = 0;

	/*
	 * Copy each live entry's text from the in-memory image into the new
	 * (compacted) file, updating the entry's offset as we go.  Orphaned
	 * texts -- those not referenced by any hash entry -- are simply not
	 * copied, which is the whole point of this exercise.
	 */
	hash_seq_init(&hash_seq, pgss_hash);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		int			query_len = entry->query_len;
		char	   *qry = qtext_fetch(entry->query_offset,
									  query_len,
									  qbuffer,
									  qbuffer_size);

		if (qry == NULL)
		{
			/* Trouble ... drop the text */
			entry->query_offset = 0;
			entry->query_len = -1;
			/* entry will not be counted in mean query length computation */
			continue;
		}

		/* write the text plus its trailing NUL byte */
		if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
		{
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not write file \"%s\": %m",
							PGSS_TEXT_FILE)));
			/* must terminate the seq scan before bailing out */
			hash_seq_term(&hash_seq);
			goto gc_fail;
		}

		entry->query_offset = extent;
		extent += query_len + 1;
		nentries++;
	}

	/*
	 * Truncate away any now-unused space.  If this fails for some odd reason,
	 * we log it, but there's no need to fail.
	 */
	if (ftruncate(fileno(qfile), extent) != 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not truncate file \"%s\": %m",
						PGSS_TEXT_FILE)));

	/* FreeFile flushes buffered data; a failure here means lost writes */
	if (FreeFile(qfile))
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write file \"%s\": %m",
						PGSS_TEXT_FILE)));
		qfile = NULL;			/* already closed; don't close again below */
		goto gc_fail;
	}

	elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
		 pgss->extent, extent);

	/* Reset the shared extent pointer */
	pgss->extent = extent;

	/*
	 * Also update the mean query length, to be sure that need_gc_qtexts()
	 * won't still think we have a problem.
	 */
	if (nentries > 0)
		pgss->mean_query_len = extent / nentries;
	else
		pgss->mean_query_len = ASSUMED_LENGTH_INIT;

	pfree(qbuffer);

	/*
	 * OK, count a garbage collection cycle.  (Note: even though we have
	 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
	 * other processes may examine gc_count while holding only the mutex.
	 * Also, we have to advance the count *after* we've rewritten the file,
	 * else other processes might not realize they read a stale file.)
	 */
	record_gc_qtexts();

	return;

gc_fail:
	/* clean up resources */
	if (qfile)
		FreeFile(qfile);
	if (qbuffer)
		pfree(qbuffer);

	/*
	 * Since the contents of the external file are now uncertain, mark all
	 * hashtable entries as having invalid texts.
	 */
	hash_seq_init(&hash_seq, pgss_hash);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		entry->query_offset = 0;
		entry->query_len = -1;
	}

	/*
	 * Destroy the query text file and create a new, empty one
	 */
	(void) unlink(PGSS_TEXT_FILE);
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not recreate file \"%s\": %m",
						PGSS_TEXT_FILE)));
	else
		FreeFile(qfile);

	/* Reset the shared extent pointer */
	pgss->extent = 0;

	/* Reset mean_query_len to match the new state */
	pgss->mean_query_len = ASSUMED_LENGTH_INIT;

	/*
	 * Bump the GC count even though we failed.
	 *
	 * This is needed to make concurrent readers of file without any lock on
	 * pgss->lock notice existence of new version of file.  Once readers
	 * subsequently observe a change in GC count with pgss->lock held, that
	 * forces a safe reopen of file.  Writers also require that we bump here,
	 * of course.  (As required by locking protocol, readers and writers don't
	 * trust earlier file contents until gc_count is found unchanged after
	 * pgss->lock acquired in shared or exclusive mode respectively.)
	 */
	record_gc_qtexts();
}
2694 :
/*
 * Reset one hashtable entry, if "e" is non-NULL.
 *
 * Depending on "minmax_only", either zero only the entry's min/max timing
 * statistics (stamping minmax_stats_since), or remove the entry from the
 * hash table entirely (incrementing num_remove).
 *
 * NOTE(review): this macro relies on variables from the caller's scope:
 * pgss_hash, minmax_only, stats_reset, and num_remove must all be in scope
 * at the expansion site (as they are in entry_reset).  The caller must hold
 * pgss->lock exclusively, since HASH_REMOVE modifies the shared hash table.
 */
#define SINGLE_ENTRY_RESET(e) \
	if (e) { \
		if (minmax_only) { \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				e->counters.max_time[kind] = 0; \
				e->counters.min_time[kind] = 0; \
			} \
			e->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &e->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	}
2713 :
/*
 * Reset entries corresponding to the parameters passed.
 *
 * If userid, dbid and queryid are all given, the matching entries (both
 * the top-level and non-top-level variants) are reset via direct hash
 * lookups; if only some are given, a sequential scan filters on the
 * non-zero parameters; if none are given, every entry is reset.
 *
 * If minmax_only is true, only the min/max timing statistics of matching
 * entries are reset (see SINGLE_ENTRY_RESET); otherwise matching entries
 * are removed entirely.  When the hashtable ends up empty, the global
 * stats and the external query-text file are also reset.
 *
 * Returns the timestamp used as the reset time.
 */
static TimestampTz
entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
{
	HASH_SEQ_STATUS hash_seq;
	pgssEntry  *entry;
	FILE	   *qfile;
	int64		num_entries;
	int64		num_remove = 0;	/* # of entries actually removed */
	pgssHashKey key;
	TimestampTz stats_reset;

	if (!pgss || !pgss_hash)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));

	/* Entry creation/removal requires exclusive lock; see file head comment */
	LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
	num_entries = hash_get_num_entries(pgss_hash);

	stats_reset = GetCurrentTimestamp();

	if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
	{
		/* If all the parameters are available, use the fast path. */
		memset(&key, 0, sizeof(pgssHashKey));
		key.userid = userid;
		key.dbid = dbid;
		key.queryid = queryid;

		/*
		 * Reset the entry if it exists, starting with the non-top-level
		 * entry.
		 */
		key.toplevel = false;
		entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);

		SINGLE_ENTRY_RESET(entry);

		/* Also reset the top-level entry if it exists. */
		key.toplevel = true;
		entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);

		SINGLE_ENTRY_RESET(entry);
	}
	else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
	{
		/* Reset entries corresponding to valid parameters. */
		hash_seq_init(&hash_seq, pgss_hash);
		while ((entry = hash_seq_search(&hash_seq)) != NULL)
		{
			/* Zero parameters act as wildcards */
			if ((!userid || entry->key.userid == userid) &&
				(!dbid || entry->key.dbid == dbid) &&
				(!queryid || entry->key.queryid == queryid))
			{
				SINGLE_ENTRY_RESET(entry);
			}
		}
	}
	else
	{
		/* Reset all entries. */
		hash_seq_init(&hash_seq, pgss_hash);
		while ((entry = hash_seq_search(&hash_seq)) != NULL)
		{
			SINGLE_ENTRY_RESET(entry);
		}
	}

	/* All entries are removed? */
	if (num_entries != num_remove)
		goto release_lock;

	/*
	 * Reset global statistics for pg_stat_statements since all entries are
	 * removed.
	 */
	SpinLockAcquire(&pgss->mutex);
	pgss->stats.dealloc = 0;
	pgss->stats.stats_reset = stats_reset;
	SpinLockRelease(&pgss->mutex);

	/*
	 * Write new empty query file, perhaps even creating a new one to recover
	 * if the file was missing.
	 */
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m",
						PGSS_TEXT_FILE)));
		goto done;
	}

	/* If ftruncate fails, log it, but it's not a fatal problem */
	if (ftruncate(fileno(qfile), 0) != 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not truncate file \"%s\": %m",
						PGSS_TEXT_FILE)));

	FreeFile(qfile);

done:
	pgss->extent = 0;
	/* This counts as a query text garbage collection for our purposes */
	record_gc_qtexts();

release_lock:
	LWLockRelease(pgss->lock);

	return stats_reset;
}
2831 :
2832 : /*
2833 : * Generate a normalized version of the query string that will be used to
2834 : * represent all similar queries.
2835 : *
2836 : * Note that the normalized representation may well vary depending on
2837 : * just which "equivalent" query is used to create the hashtable entry.
2838 : * We assume this is OK.
2839 : *
2840 : * If query_loc > 0, then "query" has been advanced by that much compared to
2841 : * the original string start, so we need to translate the provided locations
2842 : * to compensate. (This lets us avoid re-scanning statements before the one
2843 : * of interest, so it's worth doing.)
2844 : *
2845 : * *query_len_p contains the input string length, and is updated with
2846 : * the result string length on exit. The resulting string might be longer
2847 : * or shorter depending on what happens with replacement of constants.
2848 : *
2849 : * Returns a palloc'd string.
2850 : */
static char *
generate_normalized_query(JumbleState *jstate, const char *query,
						  int query_loc, int *query_len_p)
{
	char	   *norm_query;
	int			query_len = *query_len_p;
	int			norm_query_buflen,	/* Space allowed for norm_query */
				len_to_wrt,		/* Length (in bytes) to write */
				quer_loc = 0,	/* Source query byte location */
				n_quer_loc = 0, /* Normalized query byte location */
				last_off = 0,	/* Offset from start for previous tok */
				last_tok_len = 0;	/* Length (in bytes) of that tok */
	int			num_constants_replaced = 0;

	/*
	 * Get constants' lengths (core system only gives us locations).  Note
	 * this also ensures the items are sorted by location.
	 */
	fill_in_constant_lengths(jstate, query, query_loc);

	/*
	 * Allow for $n symbols to be longer than the constants they replace.
	 * Constants must take at least one byte in text form, while a $n symbol
	 * certainly isn't more than 11 bytes, even if n reaches INT_MAX.  We
	 * could refine that limit based on the max value of n for the current
	 * query, but it hardly seems worth any extra effort to do so.
	 */
	norm_query_buflen = query_len + jstate->clocations_count * 10;

	/* Allocate result buffer */
	norm_query = palloc(norm_query_buflen + 1);

	for (int i = 0; i < jstate->clocations_count; i++)
	{
		int			off,		/* Offset from start for cur tok */
					tok_len;	/* Length (in bytes) of that tok */

		/*
		 * If we have an external param at this location, but no lists are
		 * being squashed across the query, then we skip here; this will make
		 * us print the characters found in the original query that represent
		 * the parameter in the next iteration (or after the loop is done),
		 * which is a bit odd but seems to work okay in most cases.
		 */
		if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists)
			continue;

		off = jstate->clocations[i].location;

		/* Adjust recorded location if we're dealing with partial string */
		off -= query_loc;

		tok_len = jstate->clocations[i].length;

		if (tok_len < 0)
			continue;			/* ignore any duplicates */

		/* Copy next chunk (what precedes the next constant) */
		len_to_wrt = off - last_off;
		len_to_wrt -= last_tok_len;
		Assert(len_to_wrt >= 0);
		memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
		n_quer_loc += len_to_wrt;

		/*
		 * And insert a param symbol in place of the constant token; and, if
		 * we have a squashable list, insert a placeholder comment starting
		 * from the list's second value.
		 *
		 * Generated $n numbers start after the query's highest pre-existing
		 * external parameter id, so they cannot collide with real $n
		 * parameters already present in the text.
		 */
		n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s",
							  num_constants_replaced + 1 + jstate->highest_extern_param_id,
							  jstate->clocations[i].squashed ? " /*, ... */" : "");
		num_constants_replaced++;

		/* move forward */
		quer_loc = off + tok_len;
		last_off = off;
		last_tok_len = tok_len;
	}

	/*
	 * We've copied up until the last ignorable constant.  Copy over the
	 * remaining bytes of the original query string.
	 */
	len_to_wrt = query_len - quer_loc;

	Assert(len_to_wrt >= 0);
	memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
	n_quer_loc += len_to_wrt;

	Assert(n_quer_loc <= norm_query_buflen);
	norm_query[n_quer_loc] = '\0';

	*query_len_p = n_quer_loc;
	return norm_query;
}
2947 :
2948 : /*
2949 : * Given a valid SQL string and an array of constant-location records,
2950 : * fill in the textual lengths of those constants.
2951 : *
2952 : * The constants may use any allowed constant syntax, such as float literals,
2953 : * bit-strings, single-quoted strings and dollar-quoted strings. This is
2954 : * accomplished by using the public API for the core scanner.
2955 : *
2956 : * It is the caller's job to ensure that the string is a valid SQL statement
2957 : * with constants at the indicated locations. Since in practice the string
2958 : * has already been parsed, and the locations that the caller provides will
2959 : * have originated from within the authoritative parser, this should not be
2960 : * a problem.
2961 : *
2962 : * Multiple constants can have the same location. We reset lengths of those
2963 : * past the first to -1 so that they can later be ignored.
2964 : *
2965 : * If query_loc > 0, then "query" has been advanced by that much compared to
2966 : * the original string start, so we need to translate the provided locations
2967 : * to compensate. (This lets us avoid re-scanning statements before the one
2968 : * of interest, so it's worth doing.)
2969 : *
2970 : * N.B. There is an assumption that a '-' character at a Const location begins
2971 : * a negative numeric constant. This precludes there ever being another
2972 : * reason for a constant to start with a '-'.
2973 : */
static void
fill_in_constant_lengths(JumbleState *jstate, const char *query,
						 int query_loc)
{
	LocationLen *locs;
	core_yyscan_t yyscanner;
	core_yy_extra_type yyextra;
	core_YYSTYPE yylval;
	YYLTYPE		yylloc;

	/*
	 * Sort the records by location so that we can process them in order while
	 * scanning the query text.
	 */
	if (jstate->clocations_count > 1)
		qsort(jstate->clocations, jstate->clocations_count,
			  sizeof(LocationLen), comp_location);
	locs = jstate->clocations;

	/* initialize the flex scanner --- should match raw_parser() */
	yyscanner = scanner_init(query,
							 &yyextra,
							 &ScanKeywords,
							 ScanKeywordTokens);

	/*
	 * Search for each constant, in sequence.  Because the records are now
	 * sorted by location, the scanner is never reset: each search just
	 * continues lexing forward from where the previous one stopped.
	 */
	for (int i = 0; i < jstate->clocations_count; i++)
	{
		int			loc;
		int			tok;

		/* Ignore constants after the first one in the same location */
		if (i > 0 && locs[i].location == locs[i - 1].location)
		{
			locs[i].length = -1;
			continue;
		}

		if (locs[i].squashed)
			continue;			/* squashable list, ignore */

		/* Adjust recorded location if we're dealing with partial string */
		loc = locs[i].location - query_loc;
		Assert(loc >= 0);

		/*
		 * We have a valid location for a constant that's not a dupe.  Lex
		 * tokens until we find the desired constant.
		 */
		for (;;)
		{
			tok = core_yylex(&yylval, &yylloc, yyscanner);

			/* We should not hit end-of-string, but if we do, behave sanely */
			if (tok == 0)
				break;			/* out of inner for-loop */

			/*
			 * We should find the token position exactly, but if we somehow
			 * run past it, work with that.
			 */
			if (yylloc >= loc)
			{
				if (query[loc] == '-')
				{
					/*
					 * It's a negative value - this is the one and only case
					 * where we replace more than a single token.
					 *
					 * Do not compensate for the core system's special-case
					 * adjustment of location to that of the leading '-'
					 * operator in the event of a negative constant.  It is
					 * also useful for our purposes to start from the minus
					 * symbol.  In this way, queries like "select * from foo
					 * where bar = 1" and "select * from foo where bar = -2"
					 * will have identical normalized query strings.
					 */
					tok = core_yylex(&yylval, &yylloc, yyscanner);
					if (tok == 0)
						break;	/* out of inner for-loop */
				}

				/*
				 * We now rely on the assumption that flex has placed a zero
				 * byte after the text of the current token in scanbuf.
				 */
				locs[i].length = strlen(yyextra.scanbuf + loc);
				break;			/* out of inner for-loop */
			}
		}

		/* If we hit end-of-string, give up, leaving remaining lengths -1 */
		if (tok == 0)
			break;
	}

	scanner_finish(yyscanner);
}
3072 :
3073 : /*
3074 : * comp_location: comparator for qsorting LocationLen structs by location
3075 : */
3076 : static int
3077 37598 : comp_location(const void *a, const void *b)
3078 : {
3079 37598 : int l = ((const LocationLen *) a)->location;
3080 37598 : int r = ((const LocationLen *) b)->location;
3081 :
3082 37598 : return pg_cmp_s32(l, r);
3083 : }
|