Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_stat_statements.c
4 : * Track statement planning and execution times as well as resource
5 : * usage across a whole database cluster.
6 : *
7 : * Execution costs are totaled for each distinct source query, and kept in
8 : * a shared hashtable. (We track only as many distinct queries as will fit
9 : * in the designated amount of shared memory.)
10 : *
11 : * Starting in Postgres 9.2, this module normalized query entries. As of
12 : * Postgres 14, the normalization is done by the core if compute_query_id is
13 : * enabled, or optionally by third-party modules.
14 : *
15 : * To facilitate presenting entries to users, we create "representative" query
16 : * strings in which constants are replaced with parameter symbols ($n), to
17 : * make it clearer what a normalized entry can represent. To save on shared
18 : * memory, and to avoid having to truncate oversized query strings, we store
19 : * these strings in a temporary external query-texts file. Offsets into this
20 : * file are kept in shared memory.
21 : *
22 : * Note about locking issues: to create or delete an entry in the shared
23 : * hashtable, one must hold pgss->lock exclusively. Modifying any field
24 : * in an entry except the counters requires the same. To look up an entry,
25 : * one must hold the lock shared. To read or update the counters within
26 : * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27 : * disappear!) and also take the entry's mutex spinlock.
28 : * The shared state variable pgss->extent (the next free spot in the external
29 : * query-text file) should be accessed only while holding either the
30 : * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31 : * allow reserving file space while holding only shared lock on pgss->lock.
32 : * Rewriting the entire external query-text file, eg for garbage collection,
33 : * requires holding pgss->lock exclusively; this allows individual entries
34 : * in the file to be read or written while holding only shared lock.
35 : *
36 : *
37 : * Copyright (c) 2008-2025, PostgreSQL Global Development Group
38 : *
39 : * IDENTIFICATION
40 : * contrib/pg_stat_statements/pg_stat_statements.c
41 : *
42 : *-------------------------------------------------------------------------
43 : */
44 : #include "postgres.h"
45 :
46 : #include <math.h>
47 : #include <sys/stat.h>
48 : #include <unistd.h>
49 :
50 : #include "access/parallel.h"
51 : #include "catalog/pg_authid.h"
52 : #include "common/int.h"
53 : #include "executor/instrument.h"
54 : #include "funcapi.h"
55 : #include "jit/jit.h"
56 : #include "mb/pg_wchar.h"
57 : #include "miscadmin.h"
58 : #include "nodes/queryjumble.h"
59 : #include "optimizer/planner.h"
60 : #include "parser/analyze.h"
61 : #include "parser/scanner.h"
62 : #include "pgstat.h"
63 : #include "storage/fd.h"
64 : #include "storage/ipc.h"
65 : #include "storage/lwlock.h"
66 : #include "storage/shmem.h"
67 : #include "storage/spin.h"
68 : #include "tcop/utility.h"
69 : #include "utils/acl.h"
70 : #include "utils/builtins.h"
71 : #include "utils/memutils.h"
72 : #include "utils/timestamp.h"
73 :
74 16 : PG_MODULE_MAGIC_EXT(
75 : .name = "pg_stat_statements",
76 : .version = PG_VERSION
77 : );
78 :
/* Location of permanent stats file (valid when database is shut down) */
#define PGSS_DUMP_FILE	PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"

/*
 * Location of external query text file.
 */
#define PGSS_TEXT_FILE	PG_STAT_TMP_DIR "/pgss_query_texts.stat"

/* Magic number identifying the stats file format */
static const uint32 PGSS_FILE_HEADER = 0x20250731;

/* PostgreSQL major version number, changes in which invalidate all entries */
static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;

/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
#define USAGE_EXEC(duration)	(1.0)
#define USAGE_INIT				(1.0)	/* including initial planning */
#define ASSUMED_MEDIAN_INIT		(10.0)	/* initial assumed median usage */
#define ASSUMED_LENGTH_INIT		1024	/* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR	(0.99)	/* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR	(0.50)	/* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT	5		/* free this % of entries at once */

/*
 * A "sticky" entry is one that has never recorded a planned or executed
 * call; such entries are skipped when reloading the dump file at startup.
 */
#define IS_STICKY(c)	((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
102 :
/*
 * Extension version number, for supporting older extension versions' objects.
 * Passed as the api_version argument to pg_stat_statements_internal() to
 * select which result columns to emit.
 */
typedef enum pgssVersion
{
	PGSS_V1_0 = 0,
	PGSS_V1_1,
	PGSS_V1_2,
	PGSS_V1_3,
	PGSS_V1_8,
	PGSS_V1_9,
	PGSS_V1_10,
	PGSS_V1_11,
	PGSS_V1_12,
	PGSS_V1_13,
} pgssVersion;
119 :
typedef enum pgssStoreKind
{
	/* Used by pgss_store() callers that only create/normalize an entry */
	PGSS_INVALID = -1,

	/*
	 * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
	 * reference the underlying values in the arrays in the Counters struct,
	 * and this order is required in pg_stat_statements_internal().
	 */
	PGSS_PLAN = 0,
	PGSS_EXEC,
} pgssStoreKind;

/* Number of "real" store kinds, i.e. valid indexes into Counters arrays */
#define PGSS_NUMKIND (PGSS_EXEC + 1)
134 :
/*
 * Hashtable key that defines the identity of a hashtable entry.  We separate
 * queries by user and by database even if they are otherwise identical.
 *
 * If you add a new key to this struct, make sure to teach pgss_store() to
 * zero the padding bytes.  Otherwise, things will break, because pgss_hash is
 * created using HASH_BLOBS, and thus tag_hash is used to hash this.
 */
typedef struct pgssHashKey
{
	Oid			userid;			/* user OID */
	Oid			dbid;			/* database OID */
	int64		queryid;		/* query identifier */
	bool		toplevel;		/* query executed at top level */
} pgssHashKey;
151 :
/*
 * The actual stats counters kept within pgssEntry.
 *
 * NOTE: this struct is serialized wholesale (as part of pgssEntry) via
 * fwrite()/fread() in pgss_shmem_shutdown()/pgss_shmem_startup(), so any
 * change to its layout changes the dump-file format; PGSS_FILE_HEADER
 * should be bumped accordingly.
 */
typedef struct Counters
{
	int64		calls[PGSS_NUMKIND];	/* # of times planned/executed */
	double		total_time[PGSS_NUMKIND];	/* total planning/execution time,
											 * in msec */
	double		min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
										 * msec since min/max reset */
	double		max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
										 * msec since min/max reset */
	double		mean_time[PGSS_NUMKIND];	/* mean planning/execution time in
											 * msec */
	double		sum_var_time[PGSS_NUMKIND]; /* sum of variances in
											 * planning/execution time in msec */
	int64		rows;			/* total # of retrieved or affected rows */
	int64		shared_blks_hit;	/* # of shared buffer hits */
	int64		shared_blks_read;	/* # of shared disk blocks read */
	int64		shared_blks_dirtied;	/* # of shared disk blocks dirtied */
	int64		shared_blks_written;	/* # of shared disk blocks written */
	int64		local_blks_hit; /* # of local buffer hits */
	int64		local_blks_read;	/* # of local disk blocks read */
	int64		local_blks_dirtied; /* # of local disk blocks dirtied */
	int64		local_blks_written; /* # of local disk blocks written */
	int64		temp_blks_read; /* # of temp blocks read */
	int64		temp_blks_written;	/* # of temp blocks written */
	double		shared_blk_read_time;	/* time spent reading shared blocks,
										 * in msec */
	double		shared_blk_write_time;	/* time spent writing shared blocks,
										 * in msec */
	double		local_blk_read_time;	/* time spent reading local blocks, in
										 * msec */
	double		local_blk_write_time;	/* time spent writing local blocks, in
										 * msec */
	double		temp_blk_read_time; /* time spent reading temp blocks, in msec */
	double		temp_blk_write_time;	/* time spent writing temp blocks, in
										 * msec */
	double		usage;			/* usage factor */
	int64		wal_records;	/* # of WAL records generated */
	int64		wal_fpi;		/* # of WAL full page images generated */
	uint64		wal_bytes;		/* total amount of WAL generated in bytes */
	int64		wal_buffers_full;	/* # of times the WAL buffers became full */
	int64		jit_functions;	/* total number of JIT functions emitted */
	double		jit_generation_time;	/* total time to generate jit code */
	int64		jit_inlining_count; /* number of times inlining time has been
									 * > 0 */
	double		jit_deform_time;	/* total time to deform tuples in jit code */
	int64		jit_deform_count;	/* number of times deform time has been >
									 * 0 */

	double		jit_inlining_time;	/* total time to inline jit code */
	int64		jit_optimization_count; /* number of times optimization time
										 * has been > 0 */
	double		jit_optimization_time;	/* total time to optimize jit code */
	int64		jit_emission_count; /* number of times emission time has been
									 * > 0 */
	double		jit_emission_time;	/* total time to emit jit code */
	int64		parallel_workers_to_launch; /* # of parallel workers planned
											 * to be launched */
	int64		parallel_workers_launched;	/* # of parallel workers actually
											 * launched */
	int64		generic_plan_calls; /* number of calls using a generic plan */
	int64		custom_plan_calls;	/* number of calls using a custom plan */
} Counters;
217 :
/*
 * Global statistics for pg_stat_statements, exposed by
 * pg_stat_statements_info() and persisted at the end of the dump file.
 */
typedef struct pgssGlobalStats
{
	int64		dealloc;		/* # of times entries were deallocated */
	TimestampTz stats_reset;	/* timestamp with all stats reset */
} pgssGlobalStats;
226 :
/*
 * Statistics per statement
 *
 * Note: in event of a failure in garbage collection of the query text file,
 * we reset query_offset to zero and query_len to -1.  This will be seen as
 * an invalid state by qtext_fetch().
 *
 * The whole struct is written to / read from the dump file as raw bytes
 * (see pgss_shmem_shutdown() and pgss_shmem_startup()), so its layout is
 * part of the on-disk format.
 */
typedef struct pgssEntry
{
	pgssHashKey key;			/* hash key of entry - MUST BE FIRST */
	Counters	counters;		/* the statistics for this query */
	Size		query_offset;	/* query text offset in external file */
	int			query_len;		/* # of valid bytes in query string, or -1 */
	int			encoding;		/* query text encoding */
	TimestampTz stats_since;	/* timestamp of entry allocation */
	TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
	slock_t		mutex;			/* protects the counters only */
} pgssEntry;
245 :
/*
 * Global shared state.  See the locking discussion in the file header
 * comment for the exact rules governing lock/mutex usage on these fields.
 */
typedef struct pgssSharedState
{
	LWLock	   *lock;			/* protects hashtable search/modification */
	double		cur_median_usage;	/* current median usage in hashtable */
	Size		mean_query_len; /* current mean entry text length */
	slock_t		mutex;			/* protects following fields only: */
	Size		extent;			/* current extent of query file */
	int			n_writers;		/* number of active writers to query file */
	int			gc_count;		/* query file garbage collection cycle count */
	pgssGlobalStats stats;		/* global statistics for pgss */
} pgssSharedState;
260 :
/*---- Local variables ----*/

/* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
static int	nesting_level = 0;

/* Saved hook values, so our hooks can chain to previously-installed ones */
static shmem_request_hook_type prev_shmem_request_hook = NULL;
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
static planner_hook_type prev_planner_hook = NULL;
static ExecutorStart_hook_type prev_ExecutorStart = NULL;
static ExecutorRun_hook_type prev_ExecutorRun = NULL;
static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;

/* Links to shared memory state; NULL until pgss_shmem_startup() has run */
static pgssSharedState *pgss = NULL;
static HTAB *pgss_hash = NULL;
280 :
/*---- GUC variables ----*/

/* Values for the pg_stat_statements.track GUC */
typedef enum
{
	PGSS_TRACK_NONE,			/* track no statements */
	PGSS_TRACK_TOP,				/* only top level statements */
	PGSS_TRACK_ALL,				/* all statements, including nested ones */
} PGSSTrackLevel;

static const struct config_enum_entry track_options[] =
{
	{"none", PGSS_TRACK_NONE, false},
	{"top", PGSS_TRACK_TOP, false},
	{"all", PGSS_TRACK_ALL, false},
	{NULL, 0, false}
};

/* Defaults here mirror the DefineCustom*Variable() calls in _PG_init() */
static int	pgss_max = 5000;	/* max # statements to track */
static int	pgss_track = PGSS_TRACK_TOP;	/* tracking level */
static bool pgss_track_utility = true;	/* whether to track utility commands */
static bool pgss_track_planning = false;	/* whether to track planning
											 * duration */
static bool pgss_save = true;	/* whether to save stats across shutdown */
304 :
/*
 * Should statements at the given nesting level be tracked?  Statements run
 * inside parallel workers are never tracked.
 */
#define pgss_enabled(level) \
	(!IsParallelWorker() && \
	 (pgss_track == PGSS_TRACK_ALL || \
	  (pgss_track == PGSS_TRACK_TOP && (level) == 0)))

/*
 * Bump the query-text-file garbage collection cycle counter, under the
 * spinlock.  do/while(0) makes the macro behave as a single statement.
 */
#define record_gc_qtexts() \
	do { \
		SpinLockAcquire(&pgss->mutex); \
		pgss->gc_count++; \
		SpinLockRelease(&pgss->mutex); \
	} while(0)
316 :
/*---- Function declarations ----*/

/*
 * SQL-callable entry points.  The version-suffixed variants exist to keep
 * supporting the objects created by older versions of the extension's SQL
 * scripts (see pgssVersion).
 */
PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_7);
PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_11);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_3);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_8);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_9);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_10);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_11);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_12);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_13);
PG_FUNCTION_INFO_V1(pg_stat_statements);
PG_FUNCTION_INFO_V1(pg_stat_statements_info);
332 :
333 : static void pgss_shmem_request(void);
334 : static void pgss_shmem_startup(void);
335 : static void pgss_shmem_shutdown(int code, Datum arg);
336 : static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
337 : JumbleState *jstate);
338 : static PlannedStmt *pgss_planner(Query *parse,
339 : const char *query_string,
340 : int cursorOptions,
341 : ParamListInfo boundParams);
342 : static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
343 : static void pgss_ExecutorRun(QueryDesc *queryDesc,
344 : ScanDirection direction,
345 : uint64 count);
346 : static void pgss_ExecutorFinish(QueryDesc *queryDesc);
347 : static void pgss_ExecutorEnd(QueryDesc *queryDesc);
348 : static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
349 : bool readOnlyTree,
350 : ProcessUtilityContext context, ParamListInfo params,
351 : QueryEnvironment *queryEnv,
352 : DestReceiver *dest, QueryCompletion *qc);
353 : static void pgss_store(const char *query, int64 queryId,
354 : int query_location, int query_len,
355 : pgssStoreKind kind,
356 : double total_time, uint64 rows,
357 : const BufferUsage *bufusage,
358 : const WalUsage *walusage,
359 : const struct JitInstrumentation *jitusage,
360 : JumbleState *jstate,
361 : int parallel_workers_to_launch,
362 : int parallel_workers_launched,
363 : PlannedStmtOrigin planOrigin);
364 : static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
365 : pgssVersion api_version,
366 : bool showtext);
367 : static Size pgss_memsize(void);
368 : static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
369 : int encoding, bool sticky);
370 : static void entry_dealloc(void);
371 : static bool qtext_store(const char *query, int query_len,
372 : Size *query_offset, int *gc_count);
373 : static char *qtext_load_file(Size *buffer_size);
374 : static char *qtext_fetch(Size query_offset, int query_len,
375 : char *buffer, Size buffer_size);
376 : static bool need_gc_qtexts(void);
377 : static void gc_qtexts(void);
378 : static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
379 : static char *generate_normalized_query(JumbleState *jstate, const char *query,
380 : int query_loc, int *query_len_p);
381 : static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
382 : int query_loc);
383 : static int comp_location(const void *a, const void *b);
384 :
385 :
/*
 * Module load callback: register GUCs and install all hooks.
 */
void
_PG_init(void)
{
	/*
	 * In order to create our shared memory area, we have to be loaded via
	 * shared_preload_libraries.  If not, fall out without hooking into any of
	 * the main system.  (We don't throw error here because it seems useful to
	 * allow the pg_stat_statements functions to be created even when the
	 * module isn't active.  The functions must protect themselves against
	 * being called then, however.)
	 */
	if (!process_shared_preload_libraries_in_progress)
		return;

	/*
	 * Inform the postmaster that we want to enable query_id calculation if
	 * compute_query_id is set to auto.
	 */
	EnableQueryId();

	/*
	 * Define (or redefine) custom GUC variables.
	 */
	DefineCustomIntVariable("pg_stat_statements.max",
							"Sets the maximum number of statements tracked by pg_stat_statements.",
							NULL,
							&pgss_max,
							5000,
							100,
							INT_MAX / 2,
							PGC_POSTMASTER,
							0,
							NULL,
							NULL,
							NULL);

	DefineCustomEnumVariable("pg_stat_statements.track",
							 "Selects which statements are tracked by pg_stat_statements.",
							 NULL,
							 &pgss_track,
							 PGSS_TRACK_TOP,
							 track_options,
							 PGC_SUSET,
							 0,
							 NULL,
							 NULL,
							 NULL);

	DefineCustomBoolVariable("pg_stat_statements.track_utility",
							 "Selects whether utility commands are tracked by pg_stat_statements.",
							 NULL,
							 &pgss_track_utility,
							 true,
							 PGC_SUSET,
							 0,
							 NULL,
							 NULL,
							 NULL);

	DefineCustomBoolVariable("pg_stat_statements.track_planning",
							 "Selects whether planning duration is tracked by pg_stat_statements.",
							 NULL,
							 &pgss_track_planning,
							 false,
							 PGC_SUSET,
							 0,
							 NULL,
							 NULL,
							 NULL);

	DefineCustomBoolVariable("pg_stat_statements.save",
							 "Save pg_stat_statements statistics across server shutdowns.",
							 NULL,
							 &pgss_save,
							 true,
							 PGC_SIGHUP,
							 0,
							 NULL,
							 NULL,
							 NULL);

	/* Reject any other "pg_stat_statements.*" settings */
	MarkGUCPrefixReserved("pg_stat_statements");

	/*
	 * Install hooks, saving the previous values so each of our hook
	 * functions can chain to any hook installed before us.
	 */
	prev_shmem_request_hook = shmem_request_hook;
	shmem_request_hook = pgss_shmem_request;
	prev_shmem_startup_hook = shmem_startup_hook;
	shmem_startup_hook = pgss_shmem_startup;
	prev_post_parse_analyze_hook = post_parse_analyze_hook;
	post_parse_analyze_hook = pgss_post_parse_analyze;
	prev_planner_hook = planner_hook;
	planner_hook = pgss_planner;
	prev_ExecutorStart = ExecutorStart_hook;
	ExecutorStart_hook = pgss_ExecutorStart;
	prev_ExecutorRun = ExecutorRun_hook;
	ExecutorRun_hook = pgss_ExecutorRun;
	prev_ExecutorFinish = ExecutorFinish_hook;
	ExecutorFinish_hook = pgss_ExecutorFinish;
	prev_ExecutorEnd = ExecutorEnd_hook;
	ExecutorEnd_hook = pgss_ExecutorEnd;
	prev_ProcessUtility = ProcessUtility_hook;
	ProcessUtility_hook = pgss_ProcessUtility;
}
494 :
/*
 * shmem_request hook: request additional shared resources.  We'll allocate or
 * attach to the shared resources in pgss_shmem_startup().
 */
static void
pgss_shmem_request(void)
{
	/* Chain to any previously-installed request hook first */
	if (prev_shmem_request_hook)
		prev_shmem_request_hook();

	/* Space for pgssSharedState + hash table, plus one named LWLock */
	RequestAddinShmemSpace(pgss_memsize());
	RequestNamedLWLockTranche("pg_stat_statements", 1);
}
508 :
/*
 * shmem_startup hook: allocate or attach to shared memory,
 * then load any pre-existing statistics from file.
 * Also create and load the query-texts file, which is expected to exist
 * (even if empty) while the module is enabled.
 */
static void
pgss_shmem_startup(void)
{
	bool		found;
	HASHCTL		info;
	FILE	   *file = NULL;
	FILE	   *qfile = NULL;
	uint32		header;
	int32		num;
	int32		pgver;
	int32		i;
	int			buffer_size;
	char	   *buffer = NULL;

	if (prev_shmem_startup_hook)
		prev_shmem_startup_hook();

	/* reset in case this is a restart within the postmaster */
	pgss = NULL;
	pgss_hash = NULL;

	/*
	 * Create or attach to the shared memory state, including hash table
	 */
	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

	pgss = ShmemInitStruct("pg_stat_statements",
						   sizeof(pgssSharedState),
						   &found);

	if (!found)
	{
		/* First time through ... */
		pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
		pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
		pgss->mean_query_len = ASSUMED_LENGTH_INIT;
		SpinLockInit(&pgss->mutex);
		pgss->extent = 0;
		pgss->n_writers = 0;
		pgss->gc_count = 0;
		pgss->stats.dealloc = 0;
		pgss->stats.stats_reset = GetCurrentTimestamp();
	}

	info.keysize = sizeof(pgssHashKey);
	info.entrysize = sizeof(pgssEntry);
	pgss_hash = ShmemInitHash("pg_stat_statements hash",
							  pgss_max, pgss_max,
							  &info,
							  HASH_ELEM | HASH_BLOBS);

	LWLockRelease(AddinShmemInitLock);

	/*
	 * If we're in the postmaster (or a standalone backend...), set up a shmem
	 * exit hook to dump the statistics to disk.
	 */
	if (!IsUnderPostmaster)
		on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);

	/*
	 * Done if some other process already completed our initialization.
	 */
	if (found)
		return;

	/*
	 * Note: we don't bother with locks here, because there should be no other
	 * processes running when this code is reached.
	 */

	/* Unlink query text file possibly left over from crash */
	unlink(PGSS_TEXT_FILE);

	/* Allocate new query text temp file */
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
		goto write_error;

	/*
	 * If we were told not to load old statistics, we're done.  (Note we do
	 * not try to unlink any old dump file in this case.  This seems a bit
	 * questionable but it's the historical behavior.)
	 */
	if (!pgss_save)
	{
		FreeFile(qfile);
		return;
	}

	/*
	 * Attempt to load old statistics from the dump file.
	 */
	file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
	if (file == NULL)
	{
		if (errno != ENOENT)
			goto read_error;
		/* No existing persisted stats file, so we're done */
		FreeFile(qfile);
		return;
	}

	/* Scratch buffer for query texts; grown on demand below */
	buffer_size = 2048;
	buffer = (char *) palloc(buffer_size);

	/* File preamble: magic number, PG major version, entry count */
	if (fread(&header, sizeof(uint32), 1, file) != 1 ||
		fread(&pgver, sizeof(uint32), 1, file) != 1 ||
		fread(&num, sizeof(int32), 1, file) != 1)
		goto read_error;

	if (header != PGSS_FILE_HEADER ||
		pgver != PGSS_PG_MAJOR_VERSION)
		goto data_error;

	/* Each entry is a raw pgssEntry followed by its NUL-terminated text */
	for (i = 0; i < num; i++)
	{
		pgssEntry	temp;
		pgssEntry  *entry;
		Size		query_offset;

		if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
			goto read_error;

		/* Encoding is the only field we can easily sanity-check */
		if (!PG_VALID_BE_ENCODING(temp.encoding))
			goto data_error;

		/* Resize buffer as needed */
		if (temp.query_len >= buffer_size)
		{
			buffer_size = Max(buffer_size * 2, temp.query_len + 1);
			buffer = repalloc(buffer, buffer_size);
		}

		/* query_len + 1 to include the trailing NUL */
		if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
			goto read_error;

		/* Should have a trailing null, but let's make sure */
		buffer[temp.query_len] = '\0';

		/* Skip loading "sticky" entries */
		if (IS_STICKY(temp.counters))
			continue;

		/* Store the query text in the (fresh) external query-text file */
		query_offset = pgss->extent;
		if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
			goto write_error;
		pgss->extent += temp.query_len + 1;

		/* make the hashtable entry (discards old entries if too many) */
		entry = entry_alloc(&temp.key, query_offset, temp.query_len,
							temp.encoding,
							false);

		/* copy in the actual stats */
		entry->counters = temp.counters;
		entry->stats_since = temp.stats_since;
		entry->minmax_stats_since = temp.minmax_stats_since;
	}

	/* Read global statistics for pg_stat_statements */
	if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
		goto read_error;

	pfree(buffer);
	FreeFile(file);
	FreeFile(qfile);

	/*
	 * Remove the persisted stats file so it's not included in
	 * backups/replication standbys, etc.  A new file will be written on next
	 * shutdown.
	 *
	 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
	 * because we remove that file on startup; it acts inversely to
	 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
	 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
	 * when the server is not running.  Leaving the file creates no danger of
	 * a newly restored database having a spurious record of execution costs,
	 * which is what we're really concerned about here.
	 */
	unlink(PGSS_DUMP_FILE);

	return;

	/* Error exits: log, then fall through to common cleanup at "fail" */
read_error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not read file \"%s\": %m",
					PGSS_DUMP_FILE)));
	goto fail;
data_error:
	ereport(LOG,
			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			 errmsg("ignoring invalid data in file \"%s\"",
					PGSS_DUMP_FILE)));
	goto fail;
write_error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not write file \"%s\": %m",
					PGSS_TEXT_FILE)));
fail:
	if (buffer)
		pfree(buffer);
	if (file)
		FreeFile(file);
	if (qfile)
		FreeFile(qfile);
	/* If possible, throw away the bogus file; ignore any error */
	unlink(PGSS_DUMP_FILE);

	/*
	 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
	 * server is running with pg_stat_statements enabled
	 */
}
734 :
/*
 * shmem_shutdown hook: Dump statistics into file.
 *
 * Note: we don't bother with acquiring lock, because there should be no
 * other processes running when this is called.
 */
static void
pgss_shmem_shutdown(int code, Datum arg)
{
	FILE	   *file;
	char	   *qbuffer = NULL;
	Size		qbuffer_size = 0;
	HASH_SEQ_STATUS hash_seq;
	int32		num_entries;
	pgssEntry  *entry;

	/* Don't try to dump during a crash. */
	if (code)
		return;

	/* Safety check ... shouldn't get here unless shmem is set up. */
	if (!pgss || !pgss_hash)
		return;

	/* Don't dump if told not to. */
	if (!pgss_save)
		return;

	/* Write to a temp file first, renamed into place below */
	file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
	if (file == NULL)
		goto error;

	/* File preamble: magic number, PG major version, entry count */
	if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
		goto error;
	if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
		goto error;
	num_entries = hash_get_num_entries(pgss_hash);
	if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
		goto error;

	/* Slurp the whole query-text file into memory */
	qbuffer = qtext_load_file(&qbuffer_size);
	if (qbuffer == NULL)
		goto error;

	/*
	 * When serializing to disk, we store query texts immediately after their
	 * entry data.  Any orphaned query texts are thereby excluded.
	 */
	hash_seq_init(&hash_seq, pgss_hash);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		int			len = entry->query_len;
		char	   *qstr = qtext_fetch(entry->query_offset, len,
									   qbuffer, qbuffer_size);

		if (qstr == NULL)
			continue;			/* Ignore any entries with bogus texts */

		/* Raw entry struct, then len + 1 bytes of NUL-terminated text */
		if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
			fwrite(qstr, 1, len + 1, file) != len + 1)
		{
			/* note: we assume hash_seq_term won't change errno */
			hash_seq_term(&hash_seq);
			goto error;
		}
	}

	/* Dump global statistics for pg_stat_statements */
	if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
		goto error;

	/* qtext_load_file's buffer is malloc'd, so release it with free() */
	free(qbuffer);
	qbuffer = NULL;

	/* FreeFile flushes; a failure here means the dump is incomplete */
	if (FreeFile(file))
	{
		file = NULL;
		goto error;
	}

	/*
	 * Rename file into place, so we atomically replace any old one.
	 */
	(void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);

	/* Unlink query-texts file; it's not needed while shutdown */
	unlink(PGSS_TEXT_FILE);

	return;

error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not write file \"%s\": %m",
					PGSS_DUMP_FILE ".tmp")));
	free(qbuffer);
	if (file)
		FreeFile(file);
	unlink(PGSS_DUMP_FILE ".tmp");
	unlink(PGSS_TEXT_FILE);
}
836 :
837 : /*
838 : * Post-parse-analysis hook: mark query with a queryId
839 : */
840 : static void
841 155396 : pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
842 : {
843 155396 : if (prev_post_parse_analyze_hook)
844 0 : prev_post_parse_analyze_hook(pstate, query, jstate);
845 :
846 : /* Safety check... */
847 155396 : if (!pgss || !pgss_hash || !pgss_enabled(nesting_level))
848 24810 : return;
849 :
850 : /*
851 : * If it's EXECUTE, clear the queryId so that stats will accumulate for
852 : * the underlying PREPARE. But don't do this if we're not tracking
853 : * utility statements, to avoid messing up another extension that might be
854 : * tracking them.
855 : */
856 130586 : if (query->utilityStmt)
857 : {
858 58080 : if (pgss_track_utility && IsA(query->utilityStmt, ExecuteStmt))
859 : {
860 6484 : query->queryId = INT64CONST(0);
861 6484 : return;
862 : }
863 : }
864 :
865 : /*
866 : * If query jumbling were able to identify any ignorable constants, we
867 : * immediately create a hash table entry for the query, so that we can
868 : * record the normalized form of the query string. If there were no such
869 : * constants, the normalized string would be the same as the query text
870 : * anyway, so there's no need for an early entry.
871 : */
872 124102 : if (jstate && jstate->clocations_count > 0)
873 72000 : pgss_store(pstate->p_sourcetext,
874 : query->queryId,
875 : query->stmt_location,
876 : query->stmt_len,
877 : PGSS_INVALID,
878 : 0,
879 : 0,
880 : NULL,
881 : NULL,
882 : NULL,
883 : jstate,
884 : 0,
885 : 0,
886 : PLAN_STMT_UNKNOWN);
887 : }
888 :
/*
 * Planner hook: forward to regular planner, but measure planning time
 * if needed.
 *
 * parse         - analyzed-and-rewritten query tree to be planned
 * query_string  - source text of the query (may be NULL)
 * cursorOptions - CURSOR_OPT_XXX flags, forwarded unchanged
 * boundParams   - external parameter values, or NULL
 *
 * In both branches the nesting level is bumped around the planner call, so
 * that anything evaluated during planning (e.g. immutable function calls
 * folded by the planner) is charged as a nested statement rather than a
 * top-level one.  PG_FINALLY guarantees the decrement even on error.
 */
static PlannedStmt *
pgss_planner(Query *parse,
			 const char *query_string,
			 int cursorOptions,
			 ParamListInfo boundParams)
{
	PlannedStmt *result;

	/*
	 * We can't process the query if no query_string is provided, as
	 * pgss_store needs it.  We also ignore query without queryid, as it would
	 * be treated as a utility statement, which may not be the case.
	 */
	if (pgss_enabled(nesting_level)
		&& pgss_track_planning && query_string
		&& parse->queryId != INT64CONST(0))
	{
		instr_time	start;
		instr_time	duration;
		BufferUsage bufusage_start,
					bufusage;
		WalUsage	walusage_start,
					walusage;

		/* We need to track buffer usage as the planner can access them. */
		bufusage_start = pgBufferUsage;

		/*
		 * Similarly the planner could write some WAL records in some cases
		 * (e.g. setting a hint bit with those being WAL-logged)
		 */
		walusage_start = pgWalUsage;
		INSTR_TIME_SET_CURRENT(start);

		nesting_level++;
		PG_TRY();
		{
			if (prev_planner_hook)
				result = prev_planner_hook(parse, query_string, cursorOptions,
										   boundParams);
			else
				result = standard_planner(parse, query_string, cursorOptions,
										  boundParams);
		}
		PG_FINALLY();
		{
			nesting_level--;
		}
		PG_END_TRY();

		/* Planning duration = now - start */
		INSTR_TIME_SET_CURRENT(duration);
		INSTR_TIME_SUBTRACT(duration, start);

		/* calc differences of buffer counters. */
		memset(&bufusage, 0, sizeof(BufferUsage));
		BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);

		/* calc differences of WAL counters. */
		memset(&walusage, 0, sizeof(WalUsage));
		WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);

		/* Record the planning costs under the PGSS_PLAN counter slot. */
		pgss_store(query_string,
				   parse->queryId,
				   parse->stmt_location,
				   parse->stmt_len,
				   PGSS_PLAN,
				   INSTR_TIME_GET_MILLISEC(duration),
				   0,
				   &bufusage,
				   &walusage,
				   NULL,
				   NULL,
				   0,
				   0,
				   result->planOrigin);
	}
	else
	{
		/*
		 * Even though we're not tracking plan time for this statement, we
		 * must still increment the nesting level, to ensure that functions
		 * evaluated during planning are not seen as top-level calls.
		 */
		nesting_level++;
		PG_TRY();
		{
			if (prev_planner_hook)
				result = prev_planner_hook(parse, query_string, cursorOptions,
										   boundParams);
			else
				result = standard_planner(parse, query_string, cursorOptions,
										  boundParams);
		}
		PG_FINALLY();
		{
			nesting_level--;
		}
		PG_END_TRY();
	}

	return result;
}
995 :
996 : /*
997 : * ExecutorStart hook: start up tracking if needed
998 : */
999 : static void
1000 115290 : pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
1001 : {
1002 115290 : if (prev_ExecutorStart)
1003 0 : prev_ExecutorStart(queryDesc, eflags);
1004 : else
1005 115290 : standard_ExecutorStart(queryDesc, eflags);
1006 :
1007 : /*
1008 : * If query has queryId zero, don't track it. This prevents double
1009 : * counting of optimizable statements that are directly contained in
1010 : * utility statements.
1011 : */
1012 114762 : if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
1013 : {
1014 : /*
1015 : * Set up to track total elapsed time in ExecutorRun. Make sure the
1016 : * space is allocated in the per-query context so it will go away at
1017 : * ExecutorEnd.
1018 : */
1019 76686 : if (queryDesc->totaltime == NULL)
1020 : {
1021 : MemoryContext oldcxt;
1022 :
1023 76686 : oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
1024 76686 : queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
1025 76686 : MemoryContextSwitchTo(oldcxt);
1026 : }
1027 : }
1028 114762 : }
1029 :
1030 : /*
1031 : * ExecutorRun hook: all we need do is track nesting depth
1032 : */
1033 : static void
1034 112238 : pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
1035 : {
1036 112238 : nesting_level++;
1037 112238 : PG_TRY();
1038 : {
1039 112238 : if (prev_ExecutorRun)
1040 0 : prev_ExecutorRun(queryDesc, direction, count);
1041 : else
1042 112238 : standard_ExecutorRun(queryDesc, direction, count);
1043 : }
1044 6830 : PG_FINALLY();
1045 : {
1046 112238 : nesting_level--;
1047 : }
1048 112238 : PG_END_TRY();
1049 105408 : }
1050 :
1051 : /*
1052 : * ExecutorFinish hook: all we need do is track nesting depth
1053 : */
1054 : static void
1055 101512 : pgss_ExecutorFinish(QueryDesc *queryDesc)
1056 : {
1057 101512 : nesting_level++;
1058 101512 : PG_TRY();
1059 : {
1060 101512 : if (prev_ExecutorFinish)
1061 0 : prev_ExecutorFinish(queryDesc);
1062 : else
1063 101512 : standard_ExecutorFinish(queryDesc);
1064 : }
1065 328 : PG_FINALLY();
1066 : {
1067 101512 : nesting_level--;
1068 : }
1069 101512 : PG_END_TRY();
1070 101184 : }
1071 :
1072 : /*
1073 : * ExecutorEnd hook: store results if needed
1074 : */
1075 : static void
1076 106950 : pgss_ExecutorEnd(QueryDesc *queryDesc)
1077 : {
1078 106950 : int64 queryId = queryDesc->plannedstmt->queryId;
1079 :
1080 106950 : if (queryId != INT64CONST(0) && queryDesc->totaltime &&
1081 73650 : pgss_enabled(nesting_level))
1082 : {
1083 : /*
1084 : * Make sure stats accumulation is done. (Note: it's okay if several
1085 : * levels of hook all do this.)
1086 : */
1087 73650 : InstrEndLoop(queryDesc->totaltime);
1088 :
1089 73492 : pgss_store(queryDesc->sourceText,
1090 : queryId,
1091 73650 : queryDesc->plannedstmt->stmt_location,
1092 73650 : queryDesc->plannedstmt->stmt_len,
1093 : PGSS_EXEC,
1094 73650 : queryDesc->totaltime->total * 1000.0, /* convert to msec */
1095 73650 : queryDesc->estate->es_total_processed,
1096 73650 : &queryDesc->totaltime->bufusage,
1097 73650 : &queryDesc->totaltime->walusage,
1098 158 : queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1099 : NULL,
1100 73650 : queryDesc->estate->es_parallel_workers_to_launch,
1101 73650 : queryDesc->estate->es_parallel_workers_launched,
1102 73650 : queryDesc->plannedstmt->planOrigin);
1103 : }
1104 :
1105 106950 : if (prev_ExecutorEnd)
1106 0 : prev_ExecutorEnd(queryDesc);
1107 : else
1108 106950 : standard_ExecutorEnd(queryDesc);
1109 106950 : }
1110 :
1111 : /*
1112 : * ProcessUtility hook
1113 : */
1114 : static void
1115 68838 : pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1116 : bool readOnlyTree,
1117 : ProcessUtilityContext context,
1118 : ParamListInfo params, QueryEnvironment *queryEnv,
1119 : DestReceiver *dest, QueryCompletion *qc)
1120 : {
1121 68838 : Node *parsetree = pstmt->utilityStmt;
1122 68838 : int64 saved_queryId = pstmt->queryId;
1123 68838 : int saved_stmt_location = pstmt->stmt_location;
1124 68838 : int saved_stmt_len = pstmt->stmt_len;
1125 68838 : bool enabled = pgss_track_utility && pgss_enabled(nesting_level);
1126 :
1127 : /*
1128 : * Force utility statements to get queryId zero. We do this even in cases
1129 : * where the statement contains an optimizable statement for which a
1130 : * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1131 : * cases, runtime control will first go through ProcessUtility and then
1132 : * the executor, and we don't want the executor hooks to do anything,
1133 : * since we are already measuring the statement's costs at the utility
1134 : * level.
1135 : *
1136 : * Note that this is only done if pg_stat_statements is enabled and
1137 : * configured to track utility statements, in the unlikely possibility
1138 : * that user configured another extension to handle utility statements
1139 : * only.
1140 : */
1141 68838 : if (enabled)
1142 57872 : pstmt->queryId = INT64CONST(0);
1143 :
1144 : /*
1145 : * If it's an EXECUTE statement, we don't track it and don't increment the
1146 : * nesting level. This allows the cycles to be charged to the underlying
1147 : * PREPARE instead (by the Executor hooks), which is much more useful.
1148 : *
1149 : * We also don't track execution of PREPARE. If we did, we would get one
1150 : * hash table entry for the PREPARE (with hash calculated from the query
1151 : * string), and then a different one with the same query string (but hash
1152 : * calculated from the query tree) would be used to accumulate costs of
1153 : * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1154 : * actually run the planner (only parse+rewrite), its costs are generally
1155 : * pretty negligible and it seems okay to just ignore it.
1156 : */
1157 68838 : if (enabled &&
1158 57872 : !IsA(parsetree, ExecuteStmt) &&
1159 51400 : !IsA(parsetree, PrepareStmt))
1160 46344 : {
1161 : instr_time start;
1162 : instr_time duration;
1163 : uint64 rows;
1164 : BufferUsage bufusage_start,
1165 : bufusage;
1166 : WalUsage walusage_start,
1167 : walusage;
1168 :
1169 51154 : bufusage_start = pgBufferUsage;
1170 51154 : walusage_start = pgWalUsage;
1171 51154 : INSTR_TIME_SET_CURRENT(start);
1172 :
1173 51154 : nesting_level++;
1174 51154 : PG_TRY();
1175 : {
1176 51154 : if (prev_ProcessUtility)
1177 0 : prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1178 : context, params, queryEnv,
1179 : dest, qc);
1180 : else
1181 51154 : standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1182 : context, params, queryEnv,
1183 : dest, qc);
1184 : }
1185 4810 : PG_FINALLY();
1186 : {
1187 51154 : nesting_level--;
1188 : }
1189 51154 : PG_END_TRY();
1190 :
1191 : /*
1192 : * CAUTION: do not access the *pstmt data structure again below here.
1193 : * If it was a ROLLBACK or similar, that data structure may have been
1194 : * freed. We must copy everything we still need into local variables,
1195 : * which we did above.
1196 : *
1197 : * For the same reason, we can't risk restoring pstmt->queryId to its
1198 : * former value, which'd otherwise be a good idea.
1199 : */
1200 :
1201 46344 : INSTR_TIME_SET_CURRENT(duration);
1202 46344 : INSTR_TIME_SUBTRACT(duration, start);
1203 :
1204 : /*
1205 : * Track the total number of rows retrieved or affected by the utility
1206 : * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1207 : * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1208 : */
1209 46338 : rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1210 43038 : qc->commandTag == CMDTAG_FETCH ||
1211 42518 : qc->commandTag == CMDTAG_SELECT ||
1212 42144 : qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
1213 92682 : qc->nprocessed : 0;
1214 :
1215 : /* calc differences of buffer counters. */
1216 46344 : memset(&bufusage, 0, sizeof(BufferUsage));
1217 46344 : BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1218 :
1219 : /* calc differences of WAL counters. */
1220 46344 : memset(&walusage, 0, sizeof(WalUsage));
1221 46344 : WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1222 :
1223 46344 : pgss_store(queryString,
1224 : saved_queryId,
1225 : saved_stmt_location,
1226 : saved_stmt_len,
1227 : PGSS_EXEC,
1228 46344 : INSTR_TIME_GET_MILLISEC(duration),
1229 : rows,
1230 : &bufusage,
1231 : &walusage,
1232 : NULL,
1233 : NULL,
1234 : 0,
1235 : 0,
1236 : pstmt->planOrigin);
1237 : }
1238 : else
1239 : {
1240 : /*
1241 : * Even though we're not tracking execution time for this statement,
1242 : * we must still increment the nesting level, to ensure that functions
1243 : * evaluated within it are not seen as top-level calls. But don't do
1244 : * so for EXECUTE; that way, when control reaches pgss_planner or
1245 : * pgss_ExecutorStart, we will treat the costs as top-level if
1246 : * appropriate. Likewise, don't bump for PREPARE, so that parse
1247 : * analysis will treat the statement as top-level if appropriate.
1248 : *
1249 : * To be absolutely certain we don't mess up the nesting level,
1250 : * evaluate the bump_level condition just once.
1251 : */
1252 17684 : bool bump_level =
1253 28894 : !IsA(parsetree, ExecuteStmt) &&
1254 11210 : !IsA(parsetree, PrepareStmt);
1255 :
1256 17684 : if (bump_level)
1257 10962 : nesting_level++;
1258 17684 : PG_TRY();
1259 : {
1260 17684 : if (prev_ProcessUtility)
1261 0 : prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1262 : context, params, queryEnv,
1263 : dest, qc);
1264 : else
1265 17684 : standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1266 : context, params, queryEnv,
1267 : dest, qc);
1268 : }
1269 260 : PG_FINALLY();
1270 : {
1271 17684 : if (bump_level)
1272 10962 : nesting_level--;
1273 : }
1274 17684 : PG_END_TRY();
1275 : }
1276 63768 : }
1277 :
/*
 * Store some statistics for a statement.
 *
 * If jstate is not NULL then we're trying to create an entry for which
 * we have no statistics as yet; we just want to record the normalized
 * query string.  total_time, rows, bufusage and walusage are ignored in this
 * case.  Such entries are created "sticky" (see entry_alloc call below) so
 * they survive until real stats arrive.
 *
 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
 * for the arrays in the Counters field.
 *
 * Locking protocol (see also the note at the head of the file): entry lookup
 * needs only shared lock on pgss->lock; creating a new entry requires
 * promoting to exclusive; counter updates need shared lock plus the entry's
 * own spinlock.
 */
static void
pgss_store(const char *query, int64 queryId,
		   int query_location, int query_len,
		   pgssStoreKind kind,
		   double total_time, uint64 rows,
		   const BufferUsage *bufusage,
		   const WalUsage *walusage,
		   const struct JitInstrumentation *jitusage,
		   JumbleState *jstate,
		   int parallel_workers_to_launch,
		   int parallel_workers_launched,
		   PlannedStmtOrigin planOrigin)
{
	pgssHashKey key;
	pgssEntry  *entry;
	char	   *norm_query = NULL;
	int			encoding = GetDatabaseEncoding();

	Assert(query != NULL);

	/* Safety check... */
	if (!pgss || !pgss_hash)
		return;

	/*
	 * Nothing to do if compute_query_id isn't enabled and no other module
	 * computed a query identifier.
	 */
	if (queryId == INT64CONST(0))
		return;

	/*
	 * Confine our attention to the relevant part of the string, if the query
	 * is a portion of a multi-statement source string, and update query
	 * location and length if needed.
	 */
	query = CleanQuerytext(query, &query_location, &query_len);

	/* Set up key for hashtable search */

	/* clear padding */
	memset(&key, 0, sizeof(pgssHashKey));

	/* key distinguishes user, database, query, and top-level vs. nested */
	key.userid = GetUserId();
	key.dbid = MyDatabaseId;
	key.queryid = queryId;
	key.toplevel = (nesting_level == 0);

	/* Lookup the hash table entry with shared lock. */
	LWLockAcquire(pgss->lock, LW_SHARED);

	entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);

	/* Create new entry, if not present */
	if (!entry)
	{
		Size		query_offset;
		int			gc_count;
		bool		stored;
		bool		do_gc;

		/*
		 * Create a new, normalized query string if caller asked.  We don't
		 * need to hold the lock while doing this work.  (Note: in any case,
		 * it's possible that someone else creates a duplicate hashtable entry
		 * in the interval where we don't hold the lock below.  That case is
		 * handled by entry_alloc.)
		 */
		if (jstate)
		{
			LWLockRelease(pgss->lock);
			norm_query = generate_normalized_query(jstate, query,
												   query_location,
												   &query_len);
			LWLockAcquire(pgss->lock, LW_SHARED);
		}

		/* Append new query text to file with only shared lock held */
		stored = qtext_store(norm_query ? norm_query : query, query_len,
							 &query_offset, &gc_count);

		/*
		 * Determine whether we need to garbage collect external query texts
		 * while the shared lock is still held.  This micro-optimization
		 * avoids taking the time to decide this while holding exclusive lock.
		 */
		do_gc = need_gc_qtexts();

		/* Need exclusive lock to make a new hashtable entry - promote */
		LWLockRelease(pgss->lock);
		LWLockAcquire(pgss->lock, LW_EXCLUSIVE);

		/*
		 * A garbage collection may have occurred while we weren't holding the
		 * lock.  In the unlikely event that this happens, the query text we
		 * stored above will have been garbage collected, so write it again.
		 * This should be infrequent enough that doing it while holding
		 * exclusive lock isn't a performance problem.
		 */
		if (!stored || pgss->gc_count != gc_count)
			stored = qtext_store(norm_query ? norm_query : query, query_len,
								 &query_offset, NULL);

		/* If we failed to write to the text file, give up */
		if (!stored)
			goto done;

		/* OK to create a new hashtable entry */
		entry = entry_alloc(&key, query_offset, query_len, encoding,
							jstate != NULL);

		/* If needed, perform garbage collection while exclusive lock held */
		if (do_gc)
			gc_qtexts();
	}

	/* Increment the counts, except when jstate is not NULL */
	if (!jstate)
	{
		Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);

		/*
		 * Grab the spinlock while updating the counters (see comment about
		 * locking rules at the head of the file)
		 */
		SpinLockAcquire(&entry->mutex);

		/* "Unstick" entry if it was previously sticky */
		if (IS_STICKY(entry->counters))
			entry->counters.usage = USAGE_INIT;

		entry->counters.calls[kind] += 1;
		entry->counters.total_time[kind] += total_time;

		/* First real call for this kind: all timing stats equal total_time */
		if (entry->counters.calls[kind] == 1)
		{
			entry->counters.min_time[kind] = total_time;
			entry->counters.max_time[kind] = total_time;
			entry->counters.mean_time[kind] = total_time;
		}
		else
		{
			/*
			 * Welford's method for accurately computing variance. See
			 * <http://www.johndcook.com/blog/standard_deviation/>
			 */
			double		old_mean = entry->counters.mean_time[kind];

			entry->counters.mean_time[kind] +=
				(total_time - old_mean) / entry->counters.calls[kind];
			entry->counters.sum_var_time[kind] +=
				(total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);

			/*
			 * Calculate min and max time. min = 0 and max = 0 means that the
			 * min/max statistics were reset
			 */
			if (entry->counters.min_time[kind] == 0
				&& entry->counters.max_time[kind] == 0)
			{
				entry->counters.min_time[kind] = total_time;
				entry->counters.max_time[kind] = total_time;
			}
			else
			{
				if (entry->counters.min_time[kind] > total_time)
					entry->counters.min_time[kind] = total_time;
				if (entry->counters.max_time[kind] < total_time)
					entry->counters.max_time[kind] = total_time;
			}
		}
		/* Accumulate row count and buffer/WAL usage deltas for this call */
		entry->counters.rows += rows;
		entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
		entry->counters.shared_blks_read += bufusage->shared_blks_read;
		entry->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
		entry->counters.shared_blks_written += bufusage->shared_blks_written;
		entry->counters.local_blks_hit += bufusage->local_blks_hit;
		entry->counters.local_blks_read += bufusage->local_blks_read;
		entry->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
		entry->counters.local_blks_written += bufusage->local_blks_written;
		entry->counters.temp_blks_read += bufusage->temp_blks_read;
		entry->counters.temp_blks_written += bufusage->temp_blks_written;
		entry->counters.shared_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_read_time);
		entry->counters.shared_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_write_time);
		entry->counters.local_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_read_time);
		entry->counters.local_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_write_time);
		entry->counters.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time);
		entry->counters.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time);
		entry->counters.usage += USAGE_EXEC(total_time);
		entry->counters.wal_records += walusage->wal_records;
		entry->counters.wal_fpi += walusage->wal_fpi;
		entry->counters.wal_bytes += walusage->wal_bytes;
		entry->counters.wal_buffers_full += walusage->wal_buffers_full;
		/* JIT stats are only present when the executor actually used JIT */
		if (jitusage)
		{
			entry->counters.jit_functions += jitusage->created_functions;
			entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);

			if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
				entry->counters.jit_deform_count++;
			entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);

			if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
				entry->counters.jit_inlining_count++;
			entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);

			if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
				entry->counters.jit_optimization_count++;
			entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);

			if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
				entry->counters.jit_emission_count++;
			entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
		}

		/* parallel worker counters */
		entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
		entry->counters.parallel_workers_launched += parallel_workers_launched;

		/* plan cache counters */
		if (planOrigin == PLAN_STMT_CACHE_GENERIC)
			entry->counters.generic_plan_calls++;
		else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
			entry->counters.custom_plan_calls++;

		SpinLockRelease(&entry->mutex);
	}

done:
	LWLockRelease(pgss->lock);

	/* We postpone this clean-up until we're out of the lock */
	if (norm_query)
		pfree(norm_query);
}
1524 :
1525 : /*
1526 : * Reset statement statistics corresponding to userid, dbid, and queryid.
1527 : */
1528 : Datum
1529 2 : pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
1530 : {
1531 : Oid userid;
1532 : Oid dbid;
1533 : int64 queryid;
1534 :
1535 2 : userid = PG_GETARG_OID(0);
1536 2 : dbid = PG_GETARG_OID(1);
1537 2 : queryid = PG_GETARG_INT64(2);
1538 :
1539 2 : entry_reset(userid, dbid, queryid, false);
1540 :
1541 2 : PG_RETURN_VOID();
1542 : }
1543 :
1544 : Datum
1545 232 : pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
1546 : {
1547 : Oid userid;
1548 : Oid dbid;
1549 : int64 queryid;
1550 : bool minmax_only;
1551 :
1552 232 : userid = PG_GETARG_OID(0);
1553 232 : dbid = PG_GETARG_OID(1);
1554 232 : queryid = PG_GETARG_INT64(2);
1555 232 : minmax_only = PG_GETARG_BOOL(3);
1556 :
1557 232 : PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1558 : }
1559 :
1560 : /*
1561 : * Reset statement statistics.
1562 : */
1563 : Datum
1564 2 : pg_stat_statements_reset(PG_FUNCTION_ARGS)
1565 : {
1566 2 : entry_reset(0, 0, 0, false);
1567 :
1568 2 : PG_RETURN_VOID();
1569 : }
1570 :
1571 : /* Number of output arguments (columns) for various API versions */
1572 : #define PG_STAT_STATEMENTS_COLS_V1_0 14
1573 : #define PG_STAT_STATEMENTS_COLS_V1_1 18
1574 : #define PG_STAT_STATEMENTS_COLS_V1_2 19
1575 : #define PG_STAT_STATEMENTS_COLS_V1_3 23
1576 : #define PG_STAT_STATEMENTS_COLS_V1_8 32
1577 : #define PG_STAT_STATEMENTS_COLS_V1_9 33
1578 : #define PG_STAT_STATEMENTS_COLS_V1_10 43
1579 : #define PG_STAT_STATEMENTS_COLS_V1_11 49
1580 : #define PG_STAT_STATEMENTS_COLS_V1_12 52
1581 : #define PG_STAT_STATEMENTS_COLS_V1_13 54
1582 : #define PG_STAT_STATEMENTS_COLS 54 /* maximum of above */
1583 :
1584 : /*
1585 : * Retrieve statement statistics.
1586 : *
1587 : * The SQL API of this function has changed multiple times, and will likely
1588 : * do so again in future. To support the case where a newer version of this
1589 : * loadable module is being used with an old SQL declaration of the function,
1590 : * we continue to support the older API versions. For 1.2 and later, the
1591 : * expected API version is identified by embedding it in the C name of the
1592 : * function. Unfortunately we weren't bright enough to do that for 1.1.
1593 : */
1594 : Datum
1595 250 : pg_stat_statements_1_13(PG_FUNCTION_ARGS)
1596 : {
1597 250 : bool showtext = PG_GETARG_BOOL(0);
1598 :
1599 250 : pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext);
1600 :
1601 250 : return (Datum) 0;
1602 : }
1603 :
1604 : Datum
1605 2 : pg_stat_statements_1_12(PG_FUNCTION_ARGS)
1606 : {
1607 2 : bool showtext = PG_GETARG_BOOL(0);
1608 :
1609 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
1610 :
1611 2 : return (Datum) 0;
1612 : }
1613 :
1614 : Datum
1615 2 : pg_stat_statements_1_11(PG_FUNCTION_ARGS)
1616 : {
1617 2 : bool showtext = PG_GETARG_BOOL(0);
1618 :
1619 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1620 :
1621 2 : return (Datum) 0;
1622 : }
1623 :
1624 : Datum
1625 2 : pg_stat_statements_1_10(PG_FUNCTION_ARGS)
1626 : {
1627 2 : bool showtext = PG_GETARG_BOOL(0);
1628 :
1629 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1630 :
1631 2 : return (Datum) 0;
1632 : }
1633 :
1634 : Datum
1635 2 : pg_stat_statements_1_9(PG_FUNCTION_ARGS)
1636 : {
1637 2 : bool showtext = PG_GETARG_BOOL(0);
1638 :
1639 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1640 :
1641 2 : return (Datum) 0;
1642 : }
1643 :
1644 : Datum
1645 2 : pg_stat_statements_1_8(PG_FUNCTION_ARGS)
1646 : {
1647 2 : bool showtext = PG_GETARG_BOOL(0);
1648 :
1649 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1650 :
1651 2 : return (Datum) 0;
1652 : }
1653 :
1654 : Datum
1655 2 : pg_stat_statements_1_3(PG_FUNCTION_ARGS)
1656 : {
1657 2 : bool showtext = PG_GETARG_BOOL(0);
1658 :
1659 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1660 :
1661 2 : return (Datum) 0;
1662 : }
1663 :
1664 : Datum
1665 0 : pg_stat_statements_1_2(PG_FUNCTION_ARGS)
1666 : {
1667 0 : bool showtext = PG_GETARG_BOOL(0);
1668 :
1669 0 : pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1670 :
1671 0 : return (Datum) 0;
1672 : }
1673 :
1674 : /*
1675 : * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1676 : * This can be removed someday, perhaps.
1677 : */
1678 : Datum
1679 0 : pg_stat_statements(PG_FUNCTION_ARGS)
1680 : {
1681 : /* If it's really API 1.1, we'll figure that out below */
1682 0 : pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1683 :
1684 0 : return (Datum) 0;
1685 : }
1686 :
1687 : /* Common code for all versions of pg_stat_statements() */
1688 : static void
1689 262 : pg_stat_statements_internal(FunctionCallInfo fcinfo,
1690 : pgssVersion api_version,
1691 : bool showtext)
1692 : {
1693 262 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1694 262 : Oid userid = GetUserId();
1695 262 : bool is_allowed_role = false;
1696 262 : char *qbuffer = NULL;
1697 262 : Size qbuffer_size = 0;
1698 262 : Size extent = 0;
1699 262 : int gc_count = 0;
1700 : HASH_SEQ_STATUS hash_seq;
1701 : pgssEntry *entry;
1702 :
1703 : /*
1704 : * Superusers or roles with the privileges of pg_read_all_stats members
1705 : * are allowed
1706 : */
1707 262 : is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
1708 :
1709 : /* hash table must exist already */
1710 262 : if (!pgss || !pgss_hash)
1711 0 : ereport(ERROR,
1712 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1713 : errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1714 :
1715 262 : InitMaterializedSRF(fcinfo, 0);
1716 :
1717 : /*
1718 : * Check we have the expected number of output arguments. Aside from
1719 : * being a good safety check, we need a kluge here to detect API version
1720 : * 1.1, which was wedged into the code in an ill-considered way.
1721 : */
1722 262 : switch (rsinfo->setDesc->natts)
1723 : {
1724 0 : case PG_STAT_STATEMENTS_COLS_V1_0:
1725 0 : if (api_version != PGSS_V1_0)
1726 0 : elog(ERROR, "incorrect number of output arguments");
1727 0 : break;
1728 0 : case PG_STAT_STATEMENTS_COLS_V1_1:
1729 : /* pg_stat_statements() should have told us 1.0 */
1730 0 : if (api_version != PGSS_V1_0)
1731 0 : elog(ERROR, "incorrect number of output arguments");
1732 0 : api_version = PGSS_V1_1;
1733 0 : break;
1734 0 : case PG_STAT_STATEMENTS_COLS_V1_2:
1735 0 : if (api_version != PGSS_V1_2)
1736 0 : elog(ERROR, "incorrect number of output arguments");
1737 0 : break;
1738 2 : case PG_STAT_STATEMENTS_COLS_V1_3:
1739 2 : if (api_version != PGSS_V1_3)
1740 0 : elog(ERROR, "incorrect number of output arguments");
1741 2 : break;
1742 2 : case PG_STAT_STATEMENTS_COLS_V1_8:
1743 2 : if (api_version != PGSS_V1_8)
1744 0 : elog(ERROR, "incorrect number of output arguments");
1745 2 : break;
1746 2 : case PG_STAT_STATEMENTS_COLS_V1_9:
1747 2 : if (api_version != PGSS_V1_9)
1748 0 : elog(ERROR, "incorrect number of output arguments");
1749 2 : break;
1750 2 : case PG_STAT_STATEMENTS_COLS_V1_10:
1751 2 : if (api_version != PGSS_V1_10)
1752 0 : elog(ERROR, "incorrect number of output arguments");
1753 2 : break;
1754 2 : case PG_STAT_STATEMENTS_COLS_V1_11:
1755 2 : if (api_version != PGSS_V1_11)
1756 0 : elog(ERROR, "incorrect number of output arguments");
1757 2 : break;
1758 2 : case PG_STAT_STATEMENTS_COLS_V1_12:
1759 2 : if (api_version != PGSS_V1_12)
1760 0 : elog(ERROR, "incorrect number of output arguments");
1761 2 : break;
1762 250 : case PG_STAT_STATEMENTS_COLS_V1_13:
1763 250 : if (api_version != PGSS_V1_13)
1764 0 : elog(ERROR, "incorrect number of output arguments");
1765 250 : break;
1766 0 : default:
1767 0 : elog(ERROR, "incorrect number of output arguments");
1768 : }
1769 :
1770 : /*
1771 : * We'd like to load the query text file (if needed) while not holding any
1772 : * lock on pgss->lock. In the worst case we'll have to do this again
1773 : * after we have the lock, but it's unlikely enough to make this a win
1774 : * despite occasional duplicated work. We need to reload if anybody
1775 : * writes to the file (either a retail qtext_store(), or a garbage
1776 : * collection) between this point and where we've gotten shared lock. If
1777 : * a qtext_store is actually in progress when we look, we might as well
1778 : * skip the speculative load entirely.
1779 : */
1780 262 : if (showtext)
1781 : {
1782 : int n_writers;
1783 :
1784 : /* Take the mutex so we can examine variables */
1785 262 : SpinLockAcquire(&pgss->mutex);
1786 262 : extent = pgss->extent;
1787 262 : n_writers = pgss->n_writers;
1788 262 : gc_count = pgss->gc_count;
1789 262 : SpinLockRelease(&pgss->mutex);
1790 :
1791 : /* No point in loading file now if there are active writers */
1792 262 : if (n_writers == 0)
1793 262 : qbuffer = qtext_load_file(&qbuffer_size);
1794 : }
1795 :
1796 : /*
1797 : * Get shared lock, load or reload the query text file if we must, and
1798 : * iterate over the hashtable entries.
1799 : *
1800 : * With a large hash table, we might be holding the lock rather longer
1801 : * than one could wish. However, this only blocks creation of new hash
1802 : * table entries, and the larger the hash table the less likely that is to
1803 : * be needed. So we can hope this is okay. Perhaps someday we'll decide
1804 : * we need to partition the hash table to limit the time spent holding any
1805 : * one lock.
1806 : */
1807 262 : LWLockAcquire(pgss->lock, LW_SHARED);
1808 :
1809 262 : if (showtext)
1810 : {
1811 : /*
1812 : * Here it is safe to examine extent and gc_count without taking the
1813 : * mutex. Note that although other processes might change
1814 : * pgss->extent just after we look at it, the strings they then write
1815 : * into the file cannot yet be referenced in the hashtable, so we
1816 : * don't care whether we see them or not.
1817 : *
1818 : * If qtext_load_file fails, we just press on; we'll return NULL for
1819 : * every query text.
1820 : */
1821 262 : if (qbuffer == NULL ||
1822 262 : pgss->extent != extent ||
1823 262 : pgss->gc_count != gc_count)
1824 : {
1825 0 : free(qbuffer);
1826 0 : qbuffer = qtext_load_file(&qbuffer_size);
1827 : }
1828 : }
1829 :
1830 262 : hash_seq_init(&hash_seq, pgss_hash);
1831 53446 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
1832 : {
1833 : Datum values[PG_STAT_STATEMENTS_COLS];
1834 : bool nulls[PG_STAT_STATEMENTS_COLS];
1835 53184 : int i = 0;
1836 : Counters tmp;
1837 : double stddev;
1838 53184 : int64 queryid = entry->key.queryid;
1839 : TimestampTz stats_since;
1840 : TimestampTz minmax_stats_since;
1841 :
1842 53184 : memset(values, 0, sizeof(values));
1843 53184 : memset(nulls, 0, sizeof(nulls));
1844 :
1845 53184 : values[i++] = ObjectIdGetDatum(entry->key.userid);
1846 53184 : values[i++] = ObjectIdGetDatum(entry->key.dbid);
1847 53184 : if (api_version >= PGSS_V1_9)
1848 53160 : values[i++] = BoolGetDatum(entry->key.toplevel);
1849 :
1850 53184 : if (is_allowed_role || entry->key.userid == userid)
1851 : {
1852 53176 : if (api_version >= PGSS_V1_2)
1853 53176 : values[i++] = Int64GetDatumFast(queryid);
1854 :
1855 53176 : if (showtext)
1856 : {
1857 53176 : char *qstr = qtext_fetch(entry->query_offset,
1858 : entry->query_len,
1859 : qbuffer,
1860 : qbuffer_size);
1861 :
1862 53176 : if (qstr)
1863 : {
1864 : char *enc;
1865 :
1866 53176 : enc = pg_any_to_server(qstr,
1867 : entry->query_len,
1868 : entry->encoding);
1869 :
1870 53176 : values[i++] = CStringGetTextDatum(enc);
1871 :
1872 53176 : if (enc != qstr)
1873 0 : pfree(enc);
1874 : }
1875 : else
1876 : {
1877 : /* Just return a null if we fail to find the text */
1878 0 : nulls[i++] = true;
1879 : }
1880 : }
1881 : else
1882 : {
1883 : /* Query text not requested */
1884 0 : nulls[i++] = true;
1885 : }
1886 : }
1887 : else
1888 : {
1889 : /* Don't show queryid */
1890 8 : if (api_version >= PGSS_V1_2)
1891 8 : nulls[i++] = true;
1892 :
1893 : /*
1894 : * Don't show query text, but hint as to the reason for not doing
1895 : * so if it was requested
1896 : */
1897 8 : if (showtext)
1898 8 : values[i++] = CStringGetTextDatum("<insufficient privilege>");
1899 : else
1900 0 : nulls[i++] = true;
1901 : }
1902 :
1903 : /* copy counters to a local variable to keep locking time short */
1904 53184 : SpinLockAcquire(&entry->mutex);
1905 53184 : tmp = entry->counters;
1906 53184 : SpinLockRelease(&entry->mutex);
1907 :
1908 : /*
1909 : * The spinlock is not required when reading these two as they are
1910 : * always updated when holding pgss->lock exclusively.
1911 : */
1912 53184 : stats_since = entry->stats_since;
1913 53184 : minmax_stats_since = entry->minmax_stats_since;
1914 :
1915 : /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1916 53184 : if (IS_STICKY(tmp))
1917 78 : continue;
1918 :
1919 : /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1920 159318 : for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1921 : {
1922 106212 : if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1923 : {
1924 106204 : values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1925 106204 : values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1926 : }
1927 :
1928 106212 : if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1929 : api_version >= PGSS_V1_8)
1930 : {
1931 106204 : values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1932 106204 : values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1933 106204 : values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1934 :
1935 : /*
1936 : * Note we are calculating the population variance here, not
1937 : * the sample variance, as we have data for the whole
1938 : * population, so Bessel's correction is not used, and we
1939 : * don't divide by tmp.calls - 1.
1940 : */
1941 106204 : if (tmp.calls[kind] > 1)
1942 10000 : stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1943 : else
1944 96204 : stddev = 0.0;
1945 106204 : values[i++] = Float8GetDatumFast(stddev);
1946 : }
1947 : }
1948 53106 : values[i++] = Int64GetDatumFast(tmp.rows);
1949 53106 : values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1950 53106 : values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1951 53106 : if (api_version >= PGSS_V1_1)
1952 53106 : values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1953 53106 : values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1954 53106 : values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1955 53106 : values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1956 53106 : if (api_version >= PGSS_V1_1)
1957 53106 : values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1958 53106 : values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1959 53106 : values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1960 53106 : values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1961 53106 : if (api_version >= PGSS_V1_1)
1962 : {
1963 53106 : values[i++] = Float8GetDatumFast(tmp.shared_blk_read_time);
1964 53106 : values[i++] = Float8GetDatumFast(tmp.shared_blk_write_time);
1965 : }
1966 53106 : if (api_version >= PGSS_V1_11)
1967 : {
1968 53050 : values[i++] = Float8GetDatumFast(tmp.local_blk_read_time);
1969 53050 : values[i++] = Float8GetDatumFast(tmp.local_blk_write_time);
1970 : }
1971 53106 : if (api_version >= PGSS_V1_10)
1972 : {
1973 53068 : values[i++] = Float8GetDatumFast(tmp.temp_blk_read_time);
1974 53068 : values[i++] = Float8GetDatumFast(tmp.temp_blk_write_time);
1975 : }
1976 53106 : if (api_version >= PGSS_V1_8)
1977 : {
1978 : char buf[256];
1979 : Datum wal_bytes;
1980 :
1981 53098 : values[i++] = Int64GetDatumFast(tmp.wal_records);
1982 53098 : values[i++] = Int64GetDatumFast(tmp.wal_fpi);
1983 :
1984 53098 : snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1985 :
1986 : /* Convert to numeric. */
1987 53098 : wal_bytes = DirectFunctionCall3(numeric_in,
1988 : CStringGetDatum(buf),
1989 : ObjectIdGetDatum(0),
1990 : Int32GetDatum(-1));
1991 53098 : values[i++] = wal_bytes;
1992 : }
1993 53106 : if (api_version >= PGSS_V1_12)
1994 : {
1995 53030 : values[i++] = Int64GetDatumFast(tmp.wal_buffers_full);
1996 : }
1997 53106 : if (api_version >= PGSS_V1_10)
1998 : {
1999 53068 : values[i++] = Int64GetDatumFast(tmp.jit_functions);
2000 53068 : values[i++] = Float8GetDatumFast(tmp.jit_generation_time);
2001 53068 : values[i++] = Int64GetDatumFast(tmp.jit_inlining_count);
2002 53068 : values[i++] = Float8GetDatumFast(tmp.jit_inlining_time);
2003 53068 : values[i++] = Int64GetDatumFast(tmp.jit_optimization_count);
2004 53068 : values[i++] = Float8GetDatumFast(tmp.jit_optimization_time);
2005 53068 : values[i++] = Int64GetDatumFast(tmp.jit_emission_count);
2006 53068 : values[i++] = Float8GetDatumFast(tmp.jit_emission_time);
2007 : }
2008 53106 : if (api_version >= PGSS_V1_11)
2009 : {
2010 53050 : values[i++] = Int64GetDatumFast(tmp.jit_deform_count);
2011 53050 : values[i++] = Float8GetDatumFast(tmp.jit_deform_time);
2012 : }
2013 53106 : if (api_version >= PGSS_V1_12)
2014 : {
2015 53030 : values[i++] = Int64GetDatumFast(tmp.parallel_workers_to_launch);
2016 53030 : values[i++] = Int64GetDatumFast(tmp.parallel_workers_launched);
2017 : }
2018 53106 : if (api_version >= PGSS_V1_13)
2019 : {
2020 53020 : values[i++] = Int64GetDatumFast(tmp.generic_plan_calls);
2021 53020 : values[i++] = Int64GetDatumFast(tmp.custom_plan_calls);
2022 : }
2023 53106 : if (api_version >= PGSS_V1_11)
2024 : {
2025 53050 : values[i++] = TimestampTzGetDatum(stats_since);
2026 53050 : values[i++] = TimestampTzGetDatum(minmax_stats_since);
2027 : }
2028 :
2029 : Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2030 : api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2031 : api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2032 : api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2033 : api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2034 : api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2035 : api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2036 : api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2037 : api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2038 : api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2039 : -1 /* fail if you forget to update this assert */ ));
2040 :
2041 53106 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2042 : }
2043 :
2044 262 : LWLockRelease(pgss->lock);
2045 :
2046 262 : free(qbuffer);
2047 262 : }
2048 :
2049 : /* Number of output arguments (columns) for pg_stat_statements_info */
2050 : #define PG_STAT_STATEMENTS_INFO_COLS 2
2051 :
2052 : /*
2053 : * Return statistics of pg_stat_statements.
2054 : */
2055 : Datum
2056 4 : pg_stat_statements_info(PG_FUNCTION_ARGS)
2057 : {
2058 : pgssGlobalStats stats;
2059 : TupleDesc tupdesc;
2060 4 : Datum values[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2061 4 : bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2062 :
2063 4 : if (!pgss || !pgss_hash)
2064 0 : ereport(ERROR,
2065 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2066 : errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2067 :
2068 : /* Build a tuple descriptor for our result type */
2069 4 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2070 0 : elog(ERROR, "return type must be a row type");
2071 :
2072 : /* Read global statistics for pg_stat_statements */
2073 4 : SpinLockAcquire(&pgss->mutex);
2074 4 : stats = pgss->stats;
2075 4 : SpinLockRelease(&pgss->mutex);
2076 :
2077 4 : values[0] = Int64GetDatum(stats.dealloc);
2078 4 : values[1] = TimestampTzGetDatum(stats.stats_reset);
2079 :
2080 4 : PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
2081 : }
2082 :
2083 : /*
2084 : * Estimate shared memory space needed.
2085 : */
2086 : static Size
2087 14 : pgss_memsize(void)
2088 : {
2089 : Size size;
2090 :
2091 14 : size = MAXALIGN(sizeof(pgssSharedState));
2092 14 : size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
2093 :
2094 14 : return size;
2095 : }
2096 :
2097 : /*
2098 : * Allocate a new hashtable entry.
2099 : * caller must hold an exclusive lock on pgss->lock
2100 : *
2101 : * "query" need not be null-terminated; we rely on query_len instead
2102 : *
2103 : * If "sticky" is true, make the new entry artificially sticky so that it will
2104 : * probably still be there when the query finishes execution. We do this by
2105 : * giving it a median usage value rather than the normal value. (Strictly
2106 : * speaking, query strings are normalized on a best effort basis, though it
2107 : * would be difficult to demonstrate this even under artificial conditions.)
2108 : *
2109 : * Note: despite needing exclusive lock, it's not an error for the target
2110 : * entry to already exist. This is because pgss_store releases and
2111 : * reacquires lock after failing to find a match; so someone else could
2112 : * have made the entry while we waited to get exclusive lock.
2113 : */
static pgssEntry *
entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
			bool sticky)
{
	pgssEntry  *entry;
	bool		found;

	/* Make space if needed (evict least-used entries until below pgss_max) */
	while (hash_get_num_entries(pgss_hash) >= pgss_max)
		entry_dealloc();

	/* Find or create an entry with desired hash code */
	entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);

	if (!found)
	{
		/* New entry, initialize it.  (An existing entry is left untouched.) */

		/* reset the statistics */
		memset(&entry->counters, 0, sizeof(Counters));
		/* set the appropriate initial usage count */
		entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
		/* re-initialize the mutex each time ... we assume no one using it */
		SpinLockInit(&entry->mutex);
		/* ... and don't forget the query text metadata */
		Assert(query_len >= 0);
		entry->query_offset = query_offset;
		entry->query_len = query_len;
		entry->encoding = encoding;
		/* the new entry's stats timeline starts now */
		entry->stats_since = GetCurrentTimestamp();
		entry->minmax_stats_since = entry->stats_since;
	}

	return entry;
}
2149 :
2150 : /*
2151 : * qsort comparator for sorting into increasing usage order
2152 : */
2153 : static int
2154 0 : entry_cmp(const void *lhs, const void *rhs)
2155 : {
2156 0 : double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2157 0 : double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2158 :
2159 0 : if (l_usage < r_usage)
2160 0 : return -1;
2161 0 : else if (l_usage > r_usage)
2162 0 : return +1;
2163 : else
2164 0 : return 0;
2165 : }
2166 :
2167 : /*
2168 : * Deallocate least-used entries.
2169 : *
2170 : * Caller must hold an exclusive lock on pgss->lock.
2171 : */
static void
entry_dealloc(void)
{
	HASH_SEQ_STATUS hash_seq;
	pgssEntry **entries;
	pgssEntry  *entry;
	int			nvictims;
	int			i;
	Size		tottextlen;
	int			nvalidtexts;

	/*
	 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
	 * While we're scanning the table, apply the decay factor to the usage
	 * values, and update the mean query length.
	 *
	 * Note that the mean query length is almost immediately obsolete, since
	 * we compute it before not after discarding the least-used entries.
	 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
	 * making two passes to get a more current result.  Likewise, the new
	 * cur_median_usage includes the entries we're about to zap.
	 */

	entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));

	i = 0;
	tottextlen = 0;
	nvalidtexts = 0;

	hash_seq_init(&hash_seq, pgss_hash);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		entries[i++] = entry;
		/* "Sticky" entries get a different usage decay rate. */
		if (IS_STICKY(entry->counters))
			entry->counters.usage *= STICKY_DECREASE_FACTOR;
		else
			entry->counters.usage *= USAGE_DECREASE_FACTOR;
		/* In the mean length computation, ignore dropped texts. */
		/* (query_len < 0 marks an entry whose text was invalidated) */
		if (entry->query_len >= 0)
		{
			tottextlen += entry->query_len + 1;
			nvalidtexts++;
		}
	}

	/* Sort into increasing order by usage */
	qsort(entries, i, sizeof(pgssEntry *), entry_cmp);

	/* Record the (approximate) median usage */
	if (i > 0)
		pgss->cur_median_usage = entries[i / 2]->counters.usage;
	/* Record the mean query length */
	if (nvalidtexts > 0)
		pgss->mean_query_len = tottextlen / nvalidtexts;
	else
		pgss->mean_query_len = ASSUMED_LENGTH_INIT;

	/*
	 * Now zap an appropriate fraction of lowest-usage entries; always remove
	 * at least 10 (but never more than exist).
	 */
	nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
	nvictims = Min(nvictims, i);

	for (i = 0; i < nvictims; i++)
	{
		hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
	}

	pfree(entries);

	/* Increment the number of times entries are deallocated */
	SpinLockAcquire(&pgss->mutex);
	pgss->stats.dealloc += 1;
	SpinLockRelease(&pgss->mutex);
}
2246 :
2247 : /*
2248 : * Given a query string (not necessarily null-terminated), allocate a new
2249 : * entry in the external query text file and store the string there.
2250 : *
2251 : * If successful, returns true, and stores the new entry's offset in the file
2252 : * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2253 : * number of garbage collections that have occurred so far.
2254 : *
2255 : * On failure, returns false.
2256 : *
2257 : * At least a shared lock on pgss->lock must be held by the caller, so as
2258 : * to prevent a concurrent garbage collection. Share-lock-holding callers
2259 : * should pass a gc_count pointer to obtain the number of garbage collections,
2260 : * so that they can recheck the count after obtaining exclusive lock to
2261 : * detect whether a garbage collection occurred (and removed this entry).
2262 : */
static bool
qtext_store(const char *query, int query_len,
			Size *query_offset, int *gc_count)
{
	Size		off;
	int			fd;

	/*
	 * We use a spinlock to protect extent/n_writers/gc_count, so that
	 * multiple processes may execute this function concurrently.
	 * Incrementing n_writers advertises an in-progress write, which readers
	 * use to skip speculative file loads.
	 */
	SpinLockAcquire(&pgss->mutex);
	off = pgss->extent;
	pgss->extent += query_len + 1;	/* reserve room for text plus its null */
	pgss->n_writers++;
	if (gc_count)
		*gc_count = pgss->gc_count;
	SpinLockRelease(&pgss->mutex);

	*query_offset = off;

	/*
	 * Don't allow the file to grow larger than what qtext_load_file can
	 * (theoretically) handle.  This has been seen to be reachable on 32-bit
	 * platforms.
	 */
	if (unlikely(query_len >= MaxAllocHugeSize - off))
	{
		errno = EFBIG;			/* not quite right, but it'll do */
		fd = -1;
		goto error;
	}

	/* Now write the data into the successfully-reserved part of the file */
	fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
	if (fd < 0)
		goto error;

	if (pg_pwrite(fd, query, query_len, off) != query_len)
		goto error;
	/* append the terminating null explicitly; "query" need not carry one */
	if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
		goto error;

	CloseTransientFile(fd);

	/* Mark our write complete */
	SpinLockAcquire(&pgss->mutex);
	pgss->n_writers--;
	SpinLockRelease(&pgss->mutex);

	return true;

error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not write file \"%s\": %m",
					PGSS_TEXT_FILE)));

	if (fd >= 0)
		CloseTransientFile(fd);

	/* Mark our write complete (must be done even on failure) */
	SpinLockAcquire(&pgss->mutex);
	pgss->n_writers--;
	SpinLockRelease(&pgss->mutex);

	return false;
}
2331 :
2332 : /*
2333 : * Read the external query text file into a malloc'd buffer.
2334 : *
2335 : * Returns NULL (without throwing an error) if unable to read, eg
2336 : * file not there or insufficient memory.
2337 : *
2338 : * On success, the buffer size is also returned into *buffer_size.
2339 : *
2340 : * This can be called without any lock on pgss->lock, but in that case
2341 : * the caller is responsible for verifying that the result is sane.
2342 : */
static char *
qtext_load_file(Size *buffer_size)
{
	char	   *buf;			/* malloc'd; caller is responsible for free() */
	int			fd;
	struct stat stat;
	Size		nread;

	fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
	if (fd < 0)
	{
		/* ENOENT is expected (file may not exist yet); stay quiet then */
		if (errno != ENOENT)
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not read file \"%s\": %m",
							PGSS_TEXT_FILE)));
		return NULL;
	}

	/* Get file length */
	if (fstat(fd, &stat))
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not stat file \"%s\": %m",
						PGSS_TEXT_FILE)));
		CloseTransientFile(fd);
		return NULL;
	}

	/* Allocate buffer; beware that off_t might be wider than size_t */
	if (stat.st_size <= MaxAllocHugeSize)
		buf = (char *) malloc(stat.st_size);
	else
		buf = NULL;
	if (buf == NULL)
	{
		ereport(LOG,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory"),
				 errdetail("Could not allocate enough memory to read file \"%s\".",
						   PGSS_TEXT_FILE)));
		CloseTransientFile(fd);
		return NULL;
	}

	/*
	 * OK, slurp in the file.  Windows fails if we try to read more than
	 * INT_MAX bytes at once, and other platforms might not like that either,
	 * so read a very large file in 1GB segments.
	 */
	nread = 0;
	while (nread < stat.st_size)
	{
		int			toread = Min(1024 * 1024 * 1024, stat.st_size - nread);

		/*
		 * If we get a short read and errno doesn't get set, the reason is
		 * probably that garbage collection truncated the file since we did
		 * the fstat(), so we don't log a complaint --- but we don't return
		 * the data, either, since it's most likely corrupt due to concurrent
		 * writes from garbage collection.
		 */
		errno = 0;
		if (read(fd, buf + nread, toread) != toread)
		{
			if (errno)
				ereport(LOG,
						(errcode_for_file_access(),
						 errmsg("could not read file \"%s\": %m",
								PGSS_TEXT_FILE)));
			free(buf);
			CloseTransientFile(fd);
			return NULL;
		}
		nread += toread;
	}

	if (CloseTransientFile(fd) != 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));

	*buffer_size = nread;
	return buf;
}
2429 :
2430 : /*
2431 : * Locate a query text in the file image previously read by qtext_load_file().
2432 : *
2433 : * We validate the given offset/length, and return NULL if bogus. Otherwise,
2434 : * the result points to a null-terminated string within the buffer.
2435 : */
2436 : static char *
2437 159222 : qtext_fetch(Size query_offset, int query_len,
2438 : char *buffer, Size buffer_size)
2439 : {
2440 : /* File read failed? */
2441 159222 : if (buffer == NULL)
2442 0 : return NULL;
2443 : /* Bogus offset/length? */
2444 159222 : if (query_len < 0 ||
2445 159222 : query_offset + query_len >= buffer_size)
2446 0 : return NULL;
2447 : /* As a further sanity check, make sure there's a trailing null */
2448 159222 : if (buffer[query_offset + query_len] != '\0')
2449 0 : return NULL;
2450 : /* Looks OK */
2451 159222 : return buffer + query_offset;
2452 : }
2453 :
2454 : /*
2455 : * Do we need to garbage-collect the external query text file?
2456 : *
2457 : * Caller should hold at least a shared lock on pgss->lock.
2458 : */
2459 : static bool
2460 56522 : need_gc_qtexts(void)
2461 : {
2462 : Size extent;
2463 :
2464 : /* Read shared extent pointer */
2465 56522 : SpinLockAcquire(&pgss->mutex);
2466 56522 : extent = pgss->extent;
2467 56522 : SpinLockRelease(&pgss->mutex);
2468 :
2469 : /*
2470 : * Don't proceed if file does not exceed 512 bytes per possible entry.
2471 : *
2472 : * Here and in the next test, 32-bit machines have overflow hazards if
2473 : * pgss_max and/or mean_query_len are large. Force the multiplications
2474 : * and comparisons to be done in uint64 arithmetic to forestall trouble.
2475 : */
2476 56522 : if ((uint64) extent < (uint64) 512 * pgss_max)
2477 56522 : return false;
2478 :
2479 : /*
2480 : * Don't proceed if file is less than about 50% bloat. Nothing can or
2481 : * should be done in the event of unusually large query texts accounting
2482 : * for file's large size. We go to the trouble of maintaining the mean
2483 : * query length in order to prevent garbage collection from thrashing
2484 : * uselessly.
2485 : */
2486 0 : if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2487 0 : return false;
2488 :
2489 0 : return true;
2490 : }
2491 :
2492 : /*
2493 : * Garbage-collect orphaned query texts in external file.
2494 : *
2495 : * This won't be called often in the typical case, since it's likely that
2496 : * there won't be too much churn, and besides, a similar compaction process
2497 : * occurs when serializing to disk at shutdown or as part of resetting.
2498 : * Despite this, it seems prudent to plan for the edge case where the file
2499 : * becomes unreasonably large, with no other method of compaction likely to
2500 : * occur in the foreseeable future.
2501 : *
2502 : * The caller must hold an exclusive lock on pgss->lock.
2503 : *
2504 : * At the first sign of trouble we unlink the query text file to get a clean
2505 : * slate (although existing statistics are retained), rather than risk
2506 : * thrashing by allowing the same problem case to recur indefinitely.
2507 : */
static void
gc_qtexts(void)
{
	char	   *qbuffer;		/* malloc'd image of old file; freed on all paths */
	Size		qbuffer_size;
	FILE	   *qfile = NULL;
	HASH_SEQ_STATUS hash_seq;
	pgssEntry  *entry;
	Size		extent;
	int			nentries;

	/*
	 * When called from pgss_store, some other session might have proceeded
	 * with garbage collection in the no-lock-held interim of lock strength
	 * escalation.  Check once more that this is actually necessary.
	 */
	if (!need_gc_qtexts())
		return;

	/*
	 * Load the old texts file.  If we fail (out of memory, for instance),
	 * invalidate query texts.  Hopefully this is rare.  It might seem better
	 * to leave things alone on an OOM failure, but the problem is that the
	 * file is only going to get bigger; hoping for a future non-OOM result is
	 * risky and can easily lead to complete denial of service.
	 */
	qbuffer = qtext_load_file(&qbuffer_size);
	if (qbuffer == NULL)
		goto gc_fail;

	/*
	 * We overwrite the query texts file in place, so as to reduce the risk of
	 * an out-of-disk-space failure.  Since the file is guaranteed not to get
	 * larger, this should always work on traditional filesystems; though we
	 * could still lose on copy-on-write filesystems.
	 */
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write file \"%s\": %m",
						PGSS_TEXT_FILE)));
		goto gc_fail;
	}

	extent = 0;
	nentries = 0;

	/* Copy each still-referenced query text into the front of the file */
	hash_seq_init(&hash_seq, pgss_hash);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		int			query_len = entry->query_len;
		char	   *qry = qtext_fetch(entry->query_offset,
									  query_len,
									  qbuffer,
									  qbuffer_size);

		if (qry == NULL)
		{
			/* Trouble ... drop the text */
			entry->query_offset = 0;
			entry->query_len = -1;
			/* entry will not be counted in mean query length computation */
			continue;
		}

		if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
		{
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not write file \"%s\": %m",
							PGSS_TEXT_FILE)));
			/* must terminate the (still active) hash scan before bailing */
			hash_seq_term(&hash_seq);
			goto gc_fail;
		}

		entry->query_offset = extent;
		extent += query_len + 1;
		nentries++;
	}

	/*
	 * Truncate away any now-unused space.  If this fails for some odd reason,
	 * we log it, but there's no need to fail.
	 */
	if (ftruncate(fileno(qfile), extent) != 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not truncate file \"%s\": %m",
						PGSS_TEXT_FILE)));

	/* FreeFile also flushes; a failure here means the file is suspect */
	if (FreeFile(qfile))
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write file \"%s\": %m",
						PGSS_TEXT_FILE)));
		qfile = NULL;
		goto gc_fail;
	}

	elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
		 pgss->extent, extent);

	/* Reset the shared extent pointer */
	pgss->extent = extent;

	/*
	 * Also update the mean query length, to be sure that need_gc_qtexts()
	 * won't still think we have a problem.
	 */
	if (nentries > 0)
		pgss->mean_query_len = extent / nentries;
	else
		pgss->mean_query_len = ASSUMED_LENGTH_INIT;

	free(qbuffer);

	/*
	 * OK, count a garbage collection cycle.  (Note: even though we have
	 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
	 * other processes may examine gc_count while holding only the mutex.
	 * Also, we have to advance the count *after* we've rewritten the file,
	 * else other processes might not realize they read a stale file.)
	 */
	record_gc_qtexts();

	return;

gc_fail:
	/* clean up resources; free(NULL) is a no-op if the load failed */
	if (qfile)
		FreeFile(qfile);
	free(qbuffer);

	/*
	 * Since the contents of the external file are now uncertain, mark all
	 * hashtable entries as having invalid texts.
	 */
	hash_seq_init(&hash_seq, pgss_hash);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		entry->query_offset = 0;
		entry->query_len = -1;
	}

	/*
	 * Destroy the query text file and create a new, empty one
	 */
	(void) unlink(PGSS_TEXT_FILE);
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not recreate file \"%s\": %m",
						PGSS_TEXT_FILE)));
	else
		FreeFile(qfile);

	/* Reset the shared extent pointer */
	pgss->extent = 0;

	/* Reset mean_query_len to match the new state */
	pgss->mean_query_len = ASSUMED_LENGTH_INIT;

	/*
	 * Bump the GC count even though we failed.
	 *
	 * This is needed to make concurrent readers of file without any lock on
	 * pgss->lock notice existence of new version of file.  Once readers
	 * subsequently observe a change in GC count with pgss->lock held, that
	 * forces a safe reopen of file.  Writers also require that we bump here,
	 * of course.  (As required by locking protocol, readers and writers don't
	 * trust earlier file contents until gc_count is found unchanged after
	 * pgss->lock acquired in shared or exclusive mode respectively.)
	 */
	record_gc_qtexts();
}
2687 :
/*
 * Reset one hashtable entry "e" (a pgssEntry *, possibly NULL; NULL is a
 * no-op).  Relies on variables in the caller's scope: "minmax_only" chooses
 * between clearing just the min/max timing counters and removing the entry
 * outright, "stats_reset" supplies the reset timestamp, and "num_remove" is
 * incremented for each entry actually removed.
 */
#define SINGLE_ENTRY_RESET(e) \
if (e) { \
	if (minmax_only) { \
		/* When requested reset only min/max statistics of an entry */ \
		for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
		{ \
			e->counters.max_time[kind] = 0; \
			e->counters.min_time[kind] = 0; \
		} \
		e->minmax_stats_since = stats_reset; \
	} \
	else \
	{ \
		/* Remove the key otherwise */ \
		hash_search(pgss_hash, &e->key, HASH_REMOVE, NULL); \
		num_remove++; \
	} \
}
2706 :
/*
 * Reset entries corresponding to the parameters passed.
 *
 * A zero userid/dbid/queryid acts as a wildcard for that key field.  When
 * all three are supplied we use direct hash lookups (fast path) on the two
 * possible entries (top-level and nested); otherwise we scan the whole
 * hashtable.  When minmax_only is true, matching entries are retained and
 * only their min/max timing statistics are cleared (see SINGLE_ENTRY_RESET);
 * otherwise matching entries are removed.
 *
 * If the reset removes every entry, the module-wide statistics and the
 * external query-text file are reset as well.
 *
 * Returns the timestamp recorded as the reset time.
 */
static TimestampTz
entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
{
	HASH_SEQ_STATUS hash_seq;
	pgssEntry  *entry;
	FILE	   *qfile;
	long		num_entries;
	long		num_remove = 0; /* # of entries removed; minmax resets don't count */
	pgssHashKey key;
	TimestampTz stats_reset;

	if (!pgss || !pgss_hash)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));

	/* Exclusive lock: we may remove entries and rewrite the text file */
	LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
	num_entries = hash_get_num_entries(pgss_hash);

	stats_reset = GetCurrentTimestamp();

	if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
	{
		/* If all the parameters are available, use the fast path. */
		memset(&key, 0, sizeof(pgssHashKey));
		key.userid = userid;
		key.dbid = dbid;
		key.queryid = queryid;

		/*
		 * Reset the entry if it exists, starting with the non-top-level
		 * entry.
		 */
		key.toplevel = false;
		entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);

		SINGLE_ENTRY_RESET(entry);

		/* Also reset the top-level entry if it exists. */
		key.toplevel = true;
		entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);

		SINGLE_ENTRY_RESET(entry);
	}
	else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
	{
		/* Reset entries matching whichever parameters are non-zero. */
		hash_seq_init(&hash_seq, pgss_hash);
		while ((entry = hash_seq_search(&hash_seq)) != NULL)
		{
			if ((!userid || entry->key.userid == userid) &&
				(!dbid || entry->key.dbid == dbid) &&
				(!queryid || entry->key.queryid == queryid))
			{
				SINGLE_ENTRY_RESET(entry);
			}
		}
	}
	else
	{
		/* Reset all entries. */
		hash_seq_init(&hash_seq, pgss_hash);
		while ((entry = hash_seq_search(&hash_seq)) != NULL)
		{
			SINGLE_ENTRY_RESET(entry);
		}
	}

	/*
	 * If any entries survive (always true for a minmax_only reset, which
	 * removes nothing), leave the global stats and query-text file alone.
	 */
	if (num_entries != num_remove)
		goto release_lock;

	/*
	 * Reset global statistics for pg_stat_statements since all entries are
	 * removed.
	 */
	SpinLockAcquire(&pgss->mutex);
	pgss->stats.dealloc = 0;
	pgss->stats.stats_reset = stats_reset;
	SpinLockRelease(&pgss->mutex);

	/*
	 * Write new empty query file, perhaps even creating a new one to recover
	 * if the file was missing.
	 */
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m",
						PGSS_TEXT_FILE)));
		goto done;
	}

	/* If ftruncate fails, log it, but it's not a fatal problem */
	if (ftruncate(fileno(qfile), 0) != 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not truncate file \"%s\": %m",
						PGSS_TEXT_FILE)));

	FreeFile(qfile);

done:
	/* Even if file creation failed, reset the extent so writers restart at 0 */
	pgss->extent = 0;
	/* This counts as a query text garbage collection for our purposes */
	record_gc_qtexts();

release_lock:
	LWLockRelease(pgss->lock);

	return stats_reset;
}
2824 :
2825 : /*
2826 : * Generate a normalized version of the query string that will be used to
2827 : * represent all similar queries.
2828 : *
2829 : * Note that the normalized representation may well vary depending on
2830 : * just which "equivalent" query is used to create the hashtable entry.
2831 : * We assume this is OK.
2832 : *
2833 : * If query_loc > 0, then "query" has been advanced by that much compared to
2834 : * the original string start, so we need to translate the provided locations
2835 : * to compensate. (This lets us avoid re-scanning statements before the one
2836 : * of interest, so it's worth doing.)
2837 : *
2838 : * *query_len_p contains the input string length, and is updated with
2839 : * the result string length on exit. The resulting string might be longer
2840 : * or shorter depending on what happens with replacement of constants.
2841 : *
2842 : * Returns a palloc'd string.
2843 : */
2844 : static char *
2845 21142 : generate_normalized_query(JumbleState *jstate, const char *query,
2846 : int query_loc, int *query_len_p)
2847 : {
2848 : char *norm_query;
2849 21142 : int query_len = *query_len_p;
2850 : int norm_query_buflen, /* Space allowed for norm_query */
2851 : len_to_wrt, /* Length (in bytes) to write */
2852 21142 : quer_loc = 0, /* Source query byte location */
2853 21142 : n_quer_loc = 0, /* Normalized query byte location */
2854 21142 : last_off = 0, /* Offset from start for previous tok */
2855 21142 : last_tok_len = 0; /* Length (in bytes) of that tok */
2856 21142 : int num_constants_replaced = 0;
2857 :
2858 : /*
2859 : * Get constants' lengths (core system only gives us locations). Note
2860 : * this also ensures the items are sorted by location.
2861 : */
2862 21142 : fill_in_constant_lengths(jstate, query, query_loc);
2863 :
2864 : /*
2865 : * Allow for $n symbols to be longer than the constants they replace.
2866 : * Constants must take at least one byte in text form, while a $n symbol
2867 : * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2868 : * could refine that limit based on the max value of n for the current
2869 : * query, but it hardly seems worth any extra effort to do so.
2870 : */
2871 21142 : norm_query_buflen = query_len + jstate->clocations_count * 10;
2872 :
2873 : /* Allocate result buffer */
2874 21142 : norm_query = palloc(norm_query_buflen + 1);
2875 :
2876 83450 : for (int i = 0; i < jstate->clocations_count; i++)
2877 : {
2878 : int off, /* Offset from start for cur tok */
2879 : tok_len; /* Length (in bytes) of that tok */
2880 :
2881 : /*
2882 : * If we have an external param at this location, but no lists are
2883 : * being squashed across the query, then we skip here; this will make
2884 : * us print the characters found in the original query that represent
2885 : * the parameter in the next iteration (or after the loop is done),
2886 : * which is a bit odd but seems to work okay in most cases.
2887 : */
2888 62308 : if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists)
2889 298 : continue;
2890 :
2891 62010 : off = jstate->clocations[i].location;
2892 :
2893 : /* Adjust recorded location if we're dealing with partial string */
2894 62010 : off -= query_loc;
2895 :
2896 62010 : tok_len = jstate->clocations[i].length;
2897 :
2898 62010 : if (tok_len < 0)
2899 478 : continue; /* ignore any duplicates */
2900 :
2901 : /* Copy next chunk (what precedes the next constant) */
2902 61532 : len_to_wrt = off - last_off;
2903 61532 : len_to_wrt -= last_tok_len;
2904 : Assert(len_to_wrt >= 0);
2905 61532 : memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2906 61532 : n_quer_loc += len_to_wrt;
2907 :
2908 : /*
2909 : * And insert a param symbol in place of the constant token; and, if
2910 : * we have a squashable list, insert a placeholder comment starting
2911 : * from the list's second value.
2912 : */
2913 61532 : n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s",
2914 61532 : num_constants_replaced + 1 + jstate->highest_extern_param_id,
2915 61532 : jstate->clocations[i].squashed ? " /*, ... */" : "");
2916 61532 : num_constants_replaced++;
2917 :
2918 : /* move forward */
2919 61532 : quer_loc = off + tok_len;
2920 61532 : last_off = off;
2921 61532 : last_tok_len = tok_len;
2922 : }
2923 :
2924 : /*
2925 : * We've copied up until the last ignorable constant. Copy over the
2926 : * remaining bytes of the original query string.
2927 : */
2928 21142 : len_to_wrt = query_len - quer_loc;
2929 :
2930 : Assert(len_to_wrt >= 0);
2931 21142 : memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2932 21142 : n_quer_loc += len_to_wrt;
2933 :
2934 : Assert(n_quer_loc <= norm_query_buflen);
2935 21142 : norm_query[n_quer_loc] = '\0';
2936 :
2937 21142 : *query_len_p = n_quer_loc;
2938 21142 : return norm_query;
2939 : }
2940 :
/*
 * Given a valid SQL string and an array of constant-location records,
 * fill in the textual lengths of those constants.
 *
 * The constants may use any allowed constant syntax, such as float literals,
 * bit-strings, single-quoted strings and dollar-quoted strings.  This is
 * accomplished by using the public API for the core scanner.
 *
 * It is the caller's job to ensure that the string is a valid SQL statement
 * with constants at the indicated locations.  Since in practice the string
 * has already been parsed, and the locations that the caller provides will
 * have originated from within the authoritative parser, this should not be
 * a problem.
 *
 * Duplicate constant pointers are possible, and will have their lengths
 * marked as '-1', so that they are later ignored.  (Actually, we assume the
 * lengths were initialized as -1 to start with, and don't change them here.)
 *
 * If query_loc > 0, then "query" has been advanced by that much compared to
 * the original string start, so we need to translate the provided locations
 * to compensate.  (This lets us avoid re-scanning statements before the one
 * of interest, so it's worth doing.)
 *
 * N.B. There is an assumption that a '-' character at a Const location begins
 * a negative numeric constant.  This precludes there ever being another
 * reason for a constant to start with a '-'.
 */
static void
fill_in_constant_lengths(JumbleState *jstate, const char *query,
						 int query_loc)
{
	LocationLen *locs;
	core_yyscan_t yyscanner;
	core_yy_extra_type yyextra;
	core_YYSTYPE yylval;
	YYLTYPE		yylloc;
	int			last_loc = -1;	/* location of last constant handled */
	int			i;

	/*
	 * Sort the records by location so that we can process them in order while
	 * scanning the query text.
	 */
	if (jstate->clocations_count > 1)
		qsort(jstate->clocations, jstate->clocations_count,
			  sizeof(LocationLen), comp_location);
	locs = jstate->clocations;

	/* initialize the flex scanner --- should match raw_parser() */
	yyscanner = scanner_init(query,
							 &yyextra,
							 &ScanKeywords,
							 ScanKeywordTokens);

	/* we don't want to re-emit any escape string warnings */
	yyextra.escape_string_warning = false;

	/* Search for each constant, in sequence */
	for (i = 0; i < jstate->clocations_count; i++)
	{
		int			loc = locs[i].location;
		int			tok;

		/* Adjust recorded location if we're dealing with partial string */
		loc -= query_loc;

		Assert(loc >= 0);

		if (locs[i].squashed)
			continue;			/* squashable list, ignore */

		if (loc <= last_loc)
			continue;			/* Duplicate constant, ignore */

		/* Lex tokens until we find the desired constant */
		for (;;)
		{
			tok = core_yylex(&yylval, &yylloc, yyscanner);

			/* We should not hit end-of-string, but if we do, behave sanely */
			if (tok == 0)
				break;			/* out of inner for-loop */

			/*
			 * We should find the token position exactly, but if we somehow
			 * run past it, work with that.
			 */
			if (yylloc >= loc)
			{
				if (query[loc] == '-')
				{
					/*
					 * It's a negative value - this is the one and only case
					 * where we replace more than a single token.
					 *
					 * Do not compensate for the core system's special-case
					 * adjustment of location to that of the leading '-'
					 * operator in the event of a negative constant.  It is
					 * also useful for our purposes to start from the minus
					 * symbol.  In this way, queries like "select * from foo
					 * where bar = 1" and "select * from foo where bar = -2"
					 * will have identical normalized query strings.
					 */
					tok = core_yylex(&yylval, &yylloc, yyscanner);
					if (tok == 0)
						break;	/* out of inner for-loop */
				}

				/*
				 * We now rely on the assumption that flex has placed a zero
				 * byte after the text of the current token in scanbuf.
				 */
				locs[i].length = strlen(yyextra.scanbuf + loc);
				break;			/* out of inner for-loop */
			}
		}

		/* If we hit end-of-string, give up, leaving remaining lengths -1 */
		if (tok == 0)
			break;

		/* Remember this location so duplicates of it are skipped above */
		last_loc = loc;
	}

	scanner_finish(yyscanner);
}
3067 :
3068 : /*
3069 : * comp_location: comparator for qsorting LocationLen structs by location
3070 : */
3071 : static int
3072 69388 : comp_location(const void *a, const void *b)
3073 : {
3074 69388 : int l = ((const LocationLen *) a)->location;
3075 69388 : int r = ((const LocationLen *) b)->location;
3076 :
3077 69388 : return pg_cmp_s32(l, r);
3078 : }
|