Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_stat_statements.c
4 : * Track statement planning and execution times as well as resource
5 : * usage across a whole database cluster.
6 : *
7 : * Execution costs are totaled for each distinct source query, and kept in
8 : * a shared hashtable. (We track only as many distinct queries as will fit
9 : * in the designated amount of shared memory.)
10 : *
11 : * Starting in Postgres 9.2, this module normalized query entries. As of
12 : * Postgres 14, the normalization is done by the core if compute_query_id is
13 : * enabled, or optionally by third-party modules.
14 : *
15 : * To facilitate presenting entries to users, we create "representative" query
16 : * strings in which constants are replaced with parameter symbols ($n), to
17 : * make it clearer what a normalized entry can represent. To save on shared
18 : * memory, and to avoid having to truncate oversized query strings, we store
19 : * these strings in a temporary external query-texts file. Offsets into this
20 : * file are kept in shared memory.
21 : *
22 : * Note about locking issues: to create or delete an entry in the shared
23 : * hashtable, one must hold pgss->lock exclusively. Modifying any field
24 : * in an entry except the counters requires the same. To look up an entry,
25 : * one must hold the lock shared. To read or update the counters within
26 : * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27 : * disappear!) and also take the entry's mutex spinlock.
28 : * The shared state variable pgss->extent (the next free spot in the external
29 : * query-text file) should be accessed only while holding either the
30 : * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31 : * allow reserving file space while holding only shared lock on pgss->lock.
32 : * Rewriting the entire external query-text file, eg for garbage collection,
33 : * requires holding pgss->lock exclusively; this allows individual entries
34 : * in the file to be read or written while holding only shared lock.
35 : *
36 : *
37 : * Copyright (c) 2008-2025, PostgreSQL Global Development Group
38 : *
39 : * IDENTIFICATION
40 : * contrib/pg_stat_statements/pg_stat_statements.c
41 : *
42 : *-------------------------------------------------------------------------
43 : */
44 : #include "postgres.h"
45 :
46 : #include <math.h>
47 : #include <sys/stat.h>
48 : #include <unistd.h>
49 :
50 : #include "access/parallel.h"
51 : #include "catalog/pg_authid.h"
52 : #include "common/int.h"
53 : #include "executor/instrument.h"
54 : #include "funcapi.h"
55 : #include "jit/jit.h"
56 : #include "mb/pg_wchar.h"
57 : #include "miscadmin.h"
58 : #include "nodes/queryjumble.h"
59 : #include "optimizer/planner.h"
60 : #include "parser/analyze.h"
61 : #include "parser/scanner.h"
62 : #include "pgstat.h"
63 : #include "storage/fd.h"
64 : #include "storage/ipc.h"
65 : #include "storage/lwlock.h"
66 : #include "storage/shmem.h"
67 : #include "storage/spin.h"
68 : #include "tcop/utility.h"
69 : #include "utils/acl.h"
70 : #include "utils/builtins.h"
71 : #include "utils/memutils.h"
72 : #include "utils/timestamp.h"
73 :
74 16 : PG_MODULE_MAGIC;
75 :
76 : /* Location of permanent stats file (valid when database is shut down) */
77 : #define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
78 :
79 : /*
80 : * Location of external query text file.
81 : */
82 : #define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
83 :
84 : /* Magic number identifying the stats file format */
85 : static const uint32 PGSS_FILE_HEADER = 0x20220408;
86 :
87 : /* PostgreSQL major version number, changes in which invalidate all entries */
88 : static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
89 :
90 : /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
91 : #define USAGE_EXEC(duration) (1.0)
92 : #define USAGE_INIT (1.0) /* including initial planning */
93 : #define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */
94 : #define ASSUMED_LENGTH_INIT 1024 /* initial assumed mean query length */
95 : #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
96 : #define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
97 : #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
98 : #define IS_STICKY(c) ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
99 :
100 : /*
101 : * Extension version number, for supporting older extension versions' objects
102 : */
103 : typedef enum pgssVersion
104 : {
      /* NOTE(review): keep in ascending release order — presumably compared
       * numerically (api_version >= PGSS_V1_x) in pg_stat_statements_internal();
       * confirm before reordering. */
105 : PGSS_V1_0 = 0,
106 : PGSS_V1_1,
107 : PGSS_V1_2,
108 : PGSS_V1_3,
109 : PGSS_V1_8,
110 : PGSS_V1_9,
111 : PGSS_V1_10,
112 : PGSS_V1_11,
113 : PGSS_V1_12,
114 : } pgssVersion;
115 :
116 : typedef enum pgssStoreKind
117 : {
      /* Deliberately outside the [0, PGSS_NUMKIND) counters-array range; used
       * when pgss_store() is called only to record the normalized query text
       * (see the call in pgss_post_parse_analyze). */
118 : PGSS_INVALID = -1,
119 :
120 : /*
121 : * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
122 : * reference the underlying values in the arrays in the Counters struct,
123 : * and this order is required in pg_stat_statements_internal().
124 : */
125 : PGSS_PLAN = 0,
126 : PGSS_EXEC,
127 : } pgssStoreKind;
128 :
129 : #define PGSS_NUMKIND (PGSS_EXEC + 1)
130 :
131 : /*
132 : * Hashtable key that defines the identity of a hashtable entry. We separate
133 : * queries by user and by database even if they are otherwise identical.
134 : *
135 : * If you add a new key to this struct, make sure to teach pgss_store() to
136 : * zero the padding bytes. Otherwise, things will break, because pgss_hash is
137 : * created using HASH_BLOBS, and thus tag_hash is used to hash this.
138 : *
139 : */
140 : typedef struct pgssHashKey
141 : {
      /* All bytes of this struct participate in hashing/comparison (HASH_BLOBS),
       * so any padding after the trailing bool must be zeroed by pgss_store();
       * see the note above the struct. */
142 : Oid userid; /* user OID */
143 : Oid dbid; /* database OID */
144 : uint64 queryid; /* query identifier */
145 : bool toplevel; /* query executed at top level */
146 : } pgssHashKey;
147 :
148 : /*
149 : * The actual stats counters kept within pgssEntry.
150 : */
151 : typedef struct Counters
152 : {
      /* Per the locking rules in the file header: these counters are read and
       * updated while holding pgss->lock (shared or exclusive) plus the owning
       * entry's mutex spinlock. */
153 : int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
154 : double total_time[PGSS_NUMKIND]; /* total planning/execution time,
155 : * in msec */
156 : double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
157 : * msec since min/max reset */
158 : double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
159 : * msec since min/max reset */
160 : double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
161 : * msec */
162 : double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
163 : * planning/execution time in msec */
164 : int64 rows; /* total # of retrieved or affected rows */
165 : int64 shared_blks_hit; /* # of shared buffer hits */
166 : int64 shared_blks_read; /* # of shared disk blocks read */
167 : int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
168 : int64 shared_blks_written; /* # of shared disk blocks written */
169 : int64 local_blks_hit; /* # of local buffer hits */
170 : int64 local_blks_read; /* # of local disk blocks read */
171 : int64 local_blks_dirtied; /* # of local disk blocks dirtied */
172 : int64 local_blks_written; /* # of local disk blocks written */
173 : int64 temp_blks_read; /* # of temp blocks read */
174 : int64 temp_blks_written; /* # of temp blocks written */
175 : double shared_blk_read_time; /* time spent reading shared blocks,
176 : * in msec */
177 : double shared_blk_write_time; /* time spent writing shared blocks,
178 : * in msec */
179 : double local_blk_read_time; /* time spent reading local blocks, in
180 : * msec */
181 : double local_blk_write_time; /* time spent writing local blocks, in
182 : * msec */
183 : double temp_blk_read_time; /* time spent reading temp blocks, in msec */
184 : double temp_blk_write_time; /* time spent writing temp blocks, in
185 : * msec */
186 : double usage; /* usage factor */
187 : int64 wal_records; /* # of WAL records generated */
188 : int64 wal_fpi; /* # of WAL full page images generated */
189 : uint64 wal_bytes; /* total amount of WAL generated in bytes */
190 : int64 wal_buffers_full; /* # of times the WAL buffers became full */
191 : int64 jit_functions; /* total number of JIT functions emitted */
192 : double jit_generation_time; /* total time to generate jit code */
193 : int64 jit_inlining_count; /* number of times inlining time has been
194 : * > 0 */
195 : double jit_deform_time; /* total time to deform tuples in jit code */
196 : int64 jit_deform_count; /* number of times deform time has been >
197 : * 0 */
198 :
199 : double jit_inlining_time; /* total time to inline jit code */
200 : int64 jit_optimization_count; /* number of times optimization time
201 : * has been > 0 */
202 : double jit_optimization_time; /* total time to optimize jit code */
203 : int64 jit_emission_count; /* number of times emission time has been
204 : * > 0 */
205 : double jit_emission_time; /* total time to emit jit code */
206 : int64 parallel_workers_to_launch; /* # of parallel workers planned
207 : * to be launched */
208 : int64 parallel_workers_launched; /* # of parallel workers actually
209 : * launched */
210 : } Counters;
211 :
212 : /*
213 : * Global statistics for pg_stat_statements
214 : */
215 : typedef struct pgssGlobalStats
216 : {
      /* Lives in shared memory as pgssSharedState.stats and is persisted at the
       * tail of the dump file (see pgss_shmem_shutdown/startup). */
217 : int64 dealloc; /* # of times entries were deallocated */
218 : TimestampTz stats_reset; /* timestamp with all stats reset */
219 : } pgssGlobalStats;
220 :
221 : /*
222 : * Statistics per statement
223 : *
224 : * Note: in event of a failure in garbage collection of the query text file,
225 : * we reset query_offset to zero and query_len to -1. This will be seen as
226 : * an invalid state by qtext_fetch().
227 : */
228 : typedef struct pgssEntry
229 : {
      /* The query text itself is not stored here; it lives in the external
       * PGSS_TEXT_FILE at query_offset, for query_len bytes. */
230 : pgssHashKey key; /* hash key of entry - MUST BE FIRST */
231 : Counters counters; /* the statistics for this query */
232 : Size query_offset; /* query text offset in external file */
233 : int query_len; /* # of valid bytes in query string, or -1 */
234 : int encoding; /* query text encoding */
235 : TimestampTz stats_since; /* timestamp of entry allocation */
236 : TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
237 : slock_t mutex; /* protects the counters only */
238 : } pgssEntry;
239 :
240 : /*
241 : * Global shared state
242 : */
243 : typedef struct pgssSharedState
244 : {
245 : LWLock *lock; /* protects hashtable search/modification */
246 : double cur_median_usage; /* current median usage in hashtable */
247 : Size mean_query_len; /* current mean entry text length */
      /* The spinlock below covers exactly the four fields that follow it:
       * extent, n_writers, gc_count and stats. */
248 : slock_t mutex; /* protects following fields only: */
249 : Size extent; /* current extent of query file */
250 : int n_writers; /* number of active writers to query file */
251 : int gc_count; /* query file garbage collection cycle count */
252 : pgssGlobalStats stats; /* global statistics for pgss */
253 : } pgssSharedState;
254 :
255 : /*---- Local variables ----*/
256 :
257 : /* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
258 : static int nesting_level = 0;
259 :
260 : /* Saved hook values */
261 : static shmem_request_hook_type prev_shmem_request_hook = NULL;
262 : static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
263 : static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
264 : static planner_hook_type prev_planner_hook = NULL;
265 : static ExecutorStart_hook_type prev_ExecutorStart = NULL;
266 : static ExecutorRun_hook_type prev_ExecutorRun = NULL;
267 : static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
268 : static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
269 : static ProcessUtility_hook_type prev_ProcessUtility = NULL;
270 :
271 : /* Links to shared memory state */
272 : static pgssSharedState *pgss = NULL;
273 : static HTAB *pgss_hash = NULL;
274 :
275 : /*---- GUC variables ----*/
276 :
277 : typedef enum
278 : {
279 : PGSS_TRACK_NONE, /* track no statements */
280 : PGSS_TRACK_TOP, /* only top level statements */
281 : PGSS_TRACK_ALL, /* all statements, including nested ones */
282 : } PGSSTrackLevel;
283 :
284 : static const struct config_enum_entry track_options[] =
285 : {
      /* Valid values for the pg_stat_statements.track GUC; the list must stay
       * NULL-terminated (see DefineCustomEnumVariable in _PG_init). */
286 : {"none", PGSS_TRACK_NONE, false},
287 : {"top", PGSS_TRACK_TOP, false},
288 : {"all", PGSS_TRACK_ALL, false},
289 : {NULL, 0, false}
290 : };
291 :
292 : static int pgss_max = 5000; /* max # statements to track */
293 : static int pgss_track = PGSS_TRACK_TOP; /* tracking level */
294 : static bool pgss_track_utility = true; /* whether to track utility commands */
295 : static bool pgss_track_planning = false; /* whether to track planning
296 : * duration */
297 : static bool pgss_save = true; /* whether to save stats across shutdown */
298 :
299 :
      /*
       * True if statements at the given nesting level should be tracked:
       * never in a parallel worker, always under "all", and only at nesting
       * level 0 under "top".
       */
300 : #define pgss_enabled(level) \
301 : (!IsParallelWorker() && \
302 : (pgss_track == PGSS_TRACK_ALL || \
303 : (pgss_track == PGSS_TRACK_TOP && (level) == 0)))
304 :
      /*
       * Bump the query-text-file garbage-collection cycle counter, under
       * pgss->mutex as required by the pgssSharedState locking rules.
       */
305 : #define record_gc_qtexts() \
306 : do { \
307 : SpinLockAcquire(&pgss->mutex); \
308 : pgss->gc_count++; \
309 : SpinLockRelease(&pgss->mutex); \
310 : } while(0)
311 :
312 : /*---- Function declarations ----*/
313 :
314 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
315 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_7);
316 36 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_11);
317 0 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
318 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_3);
319 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_8);
320 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_9);
321 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_10);
322 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_11);
323 44 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_12);
324 0 : PG_FUNCTION_INFO_V1(pg_stat_statements);
325 14 : PG_FUNCTION_INFO_V1(pg_stat_statements_info);
326 :
327 : static void pgss_shmem_request(void);
328 : static void pgss_shmem_startup(void);
329 : static void pgss_shmem_shutdown(int code, Datum arg);
330 : static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
331 : JumbleState *jstate);
332 : static PlannedStmt *pgss_planner(Query *parse,
333 : const char *query_string,
334 : int cursorOptions,
335 : ParamListInfo boundParams);
336 : static bool pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
337 : static void pgss_ExecutorRun(QueryDesc *queryDesc,
338 : ScanDirection direction,
339 : uint64 count);
340 : static void pgss_ExecutorFinish(QueryDesc *queryDesc);
341 : static void pgss_ExecutorEnd(QueryDesc *queryDesc);
342 : static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
343 : bool readOnlyTree,
344 : ProcessUtilityContext context, ParamListInfo params,
345 : QueryEnvironment *queryEnv,
346 : DestReceiver *dest, QueryCompletion *qc);
347 : static void pgss_store(const char *query, uint64 queryId,
348 : int query_location, int query_len,
349 : pgssStoreKind kind,
350 : double total_time, uint64 rows,
351 : const BufferUsage *bufusage,
352 : const WalUsage *walusage,
353 : const struct JitInstrumentation *jitusage,
354 : JumbleState *jstate,
355 : int parallel_workers_to_launch,
356 : int parallel_workers_launched);
357 : static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
358 : pgssVersion api_version,
359 : bool showtext);
360 : static Size pgss_memsize(void);
361 : static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
362 : int encoding, bool sticky);
363 : static void entry_dealloc(void);
364 : static bool qtext_store(const char *query, int query_len,
365 : Size *query_offset, int *gc_count);
366 : static char *qtext_load_file(Size *buffer_size);
367 : static char *qtext_fetch(Size query_offset, int query_len,
368 : char *buffer, Size buffer_size);
369 : static bool need_gc_qtexts(void);
370 : static void gc_qtexts(void);
371 : static TimestampTz entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only);
372 : static char *generate_normalized_query(JumbleState *jstate, const char *query,
373 : int query_loc, int *query_len_p);
374 : static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
375 : int query_loc);
376 : static int comp_location(const void *a, const void *b);
377 :
378 :
379 : /*
380 : * Module load callback
381 : */
382 : void
383 16 : _PG_init(void)
384 : {
385 : /*
386 : * In order to create our shared memory area, we have to be loaded via
387 : * shared_preload_libraries. If not, fall out without hooking into any of
388 : * the main system. (We don't throw error here because it seems useful to
389 : * allow the pg_stat_statements functions to be created even when the
390 : * module isn't active. The functions must protect themselves against
391 : * being called then, however.)
392 : */
393 16 : if (!process_shared_preload_libraries_in_progress)
394 2 : return;
395 :
396 : /*
397 : * Inform the postmaster that we want to enable query_id calculation if
398 : * compute_query_id is set to auto.
399 : */
400 14 : EnableQueryId();
401 :
402 : /*
403 : * Define (or redefine) custom GUC variables.
404 : */
405 14 : DefineCustomIntVariable("pg_stat_statements.max",
406 : "Sets the maximum number of statements tracked by pg_stat_statements.",
407 : NULL,
408 : &pgss_max,
409 : 5000,
410 : 100,
411 : INT_MAX / 2,
412 : PGC_POSTMASTER,
413 : 0,
414 : NULL,
415 : NULL,
416 : NULL);
417 :
418 14 : DefineCustomEnumVariable("pg_stat_statements.track",
419 : "Selects which statements are tracked by pg_stat_statements.",
420 : NULL,
421 : &pgss_track,
422 : PGSS_TRACK_TOP,
423 : track_options,
424 : PGC_SUSET,
425 : 0,
426 : NULL,
427 : NULL,
428 : NULL);
429 :
430 14 : DefineCustomBoolVariable("pg_stat_statements.track_utility",
431 : "Selects whether utility commands are tracked by pg_stat_statements.",
432 : NULL,
433 : &pgss_track_utility,
434 : true,
435 : PGC_SUSET,
436 : 0,
437 : NULL,
438 : NULL,
439 : NULL);
440 :
441 14 : DefineCustomBoolVariable("pg_stat_statements.track_planning",
442 : "Selects whether planning duration is tracked by pg_stat_statements.",
443 : NULL,
444 : &pgss_track_planning,
445 : false,
446 : PGC_SUSET,
447 : 0,
448 : NULL,
449 : NULL,
450 : NULL);
451 :
452 14 : DefineCustomBoolVariable("pg_stat_statements.save",
453 : "Save pg_stat_statements statistics across server shutdowns.",
454 : NULL,
455 : &pgss_save,
456 : true,
457 : PGC_SIGHUP,
458 : 0,
459 : NULL,
460 : NULL,
461 : NULL);
462 :
      /* Reserve the "pg_stat_statements." GUC prefix so that misspelled
       * settings under it are rejected rather than silently ignored. */
463 14 : MarkGUCPrefixReserved("pg_stat_statements");
464 :
465 : /*
466 : * Install hooks.
467 : */
      /* Each previous hook value is saved so the pgss_* callbacks can chain to
       * any hook installed earlier (see e.g. pgss_shmem_request). */
468 14 : prev_shmem_request_hook = shmem_request_hook;
469 14 : shmem_request_hook = pgss_shmem_request;
470 14 : prev_shmem_startup_hook = shmem_startup_hook;
471 14 : shmem_startup_hook = pgss_shmem_startup;
472 14 : prev_post_parse_analyze_hook = post_parse_analyze_hook;
473 14 : post_parse_analyze_hook = pgss_post_parse_analyze;
474 14 : prev_planner_hook = planner_hook;
475 14 : planner_hook = pgss_planner;
476 14 : prev_ExecutorStart = ExecutorStart_hook;
477 14 : ExecutorStart_hook = pgss_ExecutorStart;
478 14 : prev_ExecutorRun = ExecutorRun_hook;
479 14 : ExecutorRun_hook = pgss_ExecutorRun;
480 14 : prev_ExecutorFinish = ExecutorFinish_hook;
481 14 : ExecutorFinish_hook = pgss_ExecutorFinish;
482 14 : prev_ExecutorEnd = ExecutorEnd_hook;
483 14 : ExecutorEnd_hook = pgss_ExecutorEnd;
484 14 : prev_ProcessUtility = ProcessUtility_hook;
485 14 : ProcessUtility_hook = pgss_ProcessUtility;
486 : }
487 :
488 : /*
489 : * shmem_request hook: request additional shared resources. We'll allocate or
490 : * attach to the shared resources in pgss_shmem_startup().
491 : */
492 : static void
493 14 : pgss_shmem_request(void)
494 : {
495 14 : if (prev_shmem_request_hook)
496 0 : prev_shmem_request_hook();
497 :
      /* Reserve shared memory for pgssSharedState + hash table, and one LWLock
       * that pgss_shmem_startup() later fetches as pgss->lock. */
498 14 : RequestAddinShmemSpace(pgss_memsize());
499 14 : RequestNamedLWLockTranche("pg_stat_statements", 1);
500 14 : }
501 :
502 : /*
503 : * shmem_startup hook: allocate or attach to shared memory,
504 : * then load any pre-existing statistics from file.
505 : * Also create and load the query-texts file, which is expected to exist
506 : * (even if empty) while the module is enabled.
507 : */
508 : static void
509 14 : pgss_shmem_startup(void)
510 : {
511 : bool found;
512 : HASHCTL info;
513 14 : FILE *file = NULL;
514 14 : FILE *qfile = NULL;
515 : uint32 header;
516 : int32 num;
517 : int32 pgver;
518 : int32 i;
519 : int buffer_size;
520 14 : char *buffer = NULL;
521 :
522 14 : if (prev_shmem_startup_hook)
523 0 : prev_shmem_startup_hook();
524 :
525 : /* reset in case this is a restart within the postmaster */
526 14 : pgss = NULL;
527 14 : pgss_hash = NULL;
528 :
529 : /*
530 : * Create or attach to the shared memory state, including hash table
531 : */
532 14 : LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
533 :
534 14 : pgss = ShmemInitStruct("pg_stat_statements",
535 : sizeof(pgssSharedState),
536 : &found);
537 :
538 14 : if (!found)
539 : {
540 : /* First time through ... */
541 14 : pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
542 14 : pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
543 14 : pgss->mean_query_len = ASSUMED_LENGTH_INIT;
544 14 : SpinLockInit(&pgss->mutex);
545 14 : pgss->extent = 0;
546 14 : pgss->n_writers = 0;
547 14 : pgss->gc_count = 0;
548 14 : pgss->stats.dealloc = 0;
549 14 : pgss->stats.stats_reset = GetCurrentTimestamp();
550 : }
551 :
552 14 : info.keysize = sizeof(pgssHashKey);
553 14 : info.entrysize = sizeof(pgssEntry);
554 14 : pgss_hash = ShmemInitHash("pg_stat_statements hash",
555 : pgss_max, pgss_max,
556 : &info,
557 : HASH_ELEM | HASH_BLOBS);
558 :
559 14 : LWLockRelease(AddinShmemInitLock);
560 :
561 : /*
562 : * If we're in the postmaster (or a standalone backend...), set up a shmem
563 : * exit hook to dump the statistics to disk.
564 : */
565 14 : if (!IsUnderPostmaster)
566 14 : on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
567 :
568 : /*
569 : * Done if some other process already completed our initialization.
570 : */
571 14 : if (found)
572 14 : return;
573 :
574 : /*
575 : * Note: we don't bother with locks here, because there should be no other
576 : * processes running when this code is reached.
577 : */
578 :
579 : /* Unlink query text file possibly left over from crash */
580 14 : unlink(PGSS_TEXT_FILE);
581 :
582 : /* Allocate new query text temp file */
583 14 : qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
584 14 : if (qfile == NULL)
585 0 : goto write_error;
586 :
587 : /*
588 : * If we were told not to load old statistics, we're done. (Note we do
589 : * not try to unlink any old dump file in this case. This seems a bit
590 : * questionable but it's the historical behavior.)
591 : */
592 14 : if (!pgss_save)
593 : {
594 2 : FreeFile(qfile);
595 2 : return;
596 : }
597 :
598 : /*
599 : * Attempt to load old statistics from the dump file.
600 : */
601 12 : file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
602 12 : if (file == NULL)
603 : {
604 8 : if (errno != ENOENT)
605 0 : goto read_error;
606 : /* No existing persisted stats file, so we're done */
607 8 : FreeFile(qfile);
608 8 : return;
609 : }
610 :
      /* Start with a modest text buffer; it is grown below whenever a longer
       * saved query text is encountered. */
611 4 : buffer_size = 2048;
612 4 : buffer = (char *) palloc(buffer_size);
613 :
614 8 : if (fread(&header, sizeof(uint32), 1, file) != 1 ||
615 8 : fread(&pgver, sizeof(uint32), 1, file) != 1 ||
616 4 : fread(&num, sizeof(int32), 1, file) != 1)
617 0 : goto read_error;
618 :
619 4 : if (header != PGSS_FILE_HEADER ||
620 4 : pgver != PGSS_PG_MAJOR_VERSION)
621 0 : goto data_error;
622 :
      /* Reload each saved entry, rewriting its query text into the fresh
       * query-text temp file and rebuilding the shared hash table. */
623 52702 : for (i = 0; i < num; i++)
624 : {
625 : pgssEntry temp;
626 : pgssEntry *entry;
627 : Size query_offset;
628 :
629 52698 : if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
630 0 : goto read_error;
631 :
632 : /* Encoding is the only field we can easily sanity-check */
633 52698 : if (!PG_VALID_BE_ENCODING(temp.encoding))
634 0 : goto data_error;
635 :
636 : /* Resize buffer as needed */
637 52698 : if (temp.query_len >= buffer_size)
638 : {
639 6 : buffer_size = Max(buffer_size * 2, temp.query_len + 1);
640 6 : buffer = repalloc(buffer, buffer_size);
641 : }
642 :
643 52698 : if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
644 0 : goto read_error;
645 :
646 : /* Should have a trailing null, but let's make sure */
647 52698 : buffer[temp.query_len] = '\0';
648 :
649 : /* Skip loading "sticky" entries */
650 52698 : if (IS_STICKY(temp.counters))
651 1502 : continue;
652 :
653 : /* Store the query text */
654 51196 : query_offset = pgss->extent;
655 51196 : if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
656 0 : goto write_error;
657 51196 : pgss->extent += temp.query_len + 1;
658 :
659 : /* make the hashtable entry (discards old entries if too many) */
660 51196 : entry = entry_alloc(&temp.key, query_offset, temp.query_len,
661 : temp.encoding,
662 : false);
663 :
664 : /* copy in the actual stats */
665 51196 : entry->counters = temp.counters;
666 51196 : entry->stats_since = temp.stats_since;
667 51196 : entry->minmax_stats_since = temp.minmax_stats_since;
668 : }
669 :
670 : /* Read global statistics for pg_stat_statements */
671 4 : if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
672 0 : goto read_error;
673 :
674 4 : pfree(buffer);
675 4 : FreeFile(file);
676 4 : FreeFile(qfile);
677 :
678 : /*
679 : * Remove the persisted stats file so it's not included in
680 : * backups/replication standbys, etc. A new file will be written on next
681 : * shutdown.
682 : *
683 : * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
684 : * because we remove that file on startup; it acts inversely to
685 : * PGSS_DUMP_FILE, in that it is only supposed to be around when the
686 : * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
687 : * when the server is not running. Leaving the file creates no danger of
688 : * a newly restored database having a spurious record of execution costs,
689 : * which is what we're really concerned about here.
690 : */
691 4 : unlink(PGSS_DUMP_FILE);
692 :
693 4 : return;
694 :
695 0 : read_error:
696 0 : ereport(LOG,
697 : (errcode_for_file_access(),
698 : errmsg("could not read file \"%s\": %m",
699 : PGSS_DUMP_FILE)));
700 0 : goto fail;
701 0 : data_error:
702 0 : ereport(LOG,
703 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
704 : errmsg("ignoring invalid data in file \"%s\"",
705 : PGSS_DUMP_FILE)));
706 0 : goto fail;
707 0 : write_error:
708 0 : ereport(LOG,
709 : (errcode_for_file_access(),
710 : errmsg("could not write file \"%s\": %m",
711 : PGSS_TEXT_FILE)));
712 0 : fail:
713 0 : if (buffer)
714 0 : pfree(buffer);
715 0 : if (file)
716 0 : FreeFile(file);
717 0 : if (qfile)
718 0 : FreeFile(qfile);
719 : /* If possible, throw away the bogus file; ignore any error */
720 0 : unlink(PGSS_DUMP_FILE);
721 :
722 : /*
723 : * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
724 : * server is running with pg_stat_statements enabled
725 : */
726 : }
727 :
728 : /*
729 : * shmem_shutdown hook: Dump statistics into file.
730 : *
731 : * Note: we don't bother with acquiring lock, because there should be no
732 : * other processes running when this is called.
733 : */
734 : static void
735 14 : pgss_shmem_shutdown(int code, Datum arg)
736 : {
737 : FILE *file;
738 14 : char *qbuffer = NULL;
739 14 : Size qbuffer_size = 0;
740 : HASH_SEQ_STATUS hash_seq;
741 : int32 num_entries;
742 : pgssEntry *entry;
743 :
744 : /* Don't try to dump during a crash. */
745 14 : if (code)
746 14 : return;
747 :
748 : /* Safety check ... shouldn't get here unless shmem is set up. */
749 14 : if (!pgss || !pgss_hash)
750 0 : return;
751 :
752 : /* Don't dump if told not to. */
753 14 : if (!pgss_save)
754 4 : return;
755 :
      /* Write to a ".tmp" file first; it is atomically renamed into place at
       * the bottom, so a partial dump never replaces a good one. */
756 10 : file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
757 10 : if (file == NULL)
758 0 : goto error;
759 :
760 10 : if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
761 0 : goto error;
762 10 : if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
763 0 : goto error;
764 10 : num_entries = hash_get_num_entries(pgss_hash);
765 10 : if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
766 0 : goto error;
767 :
768 10 : qbuffer = qtext_load_file(&qbuffer_size);
769 10 : if (qbuffer == NULL)
770 0 : goto error;
771 :
772 : /*
773 : * When serializing to disk, we store query texts immediately after their
774 : * entry data. Any orphaned query texts are thereby excluded.
775 : */
776 10 : hash_seq_init(&hash_seq, pgss_hash);
777 105870 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
778 : {
779 105860 : int len = entry->query_len;
780 105860 : char *qstr = qtext_fetch(entry->query_offset, len,
781 : qbuffer, qbuffer_size);
782 :
783 105860 : if (qstr == NULL)
784 0 : continue; /* Ignore any entries with bogus texts */
785 :
786 105860 : if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
787 105860 : fwrite(qstr, 1, len + 1, file) != len + 1)
788 : {
789 : /* note: we assume hash_seq_term won't change errno */
790 0 : hash_seq_term(&hash_seq);
791 0 : goto error;
792 : }
793 : }
794 :
795 : /* Dump global statistics for pg_stat_statements */
796 10 : if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
797 0 : goto error;
798 :
      /* NB: qbuffer comes from qtext_load_file() and is released with free(),
       * not pfree() — it is not palloc'd memory. */
799 10 : free(qbuffer);
800 10 : qbuffer = NULL;
801 :
802 10 : if (FreeFile(file))
803 : {
804 0 : file = NULL;
805 0 : goto error;
806 : }
807 :
808 : /*
809 : * Rename file into place, so we atomically replace any old one.
810 : */
811 10 : (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
812 :
813 : /* Unlink query-texts file; it's not needed while shutdown */
814 10 : unlink(PGSS_TEXT_FILE);
815 :
816 10 : return;
817 :
818 0 : error:
819 0 : ereport(LOG,
820 : (errcode_for_file_access(),
821 : errmsg("could not write file \"%s\": %m",
822 : PGSS_DUMP_FILE ".tmp")));
823 0 : free(qbuffer);
824 0 : if (file)
825 0 : FreeFile(file);
826 0 : unlink(PGSS_DUMP_FILE ".tmp");
827 0 : unlink(PGSS_TEXT_FILE);
828 : }
829 :
830 : /*
831 : * Post-parse-analysis hook: mark query with a queryId
832 : */
833 : static void
834 153086 : pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
835 : {
836 153086 : if (prev_post_parse_analyze_hook)
837 0 : prev_post_parse_analyze_hook(pstate, query, jstate);
838 :
839 : /* Safety check... */
840 153086 : if (!pgss || !pgss_hash || !pgss_enabled(nesting_level))
841 25548 : return;
842 :
843 : /*
844 : * If it's EXECUTE, clear the queryId so that stats will accumulate for
845 : * the underlying PREPARE. But don't do this if we're not tracking
846 : * utility statements, to avoid messing up another extension that might be
847 : * tracking them.
848 : */
849 127538 : if (query->utilityStmt)
850 : {
851 56448 : if (pgss_track_utility && IsA(query->utilityStmt, ExecuteStmt))
852 : {
853 6402 : query->queryId = UINT64CONST(0);
854 6402 : return;
855 : }
856 : }
857 :
858 : /*
859 : * If query jumbling were able to identify any ignorable constants, we
860 : * immediately create a hash table entry for the query, so that we can
861 : * record the normalized form of the query string. If there were no such
862 : * constants, the normalized string would be the same as the query text
863 : * anyway, so there's no need for an early entry.
864 : */
      /* kind = PGSS_INVALID: no counters are accumulated here, so the new entry
       * presumably starts out "sticky" (calls all zero, cf. IS_STICKY) until a
       * later plan/exec updates it — confirm against pgss_store(). */
865 121136 : if (jstate && jstate->clocations_count > 0)
866 70092 : pgss_store(pstate->p_sourcetext,
867 : query->queryId,
868 : query->stmt_location,
869 : query->stmt_len,
870 : PGSS_INVALID,
871 : 0,
872 : 0,
873 : NULL,
874 : NULL,
875 : NULL,
876 : jstate,
877 : 0,
878 : 0);
879 : }
880 :
/*
 * Planner hook: forward to regular planner, but measure planning time
 * if needed.
 *
 * Tracking is done only when the module is enabled at this nesting level,
 * pg_stat_statements.track_planning is on, a source query string is
 * available, and the query carries a nonzero queryId.  In all other cases
 * we still bump nesting_level around the planner call, so that anything
 * evaluated during planning is not mistaken for a top-level statement.
 */
static PlannedStmt *
pgss_planner(Query *parse,
             const char *query_string,
             int cursorOptions,
             ParamListInfo boundParams)
{
    PlannedStmt *result;

    /*
     * We can't process the query if no query_string is provided, as
     * pgss_store needs it.  We also ignore query without queryid, as it would
     * be treated as a utility statement, which may not be the case.
     */
    if (pgss_enabled(nesting_level)
        && pgss_track_planning && query_string
        && parse->queryId != UINT64CONST(0))
    {
        instr_time  start;
        instr_time  duration;
        BufferUsage bufusage_start,
                    bufusage;
        WalUsage    walusage_start,
                    walusage;

        /* We need to track buffer usage as the planner can access them. */
        bufusage_start = pgBufferUsage;

        /*
         * Similarly the planner could write some WAL records in some cases
         * (e.g. setting a hint bit with those being WAL-logged)
         */
        walusage_start = pgWalUsage;
        INSTR_TIME_SET_CURRENT(start);

        /* Bump nesting level; PG_FINALLY restores it even on error. */
        nesting_level++;
        PG_TRY();
        {
            if (prev_planner_hook)
                result = prev_planner_hook(parse, query_string, cursorOptions,
                                           boundParams);
            else
                result = standard_planner(parse, query_string, cursorOptions,
                                          boundParams);
        }
        PG_FINALLY();
        {
            nesting_level--;
        }
        PG_END_TRY();

        INSTR_TIME_SET_CURRENT(duration);
        INSTR_TIME_SUBTRACT(duration, start);

        /* calc differences of buffer counters. */
        memset(&bufusage, 0, sizeof(BufferUsage));
        BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);

        /* calc differences of WAL counters. */
        memset(&walusage, 0, sizeof(WalUsage));
        WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);

        /*
         * Record planning-phase stats (PGSS_PLAN).  Rows, JIT data, jumble
         * state and parallel-worker counts do not apply at plan time.
         */
        pgss_store(query_string,
                   parse->queryId,
                   parse->stmt_location,
                   parse->stmt_len,
                   PGSS_PLAN,
                   INSTR_TIME_GET_MILLISEC(duration),
                   0,
                   &bufusage,
                   &walusage,
                   NULL,
                   NULL,
                   0,
                   0);
    }
    else
    {
        /*
         * Even though we're not tracking plan time for this statement, we
         * must still increment the nesting level, to ensure that functions
         * evaluated during planning are not seen as top-level calls.
         */
        nesting_level++;
        PG_TRY();
        {
            if (prev_planner_hook)
                result = prev_planner_hook(parse, query_string, cursorOptions,
                                           boundParams);
            else
                result = standard_planner(parse, query_string, cursorOptions,
                                          boundParams);
        }
        PG_FINALLY();
        {
            nesting_level--;
        }
        PG_END_TRY();
    }

    return result;
}
986 :
987 : /*
988 : * ExecutorStart hook: start up tracking if needed
989 : */
990 : static bool
991 112706 : pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
992 : {
993 : bool plan_valid;
994 :
995 112706 : if (prev_ExecutorStart)
996 0 : plan_valid = prev_ExecutorStart(queryDesc, eflags);
997 : else
998 112706 : plan_valid = standard_ExecutorStart(queryDesc, eflags);
999 :
1000 : /* The plan may have become invalid during standard_ExecutorStart() */
1001 112074 : if (!plan_valid)
1002 0 : return false;
1003 :
1004 : /*
1005 : * If query has queryId zero, don't track it. This prevents double
1006 : * counting of optimizable statements that are directly contained in
1007 : * utility statements.
1008 : */
1009 112074 : if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
1010 : {
1011 : /*
1012 : * Set up to track total elapsed time in ExecutorRun. Make sure the
1013 : * space is allocated in the per-query context so it will go away at
1014 : * ExecutorEnd.
1015 : */
1016 75242 : if (queryDesc->totaltime == NULL)
1017 : {
1018 : MemoryContext oldcxt;
1019 :
1020 75242 : oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
1021 75242 : queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
1022 75242 : MemoryContextSwitchTo(oldcxt);
1023 : }
1024 : }
1025 :
1026 112074 : return true;
1027 : }
1028 :
1029 : /*
1030 : * ExecutorRun hook: all we need do is track nesting depth
1031 : */
1032 : static void
1033 109664 : pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
1034 : {
1035 109664 : nesting_level++;
1036 109664 : PG_TRY();
1037 : {
1038 109664 : if (prev_ExecutorRun)
1039 0 : prev_ExecutorRun(queryDesc, direction, count);
1040 : else
1041 109664 : standard_ExecutorRun(queryDesc, direction, count);
1042 : }
1043 6908 : PG_FINALLY();
1044 : {
1045 109664 : nesting_level--;
1046 : }
1047 109664 : PG_END_TRY();
1048 102756 : }
1049 :
1050 : /*
1051 : * ExecutorFinish hook: all we need do is track nesting depth
1052 : */
1053 : static void
1054 99500 : pgss_ExecutorFinish(QueryDesc *queryDesc)
1055 : {
1056 99500 : nesting_level++;
1057 99500 : PG_TRY();
1058 : {
1059 99500 : if (prev_ExecutorFinish)
1060 0 : prev_ExecutorFinish(queryDesc);
1061 : else
1062 99500 : standard_ExecutorFinish(queryDesc);
1063 : }
1064 338 : PG_FINALLY();
1065 : {
1066 99500 : nesting_level--;
1067 : }
1068 99500 : PG_END_TRY();
1069 99162 : }
1070 :
/*
 * ExecutorEnd hook: store results if needed
 *
 * This is where execution-phase statistics (PGSS_EXEC) actually get
 * recorded, using the instrumentation that pgss_ExecutorStart attached
 * to the QueryDesc.
 */
static void
pgss_ExecutorEnd(QueryDesc *queryDesc)
{
    uint64      queryId = queryDesc->plannedstmt->queryId;

    /*
     * Only store stats if the statement has a queryId, we actually attached
     * instrumentation to it, and tracking is enabled at this nesting level.
     */
    if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
        pgss_enabled(nesting_level))
    {
        /*
         * Make sure stats accumulation is done.  (Note: it's okay if several
         * levels of hook all do this.)
         */
        InstrEndLoop(queryDesc->totaltime);

        pgss_store(queryDesc->sourceText,
                   queryId,
                   queryDesc->plannedstmt->stmt_location,
                   queryDesc->plannedstmt->stmt_len,
                   PGSS_EXEC,
                   queryDesc->totaltime->total * 1000.0,    /* convert to msec */
                   queryDesc->estate->es_total_processed,
                   &queryDesc->totaltime->bufusage,
                   &queryDesc->totaltime->walusage,
                   queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
                   NULL,
                   queryDesc->estate->es_parallel_workers_to_launch,
                   queryDesc->estate->es_parallel_workers_launched);
    }

    /* Chain to the previous ExecutorEnd hook, or the standard one. */
    if (prev_ExecutorEnd)
        prev_ExecutorEnd(queryDesc);
    else
        standard_ExecutorEnd(queryDesc);
}
1108 :
/*
 * ProcessUtility hook
 *
 * Measures and records the cost of utility statements (when
 * pg_stat_statements.track_utility is on), with special-casing of EXECUTE
 * and PREPARE as explained below.  Fields of *pstmt that are needed after
 * execution are copied up front, because the parse tree may be freed by
 * the statement itself (e.g. ROLLBACK).
 */
static void
pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
                    bool readOnlyTree,
                    ProcessUtilityContext context,
                    ParamListInfo params, QueryEnvironment *queryEnv,
                    DestReceiver *dest, QueryCompletion *qc)
{
    Node       *parsetree = pstmt->utilityStmt;
    uint64      saved_queryId = pstmt->queryId;
    int         saved_stmt_location = pstmt->stmt_location;
    int         saved_stmt_len = pstmt->stmt_len;
    bool        enabled = pgss_track_utility && pgss_enabled(nesting_level);

    /*
     * Force utility statements to get queryId zero.  We do this even in cases
     * where the statement contains an optimizable statement for which a
     * queryId could be derived (such as EXPLAIN or DECLARE CURSOR).  For such
     * cases, runtime control will first go through ProcessUtility and then
     * the executor, and we don't want the executor hooks to do anything,
     * since we are already measuring the statement's costs at the utility
     * level.
     *
     * Note that this is only done if pg_stat_statements is enabled and
     * configured to track utility statements, in the unlikely possibility
     * that user configured another extension to handle utility statements
     * only.
     */
    if (enabled)
        pstmt->queryId = UINT64CONST(0);

    /*
     * If it's an EXECUTE statement, we don't track it and don't increment the
     * nesting level.  This allows the cycles to be charged to the underlying
     * PREPARE instead (by the Executor hooks), which is much more useful.
     *
     * We also don't track execution of PREPARE.  If we did, we would get one
     * hash table entry for the PREPARE (with hash calculated from the query
     * string), and then a different one with the same query string (but hash
     * calculated from the query tree) would be used to accumulate costs of
     * ensuing EXECUTEs.  This would be confusing.  Since PREPARE doesn't
     * actually run the planner (only parse+rewrite), its costs are generally
     * pretty negligible and it seems okay to just ignore it.
     */
    if (enabled &&
        !IsA(parsetree, ExecuteStmt) &&
        !IsA(parsetree, PrepareStmt))
    {
        instr_time  start;
        instr_time  duration;
        uint64      rows;
        BufferUsage bufusage_start,
                    bufusage;
        WalUsage    walusage_start,
                    walusage;

        /* Snapshot buffer/WAL counters so we can compute deltas afterward. */
        bufusage_start = pgBufferUsage;
        walusage_start = pgWalUsage;
        INSTR_TIME_SET_CURRENT(start);

        nesting_level++;
        PG_TRY();
        {
            if (prev_ProcessUtility)
                prev_ProcessUtility(pstmt, queryString, readOnlyTree,
                                    context, params, queryEnv,
                                    dest, qc);
            else
                standard_ProcessUtility(pstmt, queryString, readOnlyTree,
                                        context, params, queryEnv,
                                        dest, qc);
        }
        PG_FINALLY();
        {
            nesting_level--;
        }
        PG_END_TRY();

        /*
         * CAUTION: do not access the *pstmt data structure again below here.
         * If it was a ROLLBACK or similar, that data structure may have been
         * freed.  We must copy everything we still need into local variables,
         * which we did above.
         *
         * For the same reason, we can't risk restoring pstmt->queryId to its
         * former value, which'd otherwise be a good idea.
         */

        INSTR_TIME_SET_CURRENT(duration);
        INSTR_TIME_SUBTRACT(duration, start);

        /*
         * Track the total number of rows retrieved or affected by the utility
         * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
         * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
         */
        rows = (qc && (qc->commandTag == CMDTAG_COPY ||
                       qc->commandTag == CMDTAG_FETCH ||
                       qc->commandTag == CMDTAG_SELECT ||
                       qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
            qc->nprocessed : 0;

        /* calc differences of buffer counters. */
        memset(&bufusage, 0, sizeof(BufferUsage));
        BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);

        /* calc differences of WAL counters. */
        memset(&walusage, 0, sizeof(WalUsage));
        WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);

        /* Record as execution-phase stats, using the values saved earlier. */
        pgss_store(queryString,
                   saved_queryId,
                   saved_stmt_location,
                   saved_stmt_len,
                   PGSS_EXEC,
                   INSTR_TIME_GET_MILLISEC(duration),
                   rows,
                   &bufusage,
                   &walusage,
                   NULL,
                   NULL,
                   0,
                   0);
    }
    else
    {
        /*
         * Even though we're not tracking execution time for this statement,
         * we must still increment the nesting level, to ensure that functions
         * evaluated within it are not seen as top-level calls.  But don't do
         * so for EXECUTE; that way, when control reaches pgss_planner or
         * pgss_ExecutorStart, we will treat the costs as top-level if
         * appropriate.  Likewise, don't bump for PREPARE, so that parse
         * analysis will treat the statement as top-level if appropriate.
         *
         * To be absolutely certain we don't mess up the nesting level,
         * evaluate the bump_level condition just once.
         */
        bool        bump_level =
            !IsA(parsetree, ExecuteStmt) &&
            !IsA(parsetree, PrepareStmt);

        if (bump_level)
            nesting_level++;
        PG_TRY();
        {
            if (prev_ProcessUtility)
                prev_ProcessUtility(pstmt, queryString, readOnlyTree,
                                    context, params, queryEnv,
                                    dest, qc);
            else
                standard_ProcessUtility(pstmt, queryString, readOnlyTree,
                                        context, params, queryEnv,
                                        dest, qc);
        }
        PG_FINALLY();
        {
            if (bump_level)
                nesting_level--;
        }
        PG_END_TRY();
    }
}
1274 :
/*
 * Store some statistics for a statement.
 *
 * If jstate is not NULL then we're trying to create an entry for which
 * we have no statistics as yet; we just want to record the normalized
 * query string.  total_time, rows, bufusage and walusage are ignored in this
 * case.
 *
 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
 * for the arrays in the Counters field.
 *
 * Locking protocol (see also the comments at the head of the file): the
 * hash lookup and counter updates need only shared lock on pgss->lock plus
 * the entry's spinlock; creating a new entry requires promoting to
 * exclusive lock, re-validating anything done while the lock was dropped.
 */
static void
pgss_store(const char *query, uint64 queryId,
           int query_location, int query_len,
           pgssStoreKind kind,
           double total_time, uint64 rows,
           const BufferUsage *bufusage,
           const WalUsage *walusage,
           const struct JitInstrumentation *jitusage,
           JumbleState *jstate,
           int parallel_workers_to_launch,
           int parallel_workers_launched)
{
    pgssHashKey key;
    pgssEntry  *entry;
    char       *norm_query = NULL;
    int         encoding = GetDatabaseEncoding();

    Assert(query != NULL);

    /* Safety check... */
    if (!pgss || !pgss_hash)
        return;

    /*
     * Nothing to do if compute_query_id isn't enabled and no other module
     * computed a query identifier.
     */
    if (queryId == UINT64CONST(0))
        return;

    /*
     * Confine our attention to the relevant part of the string, if the query
     * is a portion of a multi-statement source string, and update query
     * location and length if needed.
     */
    query = CleanQuerytext(query, &query_location, &query_len);

    /* Set up key for hashtable search */

    /* clear padding */
    memset(&key, 0, sizeof(pgssHashKey));

    key.userid = GetUserId();
    key.dbid = MyDatabaseId;
    key.queryid = queryId;
    key.toplevel = (nesting_level == 0);

    /* Lookup the hash table entry with shared lock. */
    LWLockAcquire(pgss->lock, LW_SHARED);

    entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);

    /* Create new entry, if not present */
    if (!entry)
    {
        Size        query_offset;
        int         gc_count;
        bool        stored;
        bool        do_gc;

        /*
         * Create a new, normalized query string if caller asked.  We don't
         * need to hold the lock while doing this work.  (Note: in any case,
         * it's possible that someone else creates a duplicate hashtable entry
         * in the interval where we don't hold the lock below.  That case is
         * handled by entry_alloc.)
         */
        if (jstate)
        {
            LWLockRelease(pgss->lock);
            norm_query = generate_normalized_query(jstate, query,
                                                   query_location,
                                                   &query_len);
            LWLockAcquire(pgss->lock, LW_SHARED);
        }

        /* Append new query text to file with only shared lock held */
        stored = qtext_store(norm_query ? norm_query : query, query_len,
                             &query_offset, &gc_count);

        /*
         * Determine whether we need to garbage collect external query texts
         * while the shared lock is still held.  This micro-optimization
         * avoids taking the time to decide this while holding exclusive lock.
         */
        do_gc = need_gc_qtexts();

        /* Need exclusive lock to make a new hashtable entry - promote */
        LWLockRelease(pgss->lock);
        LWLockAcquire(pgss->lock, LW_EXCLUSIVE);

        /*
         * A garbage collection may have occurred while we weren't holding the
         * lock.  In the unlikely event that this happens, the query text we
         * stored above will have been garbage collected, so write it again.
         * This should be infrequent enough that doing it while holding
         * exclusive lock isn't a performance problem.
         */
        if (!stored || pgss->gc_count != gc_count)
            stored = qtext_store(norm_query ? norm_query : query, query_len,
                                 &query_offset, NULL);

        /* If we failed to write to the text file, give up */
        if (!stored)
            goto done;

        /* OK to create a new hashtable entry */
        entry = entry_alloc(&key, query_offset, query_len, encoding,
                            jstate != NULL);

        /* If needed, perform garbage collection while exclusive lock held */
        if (do_gc)
            gc_qtexts();
    }

    /* Increment the counts, except when jstate is not NULL */
    if (!jstate)
    {
        Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);

        /*
         * Grab the spinlock while updating the counters (see comment about
         * locking rules at the head of the file)
         */
        SpinLockAcquire(&entry->mutex);

        /* "Unstick" entry if it was previously sticky */
        if (IS_STICKY(entry->counters))
            entry->counters.usage = USAGE_INIT;

        entry->counters.calls[kind] += 1;
        entry->counters.total_time[kind] += total_time;

        if (entry->counters.calls[kind] == 1)
        {
            /* First call of this kind: min, max and mean are all this value */
            entry->counters.min_time[kind] = total_time;
            entry->counters.max_time[kind] = total_time;
            entry->counters.mean_time[kind] = total_time;
        }
        else
        {
            /*
             * Welford's method for accurately computing variance. See
             * <http://www.johndcook.com/blog/standard_deviation/>
             */
            double      old_mean = entry->counters.mean_time[kind];

            entry->counters.mean_time[kind] +=
                (total_time - old_mean) / entry->counters.calls[kind];
            entry->counters.sum_var_time[kind] +=
                (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);

            /*
             * Calculate min and max time. min = 0 and max = 0 means that the
             * min/max statistics were reset
             */
            if (entry->counters.min_time[kind] == 0
                && entry->counters.max_time[kind] == 0)
            {
                entry->counters.min_time[kind] = total_time;
                entry->counters.max_time[kind] = total_time;
            }
            else
            {
                if (entry->counters.min_time[kind] > total_time)
                    entry->counters.min_time[kind] = total_time;
                if (entry->counters.max_time[kind] < total_time)
                    entry->counters.max_time[kind] = total_time;
            }
        }
        entry->counters.rows += rows;
        entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
        entry->counters.shared_blks_read += bufusage->shared_blks_read;
        entry->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
        entry->counters.shared_blks_written += bufusage->shared_blks_written;
        entry->counters.local_blks_hit += bufusage->local_blks_hit;
        entry->counters.local_blks_read += bufusage->local_blks_read;
        entry->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
        entry->counters.local_blks_written += bufusage->local_blks_written;
        entry->counters.temp_blks_read += bufusage->temp_blks_read;
        entry->counters.temp_blks_written += bufusage->temp_blks_written;
        entry->counters.shared_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_read_time);
        entry->counters.shared_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_write_time);
        entry->counters.local_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_read_time);
        entry->counters.local_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_write_time);
        entry->counters.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time);
        entry->counters.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time);
        entry->counters.usage += USAGE_EXEC(total_time);
        entry->counters.wal_records += walusage->wal_records;
        entry->counters.wal_fpi += walusage->wal_fpi;
        entry->counters.wal_bytes += walusage->wal_bytes;
        entry->counters.wal_buffers_full += walusage->wal_buffers_full;
        if (jitusage)
        {
            entry->counters.jit_functions += jitusage->created_functions;
            entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);

            if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
                entry->counters.jit_deform_count++;
            entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);

            if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
                entry->counters.jit_inlining_count++;
            entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);

            if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
                entry->counters.jit_optimization_count++;
            entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);

            if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
                entry->counters.jit_emission_count++;
            entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
        }

        /* parallel worker counters */
        entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
        entry->counters.parallel_workers_launched += parallel_workers_launched;

        SpinLockRelease(&entry->mutex);
    }

    /* Reached directly (skipping counter updates) only if qtext_store failed */
done:
    LWLockRelease(pgss->lock);

    /* We postpone this clean-up until we're out of the lock */
    if (norm_query)
        pfree(norm_query);
}
1514 :
1515 : /*
1516 : * Reset statement statistics corresponding to userid, dbid, and queryid.
1517 : */
1518 : Datum
1519 2 : pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
1520 : {
1521 : Oid userid;
1522 : Oid dbid;
1523 : uint64 queryid;
1524 :
1525 2 : userid = PG_GETARG_OID(0);
1526 2 : dbid = PG_GETARG_OID(1);
1527 2 : queryid = (uint64) PG_GETARG_INT64(2);
1528 :
1529 2 : entry_reset(userid, dbid, queryid, false);
1530 :
1531 2 : PG_RETURN_VOID();
1532 : }
1533 :
1534 : Datum
1535 156 : pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
1536 : {
1537 : Oid userid;
1538 : Oid dbid;
1539 : uint64 queryid;
1540 : bool minmax_only;
1541 :
1542 156 : userid = PG_GETARG_OID(0);
1543 156 : dbid = PG_GETARG_OID(1);
1544 156 : queryid = (uint64) PG_GETARG_INT64(2);
1545 156 : minmax_only = PG_GETARG_BOOL(3);
1546 :
1547 156 : PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1548 : }
1549 :
/*
 * Reset statement statistics.
 *
 * Legacy no-argument variant: zeroes for userid/dbid/queryid mean
 * "match everything", so this discards all entries.
 */
Datum
pg_stat_statements_reset(PG_FUNCTION_ARGS)
{
    entry_reset(0, 0, 0, false);

    PG_RETURN_VOID();
}
1560 :
/*
 * Number of output arguments (columns) for various API versions.
 *
 * These must match the result-column counts declared by the corresponding
 * SQL function definitions; pg_stat_statements_internal() checks the actual
 * tuple descriptor against them to detect version mismatches.
 */
#define PG_STAT_STATEMENTS_COLS_V1_0	14
#define PG_STAT_STATEMENTS_COLS_V1_1	18
#define PG_STAT_STATEMENTS_COLS_V1_2	19
#define PG_STAT_STATEMENTS_COLS_V1_3	23
#define PG_STAT_STATEMENTS_COLS_V1_8	32
#define PG_STAT_STATEMENTS_COLS_V1_9	33
#define PG_STAT_STATEMENTS_COLS_V1_10	43
#define PG_STAT_STATEMENTS_COLS_V1_11	49
#define PG_STAT_STATEMENTS_COLS_V1_12	52
#define PG_STAT_STATEMENTS_COLS			52	/* maximum of above */
1571 : #define PG_STAT_STATEMENTS_COLS 52 /* maximum of above */
1572 :
1573 : /*
1574 : * Retrieve statement statistics.
1575 : *
1576 : * The SQL API of this function has changed multiple times, and will likely
1577 : * do so again in future. To support the case where a newer version of this
1578 : * loadable module is being used with an old SQL declaration of the function,
1579 : * we continue to support the older API versions. For 1.2 and later, the
1580 : * expected API version is identified by embedding it in the C name of the
1581 : * function. Unfortunately we weren't bright enough to do that for 1.1.
1582 : */
1583 : Datum
1584 176 : pg_stat_statements_1_12(PG_FUNCTION_ARGS)
1585 : {
1586 176 : bool showtext = PG_GETARG_BOOL(0);
1587 :
1588 176 : pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
1589 :
1590 176 : return (Datum) 0;
1591 : }
1592 :
1593 : Datum
1594 2 : pg_stat_statements_1_11(PG_FUNCTION_ARGS)
1595 : {
1596 2 : bool showtext = PG_GETARG_BOOL(0);
1597 :
1598 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1599 :
1600 2 : return (Datum) 0;
1601 : }
1602 :
1603 : Datum
1604 2 : pg_stat_statements_1_10(PG_FUNCTION_ARGS)
1605 : {
1606 2 : bool showtext = PG_GETARG_BOOL(0);
1607 :
1608 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1609 :
1610 2 : return (Datum) 0;
1611 : }
1612 :
1613 : Datum
1614 2 : pg_stat_statements_1_9(PG_FUNCTION_ARGS)
1615 : {
1616 2 : bool showtext = PG_GETARG_BOOL(0);
1617 :
1618 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1619 :
1620 2 : return (Datum) 0;
1621 : }
1622 :
1623 : Datum
1624 2 : pg_stat_statements_1_8(PG_FUNCTION_ARGS)
1625 : {
1626 2 : bool showtext = PG_GETARG_BOOL(0);
1627 :
1628 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1629 :
1630 2 : return (Datum) 0;
1631 : }
1632 :
1633 : Datum
1634 2 : pg_stat_statements_1_3(PG_FUNCTION_ARGS)
1635 : {
1636 2 : bool showtext = PG_GETARG_BOOL(0);
1637 :
1638 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1639 :
1640 2 : return (Datum) 0;
1641 : }
1642 :
1643 : Datum
1644 0 : pg_stat_statements_1_2(PG_FUNCTION_ARGS)
1645 : {
1646 0 : bool showtext = PG_GETARG_BOOL(0);
1647 :
1648 0 : pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1649 :
1650 0 : return (Datum) 0;
1651 : }
1652 :
/*
 * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
 * This can be removed someday, perhaps.
 *
 * Versions 1.0 and 1.1 share this C symbol; they are distinguished inside
 * pg_stat_statements_internal() by the number of output columns in the
 * call's tuple descriptor.  showtext is always true for these versions.
 */
Datum
pg_stat_statements(PG_FUNCTION_ARGS)
{
    /* If it's really API 1.1, we'll figure that out below */
    pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);

    return (Datum) 0;
}
1665 :
1666 : /* Common code for all versions of pg_stat_statements() */
1667 : static void
1668 186 : pg_stat_statements_internal(FunctionCallInfo fcinfo,
1669 : pgssVersion api_version,
1670 : bool showtext)
1671 : {
1672 186 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1673 186 : Oid userid = GetUserId();
1674 186 : bool is_allowed_role = false;
1675 186 : char *qbuffer = NULL;
1676 186 : Size qbuffer_size = 0;
1677 186 : Size extent = 0;
1678 186 : int gc_count = 0;
1679 : HASH_SEQ_STATUS hash_seq;
1680 : pgssEntry *entry;
1681 :
1682 : /*
1683 : * Superusers or roles with the privileges of pg_read_all_stats members
1684 : * are allowed
1685 : */
1686 186 : is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
1687 :
1688 : /* hash table must exist already */
1689 186 : if (!pgss || !pgss_hash)
1690 0 : ereport(ERROR,
1691 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1692 : errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1693 :
1694 186 : InitMaterializedSRF(fcinfo, 0);
1695 :
1696 : /*
1697 : * Check we have the expected number of output arguments. Aside from
1698 : * being a good safety check, we need a kluge here to detect API version
1699 : * 1.1, which was wedged into the code in an ill-considered way.
1700 : */
1701 186 : switch (rsinfo->setDesc->natts)
1702 : {
1703 0 : case PG_STAT_STATEMENTS_COLS_V1_0:
1704 0 : if (api_version != PGSS_V1_0)
1705 0 : elog(ERROR, "incorrect number of output arguments");
1706 0 : break;
1707 0 : case PG_STAT_STATEMENTS_COLS_V1_1:
1708 : /* pg_stat_statements() should have told us 1.0 */
1709 0 : if (api_version != PGSS_V1_0)
1710 0 : elog(ERROR, "incorrect number of output arguments");
1711 0 : api_version = PGSS_V1_1;
1712 0 : break;
1713 0 : case PG_STAT_STATEMENTS_COLS_V1_2:
1714 0 : if (api_version != PGSS_V1_2)
1715 0 : elog(ERROR, "incorrect number of output arguments");
1716 0 : break;
1717 2 : case PG_STAT_STATEMENTS_COLS_V1_3:
1718 2 : if (api_version != PGSS_V1_3)
1719 0 : elog(ERROR, "incorrect number of output arguments");
1720 2 : break;
1721 2 : case PG_STAT_STATEMENTS_COLS_V1_8:
1722 2 : if (api_version != PGSS_V1_8)
1723 0 : elog(ERROR, "incorrect number of output arguments");
1724 2 : break;
1725 2 : case PG_STAT_STATEMENTS_COLS_V1_9:
1726 2 : if (api_version != PGSS_V1_9)
1727 0 : elog(ERROR, "incorrect number of output arguments");
1728 2 : break;
1729 2 : case PG_STAT_STATEMENTS_COLS_V1_10:
1730 2 : if (api_version != PGSS_V1_10)
1731 0 : elog(ERROR, "incorrect number of output arguments");
1732 2 : break;
1733 2 : case PG_STAT_STATEMENTS_COLS_V1_11:
1734 2 : if (api_version != PGSS_V1_11)
1735 0 : elog(ERROR, "incorrect number of output arguments");
1736 2 : break;
1737 176 : case PG_STAT_STATEMENTS_COLS_V1_12:
1738 176 : if (api_version != PGSS_V1_12)
1739 0 : elog(ERROR, "incorrect number of output arguments");
1740 176 : break;
1741 0 : default:
1742 0 : elog(ERROR, "incorrect number of output arguments");
1743 : }
1744 :
1745 : /*
1746 : * We'd like to load the query text file (if needed) while not holding any
1747 : * lock on pgss->lock. In the worst case we'll have to do this again
1748 : * after we have the lock, but it's unlikely enough to make this a win
1749 : * despite occasional duplicated work. We need to reload if anybody
1750 : * writes to the file (either a retail qtext_store(), or a garbage
1751 : * collection) between this point and where we've gotten shared lock. If
1752 : * a qtext_store is actually in progress when we look, we might as well
1753 : * skip the speculative load entirely.
1754 : */
1755 186 : if (showtext)
1756 : {
1757 : int n_writers;
1758 :
1759 : /* Take the mutex so we can examine variables */
1760 186 : SpinLockAcquire(&pgss->mutex);
1761 186 : extent = pgss->extent;
1762 186 : n_writers = pgss->n_writers;
1763 186 : gc_count = pgss->gc_count;
1764 186 : SpinLockRelease(&pgss->mutex);
1765 :
1766 : /* No point in loading file now if there are active writers */
1767 186 : if (n_writers == 0)
1768 186 : qbuffer = qtext_load_file(&qbuffer_size);
1769 : }
1770 :
1771 : /*
1772 : * Get shared lock, load or reload the query text file if we must, and
1773 : * iterate over the hashtable entries.
1774 : *
1775 : * With a large hash table, we might be holding the lock rather longer
1776 : * than one could wish. However, this only blocks creation of new hash
1777 : * table entries, and the larger the hash table the less likely that is to
1778 : * be needed. So we can hope this is okay. Perhaps someday we'll decide
1779 : * we need to partition the hash table to limit the time spent holding any
1780 : * one lock.
1781 : */
1782 186 : LWLockAcquire(pgss->lock, LW_SHARED);
1783 :
1784 186 : if (showtext)
1785 : {
1786 : /*
1787 : * Here it is safe to examine extent and gc_count without taking the
1788 : * mutex. Note that although other processes might change
1789 : * pgss->extent just after we look at it, the strings they then write
1790 : * into the file cannot yet be referenced in the hashtable, so we
1791 : * don't care whether we see them or not.
1792 : *
1793 : * If qtext_load_file fails, we just press on; we'll return NULL for
1794 : * every query text.
1795 : */
1796 186 : if (qbuffer == NULL ||
1797 186 : pgss->extent != extent ||
1798 186 : pgss->gc_count != gc_count)
1799 : {
1800 0 : free(qbuffer);
1801 0 : qbuffer = qtext_load_file(&qbuffer_size);
1802 : }
1803 : }
1804 :
1805 186 : hash_seq_init(&hash_seq, pgss_hash);
1806 53004 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
1807 : {
1808 : Datum values[PG_STAT_STATEMENTS_COLS];
1809 : bool nulls[PG_STAT_STATEMENTS_COLS];
1810 52818 : int i = 0;
1811 : Counters tmp;
1812 : double stddev;
1813 52818 : int64 queryid = entry->key.queryid;
1814 : TimestampTz stats_since;
1815 : TimestampTz minmax_stats_since;
1816 :
1817 52818 : memset(values, 0, sizeof(values));
1818 52818 : memset(nulls, 0, sizeof(nulls));
1819 :
1820 52818 : values[i++] = ObjectIdGetDatum(entry->key.userid);
1821 52818 : values[i++] = ObjectIdGetDatum(entry->key.dbid);
1822 52818 : if (api_version >= PGSS_V1_9)
1823 52792 : values[i++] = BoolGetDatum(entry->key.toplevel);
1824 :
1825 52818 : if (is_allowed_role || entry->key.userid == userid)
1826 : {
1827 52810 : if (api_version >= PGSS_V1_2)
1828 52810 : values[i++] = Int64GetDatumFast(queryid);
1829 :
1830 52810 : if (showtext)
1831 : {
1832 52810 : char *qstr = qtext_fetch(entry->query_offset,
1833 : entry->query_len,
1834 : qbuffer,
1835 : qbuffer_size);
1836 :
1837 52810 : if (qstr)
1838 : {
1839 : char *enc;
1840 :
1841 52810 : enc = pg_any_to_server(qstr,
1842 : entry->query_len,
1843 : entry->encoding);
1844 :
1845 52810 : values[i++] = CStringGetTextDatum(enc);
1846 :
1847 52810 : if (enc != qstr)
1848 0 : pfree(enc);
1849 : }
1850 : else
1851 : {
1852 : /* Just return a null if we fail to find the text */
1853 0 : nulls[i++] = true;
1854 : }
1855 : }
1856 : else
1857 : {
1858 : /* Query text not requested */
1859 0 : nulls[i++] = true;
1860 : }
1861 : }
1862 : else
1863 : {
1864 : /* Don't show queryid */
1865 8 : if (api_version >= PGSS_V1_2)
1866 8 : nulls[i++] = true;
1867 :
1868 : /*
1869 : * Don't show query text, but hint as to the reason for not doing
1870 : * so if it was requested
1871 : */
1872 8 : if (showtext)
1873 8 : values[i++] = CStringGetTextDatum("<insufficient privilege>");
1874 : else
1875 0 : nulls[i++] = true;
1876 : }
1877 :
1878 : /* copy counters to a local variable to keep locking time short */
1879 52818 : SpinLockAcquire(&entry->mutex);
1880 52818 : tmp = entry->counters;
1881 52818 : SpinLockRelease(&entry->mutex);
1882 :
1883 : /*
1884 : * The spinlock is not required when reading these two as they are
1885 : * always updated when holding pgss->lock exclusively.
1886 : */
1887 52818 : stats_since = entry->stats_since;
1888 52818 : minmax_stats_since = entry->minmax_stats_since;
1889 :
1890 : /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1891 52818 : if (IS_STICKY(tmp))
1892 78 : continue;
1893 :
1894 : /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1895 158220 : for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1896 : {
1897 105480 : if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1898 : {
1899 105472 : values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1900 105472 : values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1901 : }
1902 :
1903 105480 : if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1904 : api_version >= PGSS_V1_8)
1905 : {
1906 105472 : values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1907 105472 : values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1908 105472 : values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1909 :
1910 : /*
1911 : * Note we are calculating the population variance here, not
1912 : * the sample variance, as we have data for the whole
1913 : * population, so Bessel's correction is not used, and we
1914 : * don't divide by tmp.calls - 1.
1915 : */
1916 105472 : if (tmp.calls[kind] > 1)
1917 9454 : stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1918 : else
1919 96018 : stddev = 0.0;
1920 105472 : values[i++] = Float8GetDatumFast(stddev);
1921 : }
1922 : }
1923 52740 : values[i++] = Int64GetDatumFast(tmp.rows);
1924 52740 : values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1925 52740 : values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1926 52740 : if (api_version >= PGSS_V1_1)
1927 52740 : values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1928 52740 : values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1929 52740 : values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1930 52740 : values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1931 52740 : if (api_version >= PGSS_V1_1)
1932 52740 : values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1933 52740 : values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1934 52740 : values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1935 52740 : values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1936 52740 : if (api_version >= PGSS_V1_1)
1937 : {
1938 52740 : values[i++] = Float8GetDatumFast(tmp.shared_blk_read_time);
1939 52740 : values[i++] = Float8GetDatumFast(tmp.shared_blk_write_time);
1940 : }
1941 52740 : if (api_version >= PGSS_V1_11)
1942 : {
1943 52678 : values[i++] = Float8GetDatumFast(tmp.local_blk_read_time);
1944 52678 : values[i++] = Float8GetDatumFast(tmp.local_blk_write_time);
1945 : }
1946 52740 : if (api_version >= PGSS_V1_10)
1947 : {
1948 52700 : values[i++] = Float8GetDatumFast(tmp.temp_blk_read_time);
1949 52700 : values[i++] = Float8GetDatumFast(tmp.temp_blk_write_time);
1950 : }
1951 52740 : if (api_version >= PGSS_V1_8)
1952 : {
1953 : char buf[256];
1954 : Datum wal_bytes;
1955 :
1956 52732 : values[i++] = Int64GetDatumFast(tmp.wal_records);
1957 52732 : values[i++] = Int64GetDatumFast(tmp.wal_fpi);
1958 :
1959 52732 : snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1960 :
1961 : /* Convert to numeric. */
1962 52732 : wal_bytes = DirectFunctionCall3(numeric_in,
1963 : CStringGetDatum(buf),
1964 : ObjectIdGetDatum(0),
1965 : Int32GetDatum(-1));
1966 52732 : values[i++] = wal_bytes;
1967 : }
1968 52740 : if (api_version >= PGSS_V1_12)
1969 : {
1970 52652 : values[i++] = Int64GetDatumFast(tmp.wal_buffers_full);
1971 : }
1972 52740 : if (api_version >= PGSS_V1_10)
1973 : {
1974 52700 : values[i++] = Int64GetDatumFast(tmp.jit_functions);
1975 52700 : values[i++] = Float8GetDatumFast(tmp.jit_generation_time);
1976 52700 : values[i++] = Int64GetDatumFast(tmp.jit_inlining_count);
1977 52700 : values[i++] = Float8GetDatumFast(tmp.jit_inlining_time);
1978 52700 : values[i++] = Int64GetDatumFast(tmp.jit_optimization_count);
1979 52700 : values[i++] = Float8GetDatumFast(tmp.jit_optimization_time);
1980 52700 : values[i++] = Int64GetDatumFast(tmp.jit_emission_count);
1981 52700 : values[i++] = Float8GetDatumFast(tmp.jit_emission_time);
1982 : }
1983 52740 : if (api_version >= PGSS_V1_11)
1984 : {
1985 52678 : values[i++] = Int64GetDatumFast(tmp.jit_deform_count);
1986 52678 : values[i++] = Float8GetDatumFast(tmp.jit_deform_time);
1987 : }
1988 52740 : if (api_version >= PGSS_V1_12)
1989 : {
1990 52652 : values[i++] = Int64GetDatumFast(tmp.parallel_workers_to_launch);
1991 52652 : values[i++] = Int64GetDatumFast(tmp.parallel_workers_launched);
1992 : }
1993 52740 : if (api_version >= PGSS_V1_11)
1994 : {
1995 52678 : values[i++] = TimestampTzGetDatum(stats_since);
1996 52678 : values[i++] = TimestampTzGetDatum(minmax_stats_since);
1997 : }
1998 :
1999 : Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2000 : api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2001 : api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2002 : api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2003 : api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2004 : api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2005 : api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2006 : api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2007 : api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2008 : -1 /* fail if you forget to update this assert */ ));
2009 :
2010 52740 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2011 : }
2012 :
2013 186 : LWLockRelease(pgss->lock);
2014 :
2015 186 : free(qbuffer);
2016 186 : }
2017 :
2018 : /* Number of output arguments (columns) for pg_stat_statements_info */
2019 : #define PG_STAT_STATEMENTS_INFO_COLS 2
2020 :
2021 : /*
2022 : * Return statistics of pg_stat_statements.
2023 : */
2024 : Datum
2025 4 : pg_stat_statements_info(PG_FUNCTION_ARGS)
2026 : {
2027 : pgssGlobalStats stats;
2028 : TupleDesc tupdesc;
2029 4 : Datum values[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2030 4 : bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2031 :
2032 4 : if (!pgss || !pgss_hash)
2033 0 : ereport(ERROR,
2034 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2035 : errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2036 :
2037 : /* Build a tuple descriptor for our result type */
2038 4 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2039 0 : elog(ERROR, "return type must be a row type");
2040 :
2041 : /* Read global statistics for pg_stat_statements */
2042 4 : SpinLockAcquire(&pgss->mutex);
2043 4 : stats = pgss->stats;
2044 4 : SpinLockRelease(&pgss->mutex);
2045 :
2046 4 : values[0] = Int64GetDatum(stats.dealloc);
2047 4 : values[1] = TimestampTzGetDatum(stats.stats_reset);
2048 :
2049 4 : PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
2050 : }
2051 :
2052 : /*
2053 : * Estimate shared memory space needed.
2054 : */
2055 : static Size
2056 14 : pgss_memsize(void)
2057 : {
2058 : Size size;
2059 :
2060 14 : size = MAXALIGN(sizeof(pgssSharedState));
2061 14 : size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
2062 :
2063 14 : return size;
2064 : }
2065 :
2066 : /*
2067 : * Allocate a new hashtable entry.
2068 : * caller must hold an exclusive lock on pgss->lock
2069 : *
2070 : * "query" need not be null-terminated; we rely on query_len instead
2071 : *
2072 : * If "sticky" is true, make the new entry artificially sticky so that it will
2073 : * probably still be there when the query finishes execution. We do this by
2074 : * giving it a median usage value rather than the normal value. (Strictly
2075 : * speaking, query strings are normalized on a best effort basis, though it
2076 : * would be difficult to demonstrate this even under artificial conditions.)
2077 : *
2078 : * Note: despite needing exclusive lock, it's not an error for the target
2079 : * entry to already exist. This is because pgss_store releases and
2080 : * reacquires lock after failing to find a match; so someone else could
2081 : * have made the entry while we waited to get exclusive lock.
2082 : */
2083 : static pgssEntry *
2084 107198 : entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2085 : bool sticky)
2086 : {
2087 : pgssEntry *entry;
2088 : bool found;
2089 :
2090 : /* Make space if needed */
2091 107198 : while (hash_get_num_entries(pgss_hash) >= pgss_max)
2092 0 : entry_dealloc();
2093 :
2094 : /* Find or create an entry with desired hash code */
2095 107198 : entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2096 :
2097 107198 : if (!found)
2098 : {
2099 : /* New entry, initialize it */
2100 :
2101 : /* reset the statistics */
2102 107198 : memset(&entry->counters, 0, sizeof(Counters));
2103 : /* set the appropriate initial usage count */
2104 107198 : entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
2105 : /* re-initialize the mutex each time ... we assume no one using it */
2106 107198 : SpinLockInit(&entry->mutex);
2107 : /* ... and don't forget the query text metadata */
2108 : Assert(query_len >= 0);
2109 107198 : entry->query_offset = query_offset;
2110 107198 : entry->query_len = query_len;
2111 107198 : entry->encoding = encoding;
2112 107198 : entry->stats_since = GetCurrentTimestamp();
2113 107198 : entry->minmax_stats_since = entry->stats_since;
2114 : }
2115 :
2116 107198 : return entry;
2117 : }
2118 :
2119 : /*
2120 : * qsort comparator for sorting into increasing usage order
2121 : */
2122 : static int
2123 0 : entry_cmp(const void *lhs, const void *rhs)
2124 : {
2125 0 : double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2126 0 : double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2127 :
2128 0 : if (l_usage < r_usage)
2129 0 : return -1;
2130 0 : else if (l_usage > r_usage)
2131 0 : return +1;
2132 : else
2133 0 : return 0;
2134 : }
2135 :
2136 : /*
2137 : * Deallocate least-used entries.
2138 : *
2139 : * Caller must hold an exclusive lock on pgss->lock.
2140 : */
2141 : static void
2142 0 : entry_dealloc(void)
2143 : {
2144 : HASH_SEQ_STATUS hash_seq;
2145 : pgssEntry **entries;
2146 : pgssEntry *entry;
2147 : int nvictims;
2148 : int i;
2149 : Size tottextlen;
2150 : int nvalidtexts;
2151 :
2152 : /*
2153 : * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2154 : * While we're scanning the table, apply the decay factor to the usage
2155 : * values, and update the mean query length.
2156 : *
2157 : * Note that the mean query length is almost immediately obsolete, since
2158 : * we compute it before not after discarding the least-used entries.
2159 : * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2160 : * making two passes to get a more current result. Likewise, the new
2161 : * cur_median_usage includes the entries we're about to zap.
2162 : */
2163 :
2164 0 : entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2165 :
2166 0 : i = 0;
2167 0 : tottextlen = 0;
2168 0 : nvalidtexts = 0;
2169 :
2170 0 : hash_seq_init(&hash_seq, pgss_hash);
2171 0 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
2172 : {
2173 0 : entries[i++] = entry;
2174 : /* "Sticky" entries get a different usage decay rate. */
2175 0 : if (IS_STICKY(entry->counters))
2176 0 : entry->counters.usage *= STICKY_DECREASE_FACTOR;
2177 : else
2178 0 : entry->counters.usage *= USAGE_DECREASE_FACTOR;
2179 : /* In the mean length computation, ignore dropped texts. */
2180 0 : if (entry->query_len >= 0)
2181 : {
2182 0 : tottextlen += entry->query_len + 1;
2183 0 : nvalidtexts++;
2184 : }
2185 : }
2186 :
2187 : /* Sort into increasing order by usage */
2188 0 : qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2189 :
2190 : /* Record the (approximate) median usage */
2191 0 : if (i > 0)
2192 0 : pgss->cur_median_usage = entries[i / 2]->counters.usage;
2193 : /* Record the mean query length */
2194 0 : if (nvalidtexts > 0)
2195 0 : pgss->mean_query_len = tottextlen / nvalidtexts;
2196 : else
2197 0 : pgss->mean_query_len = ASSUMED_LENGTH_INIT;
2198 :
2199 : /* Now zap an appropriate fraction of lowest-usage entries */
2200 0 : nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2201 0 : nvictims = Min(nvictims, i);
2202 :
2203 0 : for (i = 0; i < nvictims; i++)
2204 : {
2205 0 : hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2206 : }
2207 :
2208 0 : pfree(entries);
2209 :
2210 : /* Increment the number of times entries are deallocated */
2211 0 : SpinLockAcquire(&pgss->mutex);
2212 0 : pgss->stats.dealloc += 1;
2213 0 : SpinLockRelease(&pgss->mutex);
2214 0 : }
2215 :
2216 : /*
2217 : * Given a query string (not necessarily null-terminated), allocate a new
2218 : * entry in the external query text file and store the string there.
2219 : *
2220 : * If successful, returns true, and stores the new entry's offset in the file
2221 : * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2222 : * number of garbage collections that have occurred so far.
2223 : *
2224 : * On failure, returns false.
2225 : *
2226 : * At least a shared lock on pgss->lock must be held by the caller, so as
2227 : * to prevent a concurrent garbage collection. Share-lock-holding callers
2228 : * should pass a gc_count pointer to obtain the number of garbage collections,
2229 : * so that they can recheck the count after obtaining exclusive lock to
2230 : * detect whether a garbage collection occurred (and removed this entry).
2231 : */
static bool
qtext_store(const char *query, int query_len,
			Size *query_offset, int *gc_count)
{
	Size		off;
	int			fd;

	/*
	 * We use a spinlock to protect extent/n_writers/gc_count, so that
	 * multiple processes may execute this function concurrently.
	 *
	 * Reserve our slice of the file (query_len bytes plus a terminating NUL)
	 * and announce ourselves as an active writer; readers that see
	 * n_writers > 0 will skip speculative loads of the text file.
	 */
	SpinLockAcquire(&pgss->mutex);
	off = pgss->extent;
	pgss->extent += query_len + 1;
	pgss->n_writers++;
	if (gc_count)
		*gc_count = pgss->gc_count;
	SpinLockRelease(&pgss->mutex);

	*query_offset = off;

	/*
	 * Don't allow the file to grow larger than what qtext_load_file can
	 * (theoretically) handle.  This has been seen to be reachable on 32-bit
	 * platforms.
	 */
	if (unlikely(query_len >= MaxAllocHugeSize - off))
	{
		errno = EFBIG;			/* not quite right, but it'll do */
		fd = -1;				/* so the error path knows there's nothing to close */
		goto error;
	}

	/* Now write the data into the successfully-reserved part of the file */
	fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
	if (fd < 0)
		goto error;

	if (pg_pwrite(fd, query, query_len, off) != query_len)
		goto error;
	/* append the NUL terminator just past the query text */
	if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
		goto error;

	CloseTransientFile(fd);

	/* Mark our write complete */
	SpinLockAcquire(&pgss->mutex);
	pgss->n_writers--;
	SpinLockRelease(&pgss->mutex);

	return true;

error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not write file \"%s\": %m",
					PGSS_TEXT_FILE)));

	if (fd >= 0)
		CloseTransientFile(fd);

	/* Mark our write complete (must happen even on failure, or readers would
	 * wait on a writer that no longer exists) */
	SpinLockAcquire(&pgss->mutex);
	pgss->n_writers--;
	SpinLockRelease(&pgss->mutex);

	return false;
}
2300 :
2301 : /*
2302 : * Read the external query text file into a malloc'd buffer.
2303 : *
2304 : * Returns NULL (without throwing an error) if unable to read, eg
2305 : * file not there or insufficient memory.
2306 : *
2307 : * On success, the buffer size is also returned into *buffer_size.
2308 : *
2309 : * This can be called without any lock on pgss->lock, but in that case
2310 : * the caller is responsible for verifying that the result is sane.
2311 : */
static char *
qtext_load_file(Size *buffer_size)
{
	char	   *buf;
	int			fd;
	struct stat stat;
	Size		nread;

	fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
	if (fd < 0)
	{
		/* A missing file just means no texts stored yet; don't log that. */
		if (errno != ENOENT)
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not read file \"%s\": %m",
							PGSS_TEXT_FILE)));
		return NULL;
	}

	/* Get file length */
	if (fstat(fd, &stat))
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not stat file \"%s\": %m",
						PGSS_TEXT_FILE)));
		CloseTransientFile(fd);
		return NULL;
	}

	/* Allocate buffer; beware that off_t might be wider than size_t */
	if (stat.st_size <= MaxAllocHugeSize)
		buf = (char *) malloc(stat.st_size);
	else
		buf = NULL;
	if (buf == NULL)
	{
		ereport(LOG,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory"),
				 errdetail("Could not allocate enough memory to read file \"%s\".",
						   PGSS_TEXT_FILE)));
		CloseTransientFile(fd);
		return NULL;
	}

	/*
	 * OK, slurp in the file.  Windows fails if we try to read more than
	 * INT_MAX bytes at once, and other platforms might not like that either,
	 * so read a very large file in 1GB segments.
	 */
	nread = 0;
	while (nread < stat.st_size)
	{
		int			toread = Min(1024 * 1024 * 1024, stat.st_size - nread);

		/*
		 * If we get a short read and errno doesn't get set, the reason is
		 * probably that garbage collection truncated the file since we did
		 * the fstat(), so we don't log a complaint --- but we don't return
		 * the data, either, since it's most likely corrupt due to concurrent
		 * writes from garbage collection.
		 */
		errno = 0;
		if (read(fd, buf + nread, toread) != toread)
		{
			if (errno)
				ereport(LOG,
						(errcode_for_file_access(),
						 errmsg("could not read file \"%s\": %m",
								PGSS_TEXT_FILE)));
			free(buf);
			CloseTransientFile(fd);
			return NULL;
		}
		nread += toread;
	}

	/* Close failure is log-worthy but not fatal; we already have the data. */
	if (CloseTransientFile(fd) != 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));

	*buffer_size = nread;
	return buf;
}
2398 :
2399 : /*
2400 : * Locate a query text in the file image previously read by qtext_load_file().
2401 : *
2402 : * We validate the given offset/length, and return NULL if bogus. Otherwise,
2403 : * the result points to a null-terminated string within the buffer.
2404 : */
2405 : static char *
2406 158670 : qtext_fetch(Size query_offset, int query_len,
2407 : char *buffer, Size buffer_size)
2408 : {
2409 : /* File read failed? */
2410 158670 : if (buffer == NULL)
2411 0 : return NULL;
2412 : /* Bogus offset/length? */
2413 158670 : if (query_len < 0 ||
2414 158670 : query_offset + query_len >= buffer_size)
2415 0 : return NULL;
2416 : /* As a further sanity check, make sure there's a trailing null */
2417 158670 : if (buffer[query_offset + query_len] != '\0')
2418 0 : return NULL;
2419 : /* Looks OK */
2420 158670 : return buffer + query_offset;
2421 : }
2422 :
2423 : /*
2424 : * Do we need to garbage-collect the external query text file?
2425 : *
2426 : * Caller should hold at least a shared lock on pgss->lock.
2427 : */
2428 : static bool
2429 56002 : need_gc_qtexts(void)
2430 : {
2431 : Size extent;
2432 :
2433 : /* Read shared extent pointer */
2434 56002 : SpinLockAcquire(&pgss->mutex);
2435 56002 : extent = pgss->extent;
2436 56002 : SpinLockRelease(&pgss->mutex);
2437 :
2438 : /*
2439 : * Don't proceed if file does not exceed 512 bytes per possible entry.
2440 : *
2441 : * Here and in the next test, 32-bit machines have overflow hazards if
2442 : * pgss_max and/or mean_query_len are large. Force the multiplications
2443 : * and comparisons to be done in uint64 arithmetic to forestall trouble.
2444 : */
2445 56002 : if ((uint64) extent < (uint64) 512 * pgss_max)
2446 56002 : return false;
2447 :
2448 : /*
2449 : * Don't proceed if file is less than about 50% bloat. Nothing can or
2450 : * should be done in the event of unusually large query texts accounting
2451 : * for file's large size. We go to the trouble of maintaining the mean
2452 : * query length in order to prevent garbage collection from thrashing
2453 : * uselessly.
2454 : */
2455 0 : if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2456 0 : return false;
2457 :
2458 0 : return true;
2459 : }
2460 :
2461 : /*
2462 : * Garbage-collect orphaned query texts in external file.
2463 : *
2464 : * This won't be called often in the typical case, since it's likely that
2465 : * there won't be too much churn, and besides, a similar compaction process
2466 : * occurs when serializing to disk at shutdown or as part of resetting.
2467 : * Despite this, it seems prudent to plan for the edge case where the file
2468 : * becomes unreasonably large, with no other method of compaction likely to
2469 : * occur in the foreseeable future.
2470 : *
2471 : * The caller must hold an exclusive lock on pgss->lock.
2472 : *
2473 : * At the first sign of trouble we unlink the query text file to get a clean
2474 : * slate (although existing statistics are retained), rather than risk
2475 : * thrashing by allowing the same problem case to recur indefinitely.
2476 : */
static void
gc_qtexts(void)
{
	char	   *qbuffer;
	Size		qbuffer_size;
	FILE	   *qfile = NULL;
	HASH_SEQ_STATUS hash_seq;
	pgssEntry  *entry;
	Size		extent;
	int			nentries;

	/*
	 * When called from pgss_store, some other session might have proceeded
	 * with garbage collection in the no-lock-held interim of lock strength
	 * escalation.  Check once more that this is actually necessary.
	 */
	if (!need_gc_qtexts())
		return;

	/*
	 * Load the old texts file.  If we fail (out of memory, for instance),
	 * invalidate query texts.  Hopefully this is rare.  It might seem better
	 * to leave things alone on an OOM failure, but the problem is that the
	 * file is only going to get bigger; hoping for a future non-OOM result is
	 * risky and can easily lead to complete denial of service.
	 */
	qbuffer = qtext_load_file(&qbuffer_size);
	if (qbuffer == NULL)
		goto gc_fail;

	/*
	 * We overwrite the query texts file in place, so as to reduce the risk of
	 * an out-of-disk-space failure.  Since the file is guaranteed not to get
	 * larger, this should always work on traditional filesystems; though we
	 * could still lose on copy-on-write filesystems.
	 */
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write file \"%s\": %m",
						PGSS_TEXT_FILE)));
		goto gc_fail;
	}

	/* Copy each entry's text to the front of the file, tracking new offsets. */
	extent = 0;
	nentries = 0;

	hash_seq_init(&hash_seq, pgss_hash);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		int			query_len = entry->query_len;
		char	   *qry = qtext_fetch(entry->query_offset,
									  query_len,
									  qbuffer,
									  qbuffer_size);

		if (qry == NULL)
		{
			/* Trouble ... drop the text */
			entry->query_offset = 0;
			entry->query_len = -1;
			/* entry will not be counted in mean query length computation */
			continue;
		}

		/* write the text plus its terminating NUL */
		if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
		{
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not write file \"%s\": %m",
							PGSS_TEXT_FILE)));
			/* must terminate the seqscan before bailing out */
			hash_seq_term(&hash_seq);
			goto gc_fail;
		}

		/* point the entry at its new, compacted location */
		entry->query_offset = extent;
		extent += query_len + 1;
		nentries++;
	}

	/*
	 * Truncate away any now-unused space.  If this fails for some odd reason,
	 * we log it, but there's no need to fail.
	 */
	if (ftruncate(fileno(qfile), extent) != 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not truncate file \"%s\": %m",
						PGSS_TEXT_FILE)));

	/* FreeFile also flushes; a failure here means the new file is suspect */
	if (FreeFile(qfile))
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write file \"%s\": %m",
						PGSS_TEXT_FILE)));
		qfile = NULL;
		goto gc_fail;
	}

	elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
		 pgss->extent, extent);

	/* Reset the shared extent pointer */
	pgss->extent = extent;

	/*
	 * Also update the mean query length, to be sure that need_gc_qtexts()
	 * won't still think we have a problem.
	 */
	if (nentries > 0)
		pgss->mean_query_len = extent / nentries;
	else
		pgss->mean_query_len = ASSUMED_LENGTH_INIT;

	free(qbuffer);

	/*
	 * OK, count a garbage collection cycle.  (Note: even though we have
	 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
	 * other processes may examine gc_count while holding only the mutex.
	 * Also, we have to advance the count *after* we've rewritten the file,
	 * else other processes might not realize they read a stale file.)
	 */
	record_gc_qtexts();

	return;

gc_fail:
	/* clean up resources */
	if (qfile)
		FreeFile(qfile);
	free(qbuffer);

	/*
	 * Since the contents of the external file are now uncertain, mark all
	 * hashtable entries as having invalid texts.
	 */
	hash_seq_init(&hash_seq, pgss_hash);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		entry->query_offset = 0;
		entry->query_len = -1;
	}

	/*
	 * Destroy the query text file and create a new, empty one
	 */
	(void) unlink(PGSS_TEXT_FILE);
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not recreate file \"%s\": %m",
						PGSS_TEXT_FILE)));
	else
		FreeFile(qfile);

	/* Reset the shared extent pointer */
	pgss->extent = 0;

	/* Reset mean_query_len to match the new state */
	pgss->mean_query_len = ASSUMED_LENGTH_INIT;

	/*
	 * Bump the GC count even though we failed.
	 *
	 * This is needed to make concurrent readers of file without any lock on
	 * pgss->lock notice existence of new version of file.  Once readers
	 * subsequently observe a change in GC count with pgss->lock held, that
	 * forces a safe reopen of file.  Writers also require that we bump here,
	 * of course.  (As required by locking protocol, readers and writers don't
	 * trust earlier file contents until gc_count is found unchanged after
	 * pgss->lock acquired in shared or exclusive mode respectively.)
	 */
	record_gc_qtexts();
}
2656 :
/*
 * Reset a single hashtable entry, honoring the caller's minmax_only flag.
 *
 * Relies on variables in the caller's scope: minmax_only, stats_reset,
 * num_remove, and the global pgss_hash.  A NULL entry is a no-op.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and is safe in unbraced if/else contexts (CERT PRE10-C); the macro
 * parameter is parenthesized at each use.
 */
#define SINGLE_ENTRY_RESET(e) \
	do { \
		if (e) { \
			if (minmax_only) { \
				/* When requested reset only min/max statistics of an entry */ \
				for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
				{ \
					(e)->counters.max_time[kind] = 0; \
					(e)->counters.min_time[kind] = 0; \
				} \
				(e)->minmax_stats_since = stats_reset; \
			} \
			else \
			{ \
				/* Remove the key otherwise */ \
				hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
				num_remove++; \
			} \
		} \
	} while (0)
2675 :
/*
 * Reset entries corresponding to the parameters passed.
 *
 * userid, dbid and queryid select which entries to reset; a zero value acts
 * as a wildcard.  If all three are supplied we do two direct hash lookups
 * (top-level and non-top-level variants of the key); if only some are
 * supplied we scan the whole table; if none are supplied everything is reset.
 *
 * minmax_only restricts the reset to each entry's min/max statistics
 * (see SINGLE_ENTRY_RESET), leaving the entries themselves in place.
 *
 * Returns the timestamp recorded as the reset time.
 */
static TimestampTz
entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only)
{
	HASH_SEQ_STATUS hash_seq;
	pgssEntry  *entry;
	FILE	   *qfile;
	long		num_entries;
	long		num_remove = 0;	/* counted by SINGLE_ENTRY_RESET */
	pgssHashKey key;
	TimestampTz stats_reset;

	if (!pgss || !pgss_hash)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));

	/*
	 * Exclusive lock: we may remove entries and rewrite the external
	 * query-text file (see locking notes in the file header).
	 */
	LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
	num_entries = hash_get_num_entries(pgss_hash);

	stats_reset = GetCurrentTimestamp();

	if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
	{
		/* If all the parameters are available, use the fast path. */
		memset(&key, 0, sizeof(pgssHashKey));
		key.userid = userid;
		key.dbid = dbid;
		key.queryid = queryid;

		/*
		 * Reset the entry if it exists, starting with the non-top-level
		 * entry.
		 */
		key.toplevel = false;
		entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);

		SINGLE_ENTRY_RESET(entry);

		/* Also reset the top-level entry if it exists. */
		key.toplevel = true;
		entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);

		SINGLE_ENTRY_RESET(entry);
	}
	else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
	{
		/* Reset entries corresponding to valid parameters. */
		hash_seq_init(&hash_seq, pgss_hash);
		while ((entry = hash_seq_search(&hash_seq)) != NULL)
		{
			/* zero-valued parameters match any entry */
			if ((!userid || entry->key.userid == userid) &&
				(!dbid || entry->key.dbid == dbid) &&
				(!queryid || entry->key.queryid == queryid))
			{
				SINGLE_ENTRY_RESET(entry);
			}
		}
	}
	else
	{
		/* Reset all entries. */
		hash_seq_init(&hash_seq, pgss_hash);
		while ((entry = hash_seq_search(&hash_seq)) != NULL)
		{
			SINGLE_ENTRY_RESET(entry);
		}
	}

	/* All entries are removed? */
	if (num_entries != num_remove)
		goto release_lock;

	/*
	 * Reset global statistics for pg_stat_statements since all entries are
	 * removed.
	 */
	SpinLockAcquire(&pgss->mutex);
	pgss->stats.dealloc = 0;
	pgss->stats.stats_reset = stats_reset;
	SpinLockRelease(&pgss->mutex);

	/*
	 * Write new empty query file, perhaps even creating a new one to recover
	 * if the file was missing.
	 */
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
	{
		/* LOG, not ERROR: stats were already reset, so keep going */
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m",
						PGSS_TEXT_FILE)));
		goto done;
	}

	/* If ftruncate fails, log it, but it's not a fatal problem */
	if (ftruncate(fileno(qfile), 0) != 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not truncate file \"%s\": %m",
						PGSS_TEXT_FILE)));

	FreeFile(qfile);

done:
	pgss->extent = 0;
	/* This counts as a query text garbage collection for our purposes */
	record_gc_qtexts();

release_lock:
	LWLockRelease(pgss->lock);

	return stats_reset;
}
2793 :
/*
 * Generate a normalized version of the query string that will be used to
 * represent all similar queries.
 *
 * Note that the normalized representation may well vary depending on
 * just which "equivalent" query is used to create the hashtable entry.
 * We assume this is OK.
 *
 * If query_loc > 0, then "query" has been advanced by that much compared to
 * the original string start, so we need to translate the provided locations
 * to compensate.  (This lets us avoid re-scanning statements before the one
 * of interest, so it's worth doing.)
 *
 * *query_len_p contains the input string length, and is updated with
 * the result string length on exit.  The resulting string might be longer
 * or shorter depending on what happens with replacement of constants.
 *
 * Returns a palloc'd string.
 */
static char *
generate_normalized_query(JumbleState *jstate, const char *query,
						  int query_loc, int *query_len_p)
{
	char	   *norm_query;
	int			query_len = *query_len_p;
	int			i,
				norm_query_buflen,	/* Space allowed for norm_query */
				len_to_wrt,		/* Length (in bytes) to write */
				quer_loc = 0,	/* Source query byte location */
				n_quer_loc = 0, /* Normalized query byte location */
				last_off = 0,	/* Offset from start for previous tok */
				last_tok_len = 0;	/* Length (in bytes) of that tok */

	/*
	 * Get constants' lengths (core system only gives us locations).  Note
	 * this also ensures the items are sorted by location.
	 */
	fill_in_constant_lengths(jstate, query, query_loc);

	/*
	 * Allow for $n symbols to be longer than the constants they replace.
	 * Constants must take at least one byte in text form, while a $n symbol
	 * certainly isn't more than 11 bytes, even if n reaches INT_MAX.  We
	 * could refine that limit based on the max value of n for the current
	 * query, but it hardly seems worth any extra effort to do so.
	 * (Hence 10 extra bytes per constant: 11-byte symbol minus the 1 byte
	 * the constant itself must occupy.)
	 */
	norm_query_buflen = query_len + jstate->clocations_count * 10;

	/* Allocate result buffer (+1 for the terminating NUL) */
	norm_query = palloc(norm_query_buflen + 1);

	for (i = 0; i < jstate->clocations_count; i++)
	{
		int			off,		/* Offset from start for cur tok */
					tok_len;	/* Length (in bytes) of that tok */

		off = jstate->clocations[i].location;
		/* Adjust recorded location if we're dealing with partial string */
		off -= query_loc;

		tok_len = jstate->clocations[i].length;

		/* fill_in_constant_lengths left duplicates at -1 */
		if (tok_len < 0)
			continue;			/* ignore any duplicates */

		/* Copy next chunk (what precedes the next constant) */
		len_to_wrt = off - last_off;
		len_to_wrt -= last_tok_len;

		Assert(len_to_wrt >= 0);
		memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
		n_quer_loc += len_to_wrt;

		/*
		 * And insert a param symbol in place of the constant token; numbering
		 * starts after any parameter symbols already present in the query.
		 */
		n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
							  i + 1 + jstate->highest_extern_param_id);

		quer_loc = off + tok_len;
		last_off = off;
		last_tok_len = tok_len;
	}

	/*
	 * We've copied up until the last ignorable constant.  Copy over the
	 * remaining bytes of the original query string.
	 */
	len_to_wrt = query_len - quer_loc;

	Assert(len_to_wrt >= 0);
	memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
	n_quer_loc += len_to_wrt;

	Assert(n_quer_loc <= norm_query_buflen);
	norm_query[n_quer_loc] = '\0';

	*query_len_p = n_quer_loc;
	return norm_query;
}
2892 :
/*
 * Given a valid SQL string and an array of constant-location records,
 * fill in the textual lengths of those constants.
 *
 * The constants may use any allowed constant syntax, such as float literals,
 * bit-strings, single-quoted strings and dollar-quoted strings.  This is
 * accomplished by using the public API for the core scanner.
 *
 * It is the caller's job to ensure that the string is a valid SQL statement
 * with constants at the indicated locations.  Since in practice the string
 * has already been parsed, and the locations that the caller provides will
 * have originated from within the authoritative parser, this should not be
 * a problem.
 *
 * Duplicate constant pointers are possible, and will have their lengths
 * marked as '-1', so that they are later ignored.  (Actually, we assume the
 * lengths were initialized as -1 to start with, and don't change them here.)
 *
 * If query_loc > 0, then "query" has been advanced by that much compared to
 * the original string start, so we need to translate the provided locations
 * to compensate.  (This lets us avoid re-scanning statements before the one
 * of interest, so it's worth doing.)
 *
 * N.B. There is an assumption that a '-' character at a Const location begins
 * a negative numeric constant.  This precludes there ever being another
 * reason for a constant to start with a '-'.
 */
static void
fill_in_constant_lengths(JumbleState *jstate, const char *query,
						 int query_loc)
{
	LocationLen *locs;
	core_yyscan_t yyscanner;
	core_yy_extra_type yyextra;
	core_YYSTYPE yylval;
	YYLTYPE		yylloc;
	int			last_loc = -1;	/* location of previously processed const */
	int			i;

	/*
	 * Sort the records by location so that we can process them in order while
	 * scanning the query text.
	 */
	if (jstate->clocations_count > 1)
		qsort(jstate->clocations, jstate->clocations_count,
			  sizeof(LocationLen), comp_location);
	locs = jstate->clocations;

	/* initialize the flex scanner --- should match raw_parser() */
	yyscanner = scanner_init(query,
							 &yyextra,
							 &ScanKeywords,
							 ScanKeywordTokens);

	/* we don't want to re-emit any escape string warnings */
	yyextra.escape_string_warning = false;

	/* Search for each constant, in sequence */
	for (i = 0; i < jstate->clocations_count; i++)
	{
		int			loc = locs[i].location;
		int			tok;

		/* Adjust recorded location if we're dealing with partial string */
		loc -= query_loc;

		Assert(loc >= 0);

		/* locations are sorted, so equal-or-earlier means duplicate */
		if (loc <= last_loc)
			continue;			/* Duplicate constant, ignore */

		/* Lex tokens until we find the desired constant */
		for (;;)
		{
			tok = core_yylex(&yylval, &yylloc, yyscanner);

			/* We should not hit end-of-string, but if we do, behave sanely */
			if (tok == 0)
				break;			/* out of inner for-loop */

			/*
			 * We should find the token position exactly, but if we somehow
			 * run past it, work with that.
			 */
			if (yylloc >= loc)
			{
				if (query[loc] == '-')
				{
					/*
					 * It's a negative value - this is the one and only case
					 * where we replace more than a single token.
					 *
					 * Do not compensate for the core system's special-case
					 * adjustment of location to that of the leading '-'
					 * operator in the event of a negative constant.  It is
					 * also useful for our purposes to start from the minus
					 * symbol.  In this way, queries like "select * from foo
					 * where bar = 1" and "select * from foo where bar = -2"
					 * will have identical normalized query strings.
					 */
					tok = core_yylex(&yylval, &yylloc, yyscanner);
					if (tok == 0)
						break;	/* out of inner for-loop */
				}

				/*
				 * We now rely on the assumption that flex has placed a zero
				 * byte after the text of the current token in scanbuf.
				 */
				locs[i].length = strlen(yyextra.scanbuf + loc);
				break;			/* out of inner for-loop */
			}
		}

		/* If we hit end-of-string, give up, leaving remaining lengths -1 */
		if (tok == 0)
			break;

		last_loc = loc;
	}

	scanner_finish(yyscanner);
}
3016 :
3017 : /*
3018 : * comp_location: comparator for qsorting LocationLen structs by location
3019 : */
3020 : static int
3021 76156 : comp_location(const void *a, const void *b)
3022 : {
3023 76156 : int l = ((const LocationLen *) a)->location;
3024 76156 : int r = ((const LocationLen *) b)->location;
3025 :
3026 76156 : return pg_cmp_s32(l, r);
3027 : }
|