Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_stat_statements.c
4 : * Track statement planning and execution times as well as resource
5 : * usage across a whole database cluster.
6 : *
7 : * Execution costs are totaled for each distinct source query, and kept in
8 : * a shared hashtable. (We track only as many distinct queries as will fit
9 : * in the designated amount of shared memory.)
10 : *
11 : * Starting in Postgres 9.2, this module normalized query entries. As of
12 : * Postgres 14, the normalization is done by the core if compute_query_id is
13 : * enabled, or optionally by third-party modules.
14 : *
15 : * To facilitate presenting entries to users, we create "representative" query
16 : * strings in which constants are replaced with parameter symbols ($n), to
17 : * make it clearer what a normalized entry can represent. To save on shared
18 : * memory, and to avoid having to truncate oversized query strings, we store
19 : * these strings in a temporary external query-texts file. Offsets into this
20 : * file are kept in shared memory.
21 : *
22 : * Note about locking issues: to create or delete an entry in the shared
23 : * hashtable, one must hold pgss->lock exclusively. Modifying any field
24 : * in an entry except the counters requires the same. To look up an entry,
25 : * one must hold the lock shared. To read or update the counters within
26 : * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27 : * disappear!) and also take the entry's mutex spinlock.
28 : * The shared state variable pgss->extent (the next free spot in the external
29 : * query-text file) should be accessed only while holding either the
30 : * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31 : * allow reserving file space while holding only shared lock on pgss->lock.
32 : * Rewriting the entire external query-text file, eg for garbage collection,
33 : * requires holding pgss->lock exclusively; this allows individual entries
34 : * in the file to be read or written while holding only shared lock.
35 : *
36 : *
37 : * Copyright (c) 2008-2024, PostgreSQL Global Development Group
38 : *
39 : * IDENTIFICATION
40 : * contrib/pg_stat_statements/pg_stat_statements.c
41 : *
42 : *-------------------------------------------------------------------------
43 : */
44 : #include "postgres.h"
45 :
46 : #include <math.h>
47 : #include <sys/stat.h>
48 : #include <unistd.h>
49 :
50 : #include "access/parallel.h"
51 : #include "catalog/pg_authid.h"
52 : #include "common/hashfn.h"
53 : #include "common/int.h"
54 : #include "executor/instrument.h"
55 : #include "funcapi.h"
56 : #include "jit/jit.h"
57 : #include "mb/pg_wchar.h"
58 : #include "miscadmin.h"
59 : #include "nodes/queryjumble.h"
60 : #include "optimizer/planner.h"
61 : #include "parser/analyze.h"
62 : #include "parser/parsetree.h"
63 : #include "parser/scanner.h"
64 : #include "parser/scansup.h"
65 : #include "pgstat.h"
66 : #include "storage/fd.h"
67 : #include "storage/ipc.h"
68 : #include "storage/lwlock.h"
69 : #include "storage/shmem.h"
70 : #include "storage/spin.h"
71 : #include "tcop/utility.h"
72 : #include "utils/acl.h"
73 : #include "utils/builtins.h"
74 : #include "utils/memutils.h"
75 : #include "utils/timestamp.h"
76 :
77 14 : PG_MODULE_MAGIC;
78 :
79 : /* Location of permanent stats file (valid when database is shut down) */
80 : #define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
81 :
82 : /*
83 : * Location of external query text file.
84 : */
85 : #define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
86 :
87 : /* Magic number identifying the stats file format */
88 : static const uint32 PGSS_FILE_HEADER = 0x20220408;
89 :
90 : /* PostgreSQL major version number, changes in which invalidate all entries */
91 : static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
92 :
/*
 * Tunables for the usage accounting that drives entry eviction.
 *
 * XXX: Should USAGE_EXEC reflect execution time and/or buffer usage?
 */
#define USAGE_EXEC(duration)	(1.0)
#define USAGE_INIT				(1.0)	/* including initial planning */
#define ASSUMED_MEDIAN_INIT		(10.0)	/* initial assumed median usage */
#define ASSUMED_LENGTH_INIT		1024	/* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR	(0.99)	/* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR	(0.50)	/* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT	5		/* free this % of entries at once */

/*
 * An entry is "sticky" while it has recorded neither a planning call nor an
 * execution call.  The argument is a Counters struct (not a pointer); it is
 * parenthesized so that any expression yielding one, such as *ptr, expands
 * correctly.
 */
#define IS_STICKY(c)	(((c).calls[PGSS_PLAN] + (c).calls[PGSS_EXEC]) == 0)
102 :
/*
 * Extension API versions.
 *
 * Each value identifies one version of the extension's SQL-level objects, so
 * that objects created by older extension versions can still be supported.
 */
typedef enum pgssVersion
{
	PGSS_V1_0 = 0,
	PGSS_V1_1,
	PGSS_V1_2,
	PGSS_V1_3,
	PGSS_V1_8,
	PGSS_V1_9,
	PGSS_V1_10,
	PGSS_V1_11,
} pgssVersion;
117 :
/*
 * Kind of activity being recorded for a statement.
 *
 * PGSS_PLAN and PGSS_EXEC double as indexes into the per-kind arrays of the
 * Counters struct, so they must stay 0 and 1 respectively;
 * pg_stat_statements_internal() relies on that order as well.
 */
typedef enum pgssStoreKind
{
	PGSS_INVALID = -1,

	PGSS_PLAN = 0,
	PGSS_EXEC,

	PGSS_NUMKIND				/* Must be last value of this enum */
} pgssStoreKind;
132 :
133 : /*
134 : * Hashtable key that defines the identity of a hashtable entry. We separate
135 : * queries by user and by database even if they are otherwise identical.
136 : *
137 : * If you add a new key to this struct, make sure to teach pgss_store() to
138 : * zero the padding bytes. Otherwise, things will break, because pgss_hash is
139 : * created using HASH_BLOBS, and thus tag_hash is used to hash this.
140 :
141 : */
142 : typedef struct pgssHashKey
143 : {
144 : Oid userid; /* user OID */
145 : Oid dbid; /* database OID */
146 : uint64 queryid; /* query identifier */
147 : bool toplevel; /* query executed at top level */
148 : } pgssHashKey;
149 :
150 : /*
151 : * The actual stats counters kept within pgssEntry.
152 : */
153 : typedef struct Counters
154 : {
155 : int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
156 : double total_time[PGSS_NUMKIND]; /* total planning/execution time,
157 : * in msec */
158 : double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
159 : * msec since min/max reset */
160 : double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
161 : * msec since min/max reset */
162 : double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
163 : * msec */
164 : double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
165 : * planning/execution time in msec */
166 : int64 rows; /* total # of retrieved or affected rows */
167 : int64 shared_blks_hit; /* # of shared buffer hits */
168 : int64 shared_blks_read; /* # of shared disk blocks read */
169 : int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
170 : int64 shared_blks_written; /* # of shared disk blocks written */
171 : int64 local_blks_hit; /* # of local buffer hits */
172 : int64 local_blks_read; /* # of local disk blocks read */
173 : int64 local_blks_dirtied; /* # of local disk blocks dirtied */
174 : int64 local_blks_written; /* # of local disk blocks written */
175 : int64 temp_blks_read; /* # of temp blocks read */
176 : int64 temp_blks_written; /* # of temp blocks written */
177 : double shared_blk_read_time; /* time spent reading shared blocks,
178 : * in msec */
179 : double shared_blk_write_time; /* time spent writing shared blocks,
180 : * in msec */
181 : double local_blk_read_time; /* time spent reading local blocks, in
182 : * msec */
183 : double local_blk_write_time; /* time spent writing local blocks, in
184 : * msec */
185 : double temp_blk_read_time; /* time spent reading temp blocks, in msec */
186 : double temp_blk_write_time; /* time spent writing temp blocks, in
187 : * msec */
188 : double usage; /* usage factor */
189 : int64 wal_records; /* # of WAL records generated */
190 : int64 wal_fpi; /* # of WAL full page images generated */
191 : uint64 wal_bytes; /* total amount of WAL generated in bytes */
192 : int64 jit_functions; /* total number of JIT functions emitted */
193 : double jit_generation_time; /* total time to generate jit code */
194 : int64 jit_inlining_count; /* number of times inlining time has been
195 : * > 0 */
196 : double jit_deform_time; /* total time to deform tuples in jit code */
197 : int64 jit_deform_count; /* number of times deform time has been >
198 : * 0 */
199 :
200 : double jit_inlining_time; /* total time to inline jit code */
201 : int64 jit_optimization_count; /* number of times optimization time
202 : * has been > 0 */
203 : double jit_optimization_time; /* total time to optimize jit code */
204 : int64 jit_emission_count; /* number of times emission time has been
205 : * > 0 */
206 : double jit_emission_time; /* total time to emit jit code */
207 : } Counters;
208 :
209 : /*
210 : * Global statistics for pg_stat_statements
211 : */
212 : typedef struct pgssGlobalStats
213 : {
214 : int64 dealloc; /* # of times entries were deallocated */
215 : TimestampTz stats_reset; /* timestamp with all stats reset */
216 : } pgssGlobalStats;
217 :
218 : /*
219 : * Statistics per statement
220 : *
221 : * Note: in event of a failure in garbage collection of the query text file,
222 : * we reset query_offset to zero and query_len to -1. This will be seen as
223 : * an invalid state by qtext_fetch().
224 : */
225 : typedef struct pgssEntry
226 : {
227 : pgssHashKey key; /* hash key of entry - MUST BE FIRST */
228 : Counters counters; /* the statistics for this query */
229 : Size query_offset; /* query text offset in external file */
230 : int query_len; /* # of valid bytes in query string, or -1 */
231 : int encoding; /* query text encoding */
232 : TimestampTz stats_since; /* timestamp of entry allocation */
233 : TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
234 : slock_t mutex; /* protects the counters only */
235 : } pgssEntry;
236 :
237 : /*
238 : * Global shared state
239 : */
240 : typedef struct pgssSharedState
241 : {
242 : LWLock *lock; /* protects hashtable search/modification */
243 : double cur_median_usage; /* current median usage in hashtable */
244 : Size mean_query_len; /* current mean entry text length */
245 : slock_t mutex; /* protects following fields only: */
246 : Size extent; /* current extent of query file */
247 : int n_writers; /* number of active writers to query file */
248 : int gc_count; /* query file garbage collection cycle count */
249 : pgssGlobalStats stats; /* global statistics for pgss */
250 : } pgssSharedState;
251 :
252 : /*---- Local variables ----*/
253 :
254 : /* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
255 : static int nesting_level = 0;
256 :
257 : /* Saved hook values in case of unload */
258 : static shmem_request_hook_type prev_shmem_request_hook = NULL;
259 : static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
260 : static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
261 : static planner_hook_type prev_planner_hook = NULL;
262 : static ExecutorStart_hook_type prev_ExecutorStart = NULL;
263 : static ExecutorRun_hook_type prev_ExecutorRun = NULL;
264 : static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
265 : static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
266 : static ProcessUtility_hook_type prev_ProcessUtility = NULL;
267 :
268 : /* Links to shared memory state */
269 : static pgssSharedState *pgss = NULL;
270 : static HTAB *pgss_hash = NULL;
271 :
272 : /*---- GUC variables ----*/
273 :
274 : typedef enum
275 : {
276 : PGSS_TRACK_NONE, /* track no statements */
277 : PGSS_TRACK_TOP, /* only top level statements */
278 : PGSS_TRACK_ALL, /* all statements, including nested ones */
279 : } PGSSTrackLevel;
280 :
281 : static const struct config_enum_entry track_options[] =
282 : {
283 : {"none", PGSS_TRACK_NONE, false},
284 : {"top", PGSS_TRACK_TOP, false},
285 : {"all", PGSS_TRACK_ALL, false},
286 : {NULL, 0, false}
287 : };
288 :
289 : static int pgss_max = 5000; /* max # statements to track */
290 : static int pgss_track = PGSS_TRACK_TOP; /* tracking level */
291 : static bool pgss_track_utility = true; /* whether to track utility commands */
292 : static bool pgss_track_planning = false; /* whether to track planning
293 : * duration */
294 : static bool pgss_save = true; /* whether to save stats across shutdown */
295 :
296 :
/*
 * Should statements at nesting depth "level" be tracked?  Never in a
 * parallel worker; otherwise always when tracking "all", and only at depth
 * zero when tracking "top".
 */
#define pgss_enabled(level) \
	(!IsParallelWorker() && \
	 (pgss_track == PGSS_TRACK_ALL || \
	  (pgss_track == PGSS_TRACK_TOP && (level) == 0)))

/*
 * Bump the query-text garbage collection cycle counter, holding the shared
 * state's spinlock while doing so.
 */
#define record_gc_qtexts() \
	do { \
		volatile pgssSharedState *shared_state = (volatile pgssSharedState *) pgss; \
		SpinLockAcquire(&shared_state->mutex); \
		shared_state->gc_count++; \
		SpinLockRelease(&shared_state->mutex); \
	} while (0)
309 :
310 : /*---- Function declarations ----*/
311 :
312 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
313 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_7);
314 30 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_11);
315 0 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
316 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_3);
317 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_8);
318 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_9);
319 12 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_10);
320 38 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_11);
321 0 : PG_FUNCTION_INFO_V1(pg_stat_statements);
322 14 : PG_FUNCTION_INFO_V1(pg_stat_statements_info);
323 :
324 : static void pgss_shmem_request(void);
325 : static void pgss_shmem_startup(void);
326 : static void pgss_shmem_shutdown(int code, Datum arg);
327 : static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
328 : JumbleState *jstate);
329 : static PlannedStmt *pgss_planner(Query *parse,
330 : const char *query_string,
331 : int cursorOptions,
332 : ParamListInfo boundParams);
333 : static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
334 : static void pgss_ExecutorRun(QueryDesc *queryDesc,
335 : ScanDirection direction,
336 : uint64 count, bool execute_once);
337 : static void pgss_ExecutorFinish(QueryDesc *queryDesc);
338 : static void pgss_ExecutorEnd(QueryDesc *queryDesc);
339 : static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
340 : bool readOnlyTree,
341 : ProcessUtilityContext context, ParamListInfo params,
342 : QueryEnvironment *queryEnv,
343 : DestReceiver *dest, QueryCompletion *qc);
344 : static void pgss_store(const char *query, uint64 queryId,
345 : int query_location, int query_len,
346 : pgssStoreKind kind,
347 : double total_time, uint64 rows,
348 : const BufferUsage *bufusage,
349 : const WalUsage *walusage,
350 : const struct JitInstrumentation *jitusage,
351 : JumbleState *jstate);
352 : static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
353 : pgssVersion api_version,
354 : bool showtext);
355 : static Size pgss_memsize(void);
356 : static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
357 : int encoding, bool sticky);
358 : static void entry_dealloc(void);
359 : static bool qtext_store(const char *query, int query_len,
360 : Size *query_offset, int *gc_count);
361 : static char *qtext_load_file(Size *buffer_size);
362 : static char *qtext_fetch(Size query_offset, int query_len,
363 : char *buffer, Size buffer_size);
364 : static bool need_gc_qtexts(void);
365 : static void gc_qtexts(void);
366 : static TimestampTz entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only);
367 : static char *generate_normalized_query(JumbleState *jstate, const char *query,
368 : int query_loc, int *query_len_p);
369 : static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
370 : int query_loc);
371 : static int comp_location(const void *a, const void *b);
372 :
373 :
374 : /*
375 : * Module load callback
376 : */
377 : void
378 14 : _PG_init(void)
379 : {
380 : /*
381 : * In order to create our shared memory area, we have to be loaded via
382 : * shared_preload_libraries. If not, fall out without hooking into any of
383 : * the main system. (We don't throw error here because it seems useful to
384 : * allow the pg_stat_statements functions to be created even when the
385 : * module isn't active. The functions must protect themselves against
386 : * being called then, however.)
387 : */
388 14 : if (!process_shared_preload_libraries_in_progress)
389 2 : return;
390 :
391 : /*
392 : * Inform the postmaster that we want to enable query_id calculation if
393 : * compute_query_id is set to auto.
394 : */
395 12 : EnableQueryId();
396 :
397 : /*
398 : * Define (or redefine) custom GUC variables.
399 : */
400 12 : DefineCustomIntVariable("pg_stat_statements.max",
401 : "Sets the maximum number of statements tracked by pg_stat_statements.",
402 : NULL,
403 : &pgss_max,
404 : 5000,
405 : 100,
406 : INT_MAX / 2,
407 : PGC_POSTMASTER,
408 : 0,
409 : NULL,
410 : NULL,
411 : NULL);
412 :
413 12 : DefineCustomEnumVariable("pg_stat_statements.track",
414 : "Selects which statements are tracked by pg_stat_statements.",
415 : NULL,
416 : &pgss_track,
417 : PGSS_TRACK_TOP,
418 : track_options,
419 : PGC_SUSET,
420 : 0,
421 : NULL,
422 : NULL,
423 : NULL);
424 :
425 12 : DefineCustomBoolVariable("pg_stat_statements.track_utility",
426 : "Selects whether utility commands are tracked by pg_stat_statements.",
427 : NULL,
428 : &pgss_track_utility,
429 : true,
430 : PGC_SUSET,
431 : 0,
432 : NULL,
433 : NULL,
434 : NULL);
435 :
436 12 : DefineCustomBoolVariable("pg_stat_statements.track_planning",
437 : "Selects whether planning duration is tracked by pg_stat_statements.",
438 : NULL,
439 : &pgss_track_planning,
440 : false,
441 : PGC_SUSET,
442 : 0,
443 : NULL,
444 : NULL,
445 : NULL);
446 :
447 12 : DefineCustomBoolVariable("pg_stat_statements.save",
448 : "Save pg_stat_statements statistics across server shutdowns.",
449 : NULL,
450 : &pgss_save,
451 : true,
452 : PGC_SIGHUP,
453 : 0,
454 : NULL,
455 : NULL,
456 : NULL);
457 :
458 12 : MarkGUCPrefixReserved("pg_stat_statements");
459 :
460 : /*
461 : * Install hooks.
462 : */
463 12 : prev_shmem_request_hook = shmem_request_hook;
464 12 : shmem_request_hook = pgss_shmem_request;
465 12 : prev_shmem_startup_hook = shmem_startup_hook;
466 12 : shmem_startup_hook = pgss_shmem_startup;
467 12 : prev_post_parse_analyze_hook = post_parse_analyze_hook;
468 12 : post_parse_analyze_hook = pgss_post_parse_analyze;
469 12 : prev_planner_hook = planner_hook;
470 12 : planner_hook = pgss_planner;
471 12 : prev_ExecutorStart = ExecutorStart_hook;
472 12 : ExecutorStart_hook = pgss_ExecutorStart;
473 12 : prev_ExecutorRun = ExecutorRun_hook;
474 12 : ExecutorRun_hook = pgss_ExecutorRun;
475 12 : prev_ExecutorFinish = ExecutorFinish_hook;
476 12 : ExecutorFinish_hook = pgss_ExecutorFinish;
477 12 : prev_ExecutorEnd = ExecutorEnd_hook;
478 12 : ExecutorEnd_hook = pgss_ExecutorEnd;
479 12 : prev_ProcessUtility = ProcessUtility_hook;
480 12 : ProcessUtility_hook = pgss_ProcessUtility;
481 : }
482 :
483 : /*
484 : * shmem_request hook: request additional shared resources. We'll allocate or
485 : * attach to the shared resources in pgss_shmem_startup().
486 : */
487 : static void
488 12 : pgss_shmem_request(void)
489 : {
490 12 : if (prev_shmem_request_hook)
491 0 : prev_shmem_request_hook();
492 :
493 12 : RequestAddinShmemSpace(pgss_memsize());
494 12 : RequestNamedLWLockTranche("pg_stat_statements", 1);
495 12 : }
496 :
497 : /*
498 : * shmem_startup hook: allocate or attach to shared memory,
499 : * then load any pre-existing statistics from file.
500 : * Also create and load the query-texts file, which is expected to exist
501 : * (even if empty) while the module is enabled.
502 : */
503 : static void
504 12 : pgss_shmem_startup(void)
505 : {
506 : bool found;
507 : HASHCTL info;
508 12 : FILE *file = NULL;
509 12 : FILE *qfile = NULL;
510 : uint32 header;
511 : int32 num;
512 : int32 pgver;
513 : int32 i;
514 : int buffer_size;
515 12 : char *buffer = NULL;
516 :
517 12 : if (prev_shmem_startup_hook)
518 0 : prev_shmem_startup_hook();
519 :
520 : /* reset in case this is a restart within the postmaster */
521 12 : pgss = NULL;
522 12 : pgss_hash = NULL;
523 :
524 : /*
525 : * Create or attach to the shared memory state, including hash table
526 : */
527 12 : LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
528 :
529 12 : pgss = ShmemInitStruct("pg_stat_statements",
530 : sizeof(pgssSharedState),
531 : &found);
532 :
533 12 : if (!found)
534 : {
535 : /* First time through ... */
536 12 : pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
537 12 : pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
538 12 : pgss->mean_query_len = ASSUMED_LENGTH_INIT;
539 12 : SpinLockInit(&pgss->mutex);
540 12 : pgss->extent = 0;
541 12 : pgss->n_writers = 0;
542 12 : pgss->gc_count = 0;
543 12 : pgss->stats.dealloc = 0;
544 12 : pgss->stats.stats_reset = GetCurrentTimestamp();
545 : }
546 :
547 12 : info.keysize = sizeof(pgssHashKey);
548 12 : info.entrysize = sizeof(pgssEntry);
549 12 : pgss_hash = ShmemInitHash("pg_stat_statements hash",
550 : pgss_max, pgss_max,
551 : &info,
552 : HASH_ELEM | HASH_BLOBS);
553 :
554 12 : LWLockRelease(AddinShmemInitLock);
555 :
556 : /*
557 : * If we're in the postmaster (or a standalone backend...), set up a shmem
558 : * exit hook to dump the statistics to disk.
559 : */
560 12 : if (!IsUnderPostmaster)
561 12 : on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
562 :
563 : /*
564 : * Done if some other process already completed our initialization.
565 : */
566 12 : if (found)
567 12 : return;
568 :
569 : /*
570 : * Note: we don't bother with locks here, because there should be no other
571 : * processes running when this code is reached.
572 : */
573 :
574 : /* Unlink query text file possibly left over from crash */
575 12 : unlink(PGSS_TEXT_FILE);
576 :
577 : /* Allocate new query text temp file */
578 12 : qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
579 12 : if (qfile == NULL)
580 0 : goto write_error;
581 :
582 : /*
583 : * If we were told not to load old statistics, we're done. (Note we do
584 : * not try to unlink any old dump file in this case. This seems a bit
585 : * questionable but it's the historical behavior.)
586 : */
587 12 : if (!pgss_save)
588 : {
589 2 : FreeFile(qfile);
590 2 : return;
591 : }
592 :
593 : /*
594 : * Attempt to load old statistics from the dump file.
595 : */
596 10 : file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
597 10 : if (file == NULL)
598 : {
599 8 : if (errno != ENOENT)
600 0 : goto read_error;
601 : /* No existing persisted stats file, so we're done */
602 8 : FreeFile(qfile);
603 8 : return;
604 : }
605 :
606 2 : buffer_size = 2048;
607 2 : buffer = (char *) palloc(buffer_size);
608 :
609 4 : if (fread(&header, sizeof(uint32), 1, file) != 1 ||
610 4 : fread(&pgver, sizeof(uint32), 1, file) != 1 ||
611 2 : fread(&num, sizeof(int32), 1, file) != 1)
612 0 : goto read_error;
613 :
614 2 : if (header != PGSS_FILE_HEADER ||
615 2 : pgver != PGSS_PG_MAJOR_VERSION)
616 0 : goto data_error;
617 :
618 10 : for (i = 0; i < num; i++)
619 : {
620 : pgssEntry temp;
621 : pgssEntry *entry;
622 : Size query_offset;
623 :
624 8 : if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
625 0 : goto read_error;
626 :
627 : /* Encoding is the only field we can easily sanity-check */
628 8 : if (!PG_VALID_BE_ENCODING(temp.encoding))
629 0 : goto data_error;
630 :
631 : /* Resize buffer as needed */
632 8 : if (temp.query_len >= buffer_size)
633 : {
634 0 : buffer_size = Max(buffer_size * 2, temp.query_len + 1);
635 0 : buffer = repalloc(buffer, buffer_size);
636 : }
637 :
638 8 : if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
639 0 : goto read_error;
640 :
641 : /* Should have a trailing null, but let's make sure */
642 8 : buffer[temp.query_len] = '\0';
643 :
644 : /* Skip loading "sticky" entries */
645 8 : if (IS_STICKY(temp.counters))
646 0 : continue;
647 :
648 : /* Store the query text */
649 8 : query_offset = pgss->extent;
650 8 : if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
651 0 : goto write_error;
652 8 : pgss->extent += temp.query_len + 1;
653 :
654 : /* make the hashtable entry (discards old entries if too many) */
655 8 : entry = entry_alloc(&temp.key, query_offset, temp.query_len,
656 : temp.encoding,
657 : false);
658 :
659 : /* copy in the actual stats */
660 8 : entry->counters = temp.counters;
661 8 : entry->stats_since = temp.stats_since;
662 8 : entry->minmax_stats_since = temp.minmax_stats_since;
663 : }
664 :
665 : /* Read global statistics for pg_stat_statements */
666 2 : if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
667 0 : goto read_error;
668 :
669 2 : pfree(buffer);
670 2 : FreeFile(file);
671 2 : FreeFile(qfile);
672 :
673 : /*
674 : * Remove the persisted stats file so it's not included in
675 : * backups/replication standbys, etc. A new file will be written on next
676 : * shutdown.
677 : *
678 : * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
679 : * because we remove that file on startup; it acts inversely to
680 : * PGSS_DUMP_FILE, in that it is only supposed to be around when the
681 : * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
682 : * when the server is not running. Leaving the file creates no danger of
683 : * a newly restored database having a spurious record of execution costs,
684 : * which is what we're really concerned about here.
685 : */
686 2 : unlink(PGSS_DUMP_FILE);
687 :
688 2 : return;
689 :
690 0 : read_error:
691 0 : ereport(LOG,
692 : (errcode_for_file_access(),
693 : errmsg("could not read file \"%s\": %m",
694 : PGSS_DUMP_FILE)));
695 0 : goto fail;
696 0 : data_error:
697 0 : ereport(LOG,
698 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
699 : errmsg("ignoring invalid data in file \"%s\"",
700 : PGSS_DUMP_FILE)));
701 0 : goto fail;
702 0 : write_error:
703 0 : ereport(LOG,
704 : (errcode_for_file_access(),
705 : errmsg("could not write file \"%s\": %m",
706 : PGSS_TEXT_FILE)));
707 0 : fail:
708 0 : if (buffer)
709 0 : pfree(buffer);
710 0 : if (file)
711 0 : FreeFile(file);
712 0 : if (qfile)
713 0 : FreeFile(qfile);
714 : /* If possible, throw away the bogus file; ignore any error */
715 0 : unlink(PGSS_DUMP_FILE);
716 :
717 : /*
718 : * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
719 : * server is running with pg_stat_statements enabled
720 : */
721 : }
722 :
723 : /*
724 : * shmem_shutdown hook: Dump statistics into file.
725 : *
726 : * Note: we don't bother with acquiring lock, because there should be no
727 : * other processes running when this is called.
728 : */
729 : static void
730 12 : pgss_shmem_shutdown(int code, Datum arg)
731 : {
732 : FILE *file;
733 12 : char *qbuffer = NULL;
734 12 : Size qbuffer_size = 0;
735 : HASH_SEQ_STATUS hash_seq;
736 : int32 num_entries;
737 : pgssEntry *entry;
738 :
739 : /* Don't try to dump during a crash. */
740 12 : if (code)
741 12 : return;
742 :
743 : /* Safety check ... shouldn't get here unless shmem is set up. */
744 12 : if (!pgss || !pgss_hash)
745 0 : return;
746 :
747 : /* Don't dump if told not to. */
748 12 : if (!pgss_save)
749 4 : return;
750 :
751 8 : file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
752 8 : if (file == NULL)
753 0 : goto error;
754 :
755 8 : if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
756 0 : goto error;
757 8 : if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
758 0 : goto error;
759 8 : num_entries = hash_get_num_entries(pgss_hash);
760 8 : if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
761 0 : goto error;
762 :
763 8 : qbuffer = qtext_load_file(&qbuffer_size);
764 8 : if (qbuffer == NULL)
765 0 : goto error;
766 :
767 : /*
768 : * When serializing to disk, we store query texts immediately after their
769 : * entry data. Any orphaned query texts are thereby excluded.
770 : */
771 8 : hash_seq_init(&hash_seq, pgss_hash);
772 52580 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
773 : {
774 52572 : int len = entry->query_len;
775 52572 : char *qstr = qtext_fetch(entry->query_offset, len,
776 : qbuffer, qbuffer_size);
777 :
778 52572 : if (qstr == NULL)
779 0 : continue; /* Ignore any entries with bogus texts */
780 :
781 52572 : if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
782 52572 : fwrite(qstr, 1, len + 1, file) != len + 1)
783 : {
784 : /* note: we assume hash_seq_term won't change errno */
785 0 : hash_seq_term(&hash_seq);
786 0 : goto error;
787 : }
788 : }
789 :
790 : /* Dump global statistics for pg_stat_statements */
791 8 : if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
792 0 : goto error;
793 :
794 8 : free(qbuffer);
795 8 : qbuffer = NULL;
796 :
797 8 : if (FreeFile(file))
798 : {
799 0 : file = NULL;
800 0 : goto error;
801 : }
802 :
803 : /*
804 : * Rename file into place, so we atomically replace any old one.
805 : */
806 8 : (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
807 :
808 : /* Unlink query-texts file; it's not needed while shutdown */
809 8 : unlink(PGSS_TEXT_FILE);
810 :
811 8 : return;
812 :
813 0 : error:
814 0 : ereport(LOG,
815 : (errcode_for_file_access(),
816 : errmsg("could not write file \"%s\": %m",
817 : PGSS_DUMP_FILE ".tmp")));
818 0 : free(qbuffer);
819 0 : if (file)
820 0 : FreeFile(file);
821 0 : unlink(PGSS_DUMP_FILE ".tmp");
822 0 : unlink(PGSS_TEXT_FILE);
823 : }
824 :
825 : /*
826 : * Post-parse-analysis hook: mark query with a queryId
827 : */
828 : static void
829 132866 : pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
830 : {
831 132866 : if (prev_post_parse_analyze_hook)
832 0 : prev_post_parse_analyze_hook(pstate, query, jstate);
833 :
834 : /* Safety check... */
835 132866 : if (!pgss || !pgss_hash || !pgss_enabled(nesting_level))
836 24840 : return;
837 :
838 : /*
839 : * If it's EXECUTE, clear the queryId so that stats will accumulate for
840 : * the underlying PREPARE. But don't do this if we're not tracking
841 : * utility statements, to avoid messing up another extension that might be
842 : * tracking them.
843 : */
844 108026 : if (query->utilityStmt)
845 : {
846 49772 : if (pgss_track_utility && IsA(query->utilityStmt, ExecuteStmt))
847 : {
848 2774 : query->queryId = UINT64CONST(0);
849 2774 : return;
850 : }
851 : }
852 :
853 : /*
854 : * If query jumbling were able to identify any ignorable constants, we
855 : * immediately create a hash table entry for the query, so that we can
856 : * record the normalized form of the query string. If there were no such
857 : * constants, the normalized string would be the same as the query text
858 : * anyway, so there's no need for an early entry.
859 : */
860 105252 : if (jstate && jstate->clocations_count > 0)
861 53786 : pgss_store(pstate->p_sourcetext,
862 : query->queryId,
863 : query->stmt_location,
864 : query->stmt_len,
865 : PGSS_INVALID,
866 : 0,
867 : 0,
868 : NULL,
869 : NULL,
870 : NULL,
871 : jstate);
872 : }
873 :
874 : /*
875 : * Planner hook: forward to regular planner, but measure planning time
876 : * if needed.
877 : */
878 : static PlannedStmt *
879 81738 : pgss_planner(Query *parse,
880 : const char *query_string,
881 : int cursorOptions,
882 : ParamListInfo boundParams)
883 : {
884 : PlannedStmt *result;
885 :
886 : /*
887 : * We can't process the query if no query_string is provided, as
888 : * pgss_store needs it. We also ignore query without queryid, as it would
889 : * be treated as a utility statement, which may not be the case.
890 : *
891 : * Note that planner_hook can be called from the planner itself, so we
892 : * have a specific nesting level for the planner. However, utility
893 : * commands containing optimizable statements can also call the planner,
894 : * same for regular DML (for instance for underlying foreign key queries).
895 : * So testing the planner nesting level only is not enough to detect real
896 : * top level planner call.
897 : */
898 81738 : if (pgss_enabled(nesting_level)
899 58472 : && pgss_track_planning && query_string
900 146 : && parse->queryId != UINT64CONST(0))
901 146 : {
902 : instr_time start;
903 : instr_time duration;
904 : BufferUsage bufusage_start,
905 : bufusage;
906 : WalUsage walusage_start,
907 : walusage;
908 :
909 : /* We need to track buffer usage as the planner can access them. */
910 146 : bufusage_start = pgBufferUsage;
911 :
912 : /*
913 : * Similarly the planner could write some WAL records in some cases
914 : * (e.g. setting a hint bit with those being WAL-logged)
915 : */
916 146 : walusage_start = pgWalUsage;
917 146 : INSTR_TIME_SET_CURRENT(start);
918 :
919 146 : nesting_level++;
920 146 : PG_TRY();
921 : {
922 146 : if (prev_planner_hook)
923 0 : result = prev_planner_hook(parse, query_string, cursorOptions,
924 : boundParams);
925 : else
926 146 : result = standard_planner(parse, query_string, cursorOptions,
927 : boundParams);
928 : }
929 0 : PG_FINALLY();
930 : {
931 146 : nesting_level--;
932 : }
933 146 : PG_END_TRY();
934 :
935 146 : INSTR_TIME_SET_CURRENT(duration);
936 146 : INSTR_TIME_SUBTRACT(duration, start);
937 :
938 : /* calc differences of buffer counters. */
939 146 : memset(&bufusage, 0, sizeof(BufferUsage));
940 146 : BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
941 :
942 : /* calc differences of WAL counters. */
943 146 : memset(&walusage, 0, sizeof(WalUsage));
944 146 : WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
945 :
946 146 : pgss_store(query_string,
947 : parse->queryId,
948 : parse->stmt_location,
949 : parse->stmt_len,
950 : PGSS_PLAN,
951 146 : INSTR_TIME_GET_MILLISEC(duration),
952 : 0,
953 : &bufusage,
954 : &walusage,
955 : NULL,
956 : NULL);
957 : }
958 : else
959 : {
960 : /*
961 : * Even though we're not tracking plan time for this statement, we
962 : * must still increment the nesting level, to ensure that functions
963 : * evaluated during planning are not seen as top-level calls.
964 : */
965 81592 : nesting_level++;
966 81592 : PG_TRY();
967 : {
968 81592 : if (prev_planner_hook)
969 0 : result = prev_planner_hook(parse, query_string, cursorOptions,
970 : boundParams);
971 : else
972 81592 : result = standard_planner(parse, query_string, cursorOptions,
973 : boundParams);
974 : }
975 1204 : PG_FINALLY();
976 : {
977 81592 : nesting_level--;
978 : }
979 81592 : PG_END_TRY();
980 : }
981 :
982 80534 : return result;
983 : }
984 :
985 : /*
986 : * ExecutorStart hook: start up tracking if needed
987 : */
988 : static void
989 94730 : pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
990 : {
991 94730 : if (prev_ExecutorStart)
992 0 : prev_ExecutorStart(queryDesc, eflags);
993 : else
994 94730 : standard_ExecutorStart(queryDesc, eflags);
995 :
996 : /*
997 : * If query has queryId zero, don't track it. This prevents double
998 : * counting of optimizable statements that are directly contained in
999 : * utility statements.
1000 : */
1001 94112 : if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
1002 : {
1003 : /*
1004 : * Set up to track total elapsed time in ExecutorRun. Make sure the
1005 : * space is allocated in the per-query context so it will go away at
1006 : * ExecutorEnd.
1007 : */
1008 58996 : if (queryDesc->totaltime == NULL)
1009 : {
1010 : MemoryContext oldcxt;
1011 :
1012 58996 : oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
1013 58996 : queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
1014 58996 : MemoryContextSwitchTo(oldcxt);
1015 : }
1016 : }
1017 94112 : }
1018 :
1019 : /*
1020 : * ExecutorRun hook: all we need do is track nesting depth
1021 : */
1022 : static void
1023 92130 : pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
1024 : bool execute_once)
1025 : {
1026 92130 : nesting_level++;
1027 92130 : PG_TRY();
1028 : {
1029 92130 : if (prev_ExecutorRun)
1030 0 : prev_ExecutorRun(queryDesc, direction, count, execute_once);
1031 : else
1032 92130 : standard_ExecutorRun(queryDesc, direction, count, execute_once);
1033 : }
1034 6784 : PG_FINALLY();
1035 : {
1036 92130 : nesting_level--;
1037 : }
1038 92130 : PG_END_TRY();
1039 85346 : }
1040 :
1041 : /*
1042 : * ExecutorFinish hook: all we need do is track nesting depth
1043 : */
1044 : static void
1045 82092 : pgss_ExecutorFinish(QueryDesc *queryDesc)
1046 : {
1047 82092 : nesting_level++;
1048 82092 : PG_TRY();
1049 : {
1050 82092 : if (prev_ExecutorFinish)
1051 0 : prev_ExecutorFinish(queryDesc);
1052 : else
1053 82092 : standard_ExecutorFinish(queryDesc);
1054 : }
1055 314 : PG_FINALLY();
1056 : {
1057 82092 : nesting_level--;
1058 : }
1059 82092 : PG_END_TRY();
1060 81778 : }
1061 :
1062 : /*
1063 : * ExecutorEnd hook: store results if needed
1064 : */
1065 : static void
1066 86936 : pgss_ExecutorEnd(QueryDesc *queryDesc)
1067 : {
1068 86936 : uint64 queryId = queryDesc->plannedstmt->queryId;
1069 :
1070 86936 : if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
1071 56230 : pgss_enabled(nesting_level))
1072 : {
1073 : /*
1074 : * Make sure stats accumulation is done. (Note: it's okay if several
1075 : * levels of hook all do this.)
1076 : */
1077 56230 : InstrEndLoop(queryDesc->totaltime);
1078 :
1079 56034 : pgss_store(queryDesc->sourceText,
1080 : queryId,
1081 56230 : queryDesc->plannedstmt->stmt_location,
1082 56230 : queryDesc->plannedstmt->stmt_len,
1083 : PGSS_EXEC,
1084 56230 : queryDesc->totaltime->total * 1000.0, /* convert to msec */
1085 56230 : queryDesc->estate->es_total_processed,
1086 56230 : &queryDesc->totaltime->bufusage,
1087 56230 : &queryDesc->totaltime->walusage,
1088 56230 : queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1089 : NULL);
1090 : }
1091 :
1092 86936 : if (prev_ExecutorEnd)
1093 0 : prev_ExecutorEnd(queryDesc);
1094 : else
1095 86936 : standard_ExecutorEnd(queryDesc);
1096 86936 : }
1097 :
1098 : /*
1099 : * ProcessUtility hook
1100 : */
1101 : static void
1102 59974 : pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1103 : bool readOnlyTree,
1104 : ProcessUtilityContext context,
1105 : ParamListInfo params, QueryEnvironment *queryEnv,
1106 : DestReceiver *dest, QueryCompletion *qc)
1107 : {
1108 59974 : Node *parsetree = pstmt->utilityStmt;
1109 59974 : uint64 saved_queryId = pstmt->queryId;
1110 59974 : int saved_stmt_location = pstmt->stmt_location;
1111 59974 : int saved_stmt_len = pstmt->stmt_len;
1112 59974 : bool enabled = pgss_track_utility && pgss_enabled(nesting_level);
1113 :
1114 : /*
1115 : * Force utility statements to get queryId zero. We do this even in cases
1116 : * where the statement contains an optimizable statement for which a
1117 : * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1118 : * cases, runtime control will first go through ProcessUtility and then
1119 : * the executor, and we don't want the executor hooks to do anything,
1120 : * since we are already measuring the statement's costs at the utility
1121 : * level.
1122 : *
1123 : * Note that this is only done if pg_stat_statements is enabled and
1124 : * configured to track utility statements, in the unlikely possibility
1125 : * that user configured another extension to handle utility statements
1126 : * only.
1127 : */
1128 59974 : if (enabled)
1129 49670 : pstmt->queryId = UINT64CONST(0);
1130 :
1131 : /*
1132 : * If it's an EXECUTE statement, we don't track it and don't increment the
1133 : * nesting level. This allows the cycles to be charged to the underlying
1134 : * PREPARE instead (by the Executor hooks), which is much more useful.
1135 : *
1136 : * We also don't track execution of PREPARE. If we did, we would get one
1137 : * hash table entry for the PREPARE (with hash calculated from the query
1138 : * string), and then a different one with the same query string (but hash
1139 : * calculated from the query tree) would be used to accumulate costs of
1140 : * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1141 : * actually run the planner (only parse+rewrite), its costs are generally
1142 : * pretty negligible and it seems okay to just ignore it.
1143 : */
1144 59974 : if (enabled &&
1145 49670 : !IsA(parsetree, ExecuteStmt) &&
1146 46898 : !IsA(parsetree, PrepareStmt))
1147 42334 : {
1148 : instr_time start;
1149 : instr_time duration;
1150 : uint64 rows;
1151 : BufferUsage bufusage_start,
1152 : bufusage;
1153 : WalUsage walusage_start,
1154 : walusage;
1155 :
1156 46698 : bufusage_start = pgBufferUsage;
1157 46698 : walusage_start = pgWalUsage;
1158 46698 : INSTR_TIME_SET_CURRENT(start);
1159 :
1160 46698 : nesting_level++;
1161 46698 : PG_TRY();
1162 : {
1163 46698 : if (prev_ProcessUtility)
1164 0 : prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1165 : context, params, queryEnv,
1166 : dest, qc);
1167 : else
1168 46698 : standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1169 : context, params, queryEnv,
1170 : dest, qc);
1171 : }
1172 4364 : PG_FINALLY();
1173 : {
1174 46698 : nesting_level--;
1175 : }
1176 46698 : PG_END_TRY();
1177 :
1178 : /*
1179 : * CAUTION: do not access the *pstmt data structure again below here.
1180 : * If it was a ROLLBACK or similar, that data structure may have been
1181 : * freed. We must copy everything we still need into local variables,
1182 : * which we did above.
1183 : *
1184 : * For the same reason, we can't risk restoring pstmt->queryId to its
1185 : * former value, which'd otherwise be a good idea.
1186 : */
1187 :
1188 42334 : INSTR_TIME_SET_CURRENT(duration);
1189 42334 : INSTR_TIME_SUBTRACT(duration, start);
1190 :
1191 : /*
1192 : * Track the total number of rows retrieved or affected by the utility
1193 : * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1194 : * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1195 : */
1196 42334 : rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1197 39616 : qc->commandTag == CMDTAG_FETCH ||
1198 39156 : qc->commandTag == CMDTAG_SELECT ||
1199 38800 : qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
1200 84668 : qc->nprocessed : 0;
1201 :
1202 : /* calc differences of buffer counters. */
1203 42334 : memset(&bufusage, 0, sizeof(BufferUsage));
1204 42334 : BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1205 :
1206 : /* calc differences of WAL counters. */
1207 42334 : memset(&walusage, 0, sizeof(WalUsage));
1208 42334 : WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1209 :
1210 42334 : pgss_store(queryString,
1211 : saved_queryId,
1212 : saved_stmt_location,
1213 : saved_stmt_len,
1214 : PGSS_EXEC,
1215 42334 : INSTR_TIME_GET_MILLISEC(duration),
1216 : rows,
1217 : &bufusage,
1218 : &walusage,
1219 : NULL,
1220 : NULL);
1221 : }
1222 : else
1223 : {
1224 : /*
1225 : * Even though we're not tracking execution time for this statement,
1226 : * we must still increment the nesting level, to ensure that functions
1227 : * evaluated within it are not seen as top-level calls. But don't do
1228 : * so for EXECUTE; that way, when control reaches pgss_planner or
1229 : * pgss_ExecutorStart, we will treat the costs as top-level if
1230 : * appropriate. Likewise, don't bump for PREPARE, so that parse
1231 : * analysis will treat the statement as top-level if appropriate.
1232 : *
1233 : * To be absolutely certain we don't mess up the nesting level,
1234 : * evaluate the bump_level condition just once.
1235 : */
1236 13276 : bool bump_level =
1237 23778 : !IsA(parsetree, ExecuteStmt) &&
1238 10502 : !IsA(parsetree, PrepareStmt);
1239 :
1240 13276 : if (bump_level)
1241 10300 : nesting_level++;
1242 13276 : PG_TRY();
1243 : {
1244 13276 : if (prev_ProcessUtility)
1245 0 : prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1246 : context, params, queryEnv,
1247 : dest, qc);
1248 : else
1249 13276 : standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1250 : context, params, queryEnv,
1251 : dest, qc);
1252 : }
1253 240 : PG_FINALLY();
1254 : {
1255 13276 : if (bump_level)
1256 10300 : nesting_level--;
1257 : }
1258 13276 : PG_END_TRY();
1259 : }
1260 55370 : }
1261 :
1262 : /*
1263 : * Store some statistics for a statement.
1264 : *
1265 : * If jstate is not NULL then we're trying to create an entry for which
1266 : * we have no statistics as yet; we just want to record the normalized
1267 : * query string. total_time, rows, bufusage and walusage are ignored in this
1268 : * case.
1269 : *
1270 : * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1271 : * for the arrays in the Counters field.
1272 : */
1273 : static void
1274 152496 : pgss_store(const char *query, uint64 queryId,
1275 : int query_location, int query_len,
1276 : pgssStoreKind kind,
1277 : double total_time, uint64 rows,
1278 : const BufferUsage *bufusage,
1279 : const WalUsage *walusage,
1280 : const struct JitInstrumentation *jitusage,
1281 : JumbleState *jstate)
1282 : {
1283 : pgssHashKey key;
1284 : pgssEntry *entry;
1285 152496 : char *norm_query = NULL;
1286 152496 : int encoding = GetDatabaseEncoding();
1287 :
1288 : Assert(query != NULL);
1289 :
1290 : /* Safety check... */
1291 152496 : if (!pgss || !pgss_hash)
1292 0 : return;
1293 :
1294 : /*
1295 : * Nothing to do if compute_query_id isn't enabled and no other module
1296 : * computed a query identifier.
1297 : */
1298 152496 : if (queryId == UINT64CONST(0))
1299 0 : return;
1300 :
1301 : /*
1302 : * Confine our attention to the relevant part of the string, if the query
1303 : * is a portion of a multi-statement source string, and update query
1304 : * location and length if needed.
1305 : */
1306 152496 : query = CleanQuerytext(query, &query_location, &query_len);
1307 :
1308 : /* Set up key for hashtable search */
1309 :
1310 : /* clear padding */
1311 152496 : memset(&key, 0, sizeof(pgssHashKey));
1312 :
1313 152496 : key.userid = GetUserId();
1314 152496 : key.dbid = MyDatabaseId;
1315 152496 : key.queryid = queryId;
1316 152496 : key.toplevel = (nesting_level == 0);
1317 :
1318 : /* Lookup the hash table entry with shared lock. */
1319 152496 : LWLockAcquire(pgss->lock, LW_SHARED);
1320 :
1321 152496 : entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1322 :
1323 : /* Create new entry, if not present */
1324 152496 : if (!entry)
1325 : {
1326 : Size query_offset;
1327 : int gc_count;
1328 : bool stored;
1329 : bool do_gc;
1330 :
1331 : /*
1332 : * Create a new, normalized query string if caller asked. We don't
1333 : * need to hold the lock while doing this work. (Note: in any case,
1334 : * it's possible that someone else creates a duplicate hashtable entry
1335 : * in the interval where we don't hold the lock below. That case is
1336 : * handled by entry_alloc.)
1337 : */
1338 53310 : if (jstate)
1339 : {
1340 19312 : LWLockRelease(pgss->lock);
1341 19312 : norm_query = generate_normalized_query(jstate, query,
1342 : query_location,
1343 : &query_len);
1344 19312 : LWLockAcquire(pgss->lock, LW_SHARED);
1345 : }
1346 :
1347 : /* Append new query text to file with only shared lock held */
1348 53310 : stored = qtext_store(norm_query ? norm_query : query, query_len,
1349 : &query_offset, &gc_count);
1350 :
1351 : /*
1352 : * Determine whether we need to garbage collect external query texts
1353 : * while the shared lock is still held. This micro-optimization
1354 : * avoids taking the time to decide this while holding exclusive lock.
1355 : */
1356 53310 : do_gc = need_gc_qtexts();
1357 :
1358 : /* Need exclusive lock to make a new hashtable entry - promote */
1359 53310 : LWLockRelease(pgss->lock);
1360 53310 : LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1361 :
1362 : /*
1363 : * A garbage collection may have occurred while we weren't holding the
1364 : * lock. In the unlikely event that this happens, the query text we
1365 : * stored above will have been garbage collected, so write it again.
1366 : * This should be infrequent enough that doing it while holding
1367 : * exclusive lock isn't a performance problem.
1368 : */
1369 53310 : if (!stored || pgss->gc_count != gc_count)
1370 0 : stored = qtext_store(norm_query ? norm_query : query, query_len,
1371 : &query_offset, NULL);
1372 :
1373 : /* If we failed to write to the text file, give up */
1374 53310 : if (!stored)
1375 0 : goto done;
1376 :
1377 : /* OK to create a new hashtable entry */
1378 53310 : entry = entry_alloc(&key, query_offset, query_len, encoding,
1379 : jstate != NULL);
1380 :
1381 : /* If needed, perform garbage collection while exclusive lock held */
1382 53310 : if (do_gc)
1383 0 : gc_qtexts();
1384 : }
1385 :
1386 : /* Increment the counts, except when jstate is not NULL */
1387 152496 : if (!jstate)
1388 : {
1389 : /*
1390 : * Grab the spinlock while updating the counters (see comment about
1391 : * locking rules at the head of the file)
1392 : */
1393 98710 : volatile pgssEntry *e = (volatile pgssEntry *) entry;
1394 :
1395 : Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1396 :
1397 98710 : SpinLockAcquire(&e->mutex);
1398 :
1399 : /* "Unstick" entry if it was previously sticky */
1400 98710 : if (IS_STICKY(e->counters))
1401 52002 : e->counters.usage = USAGE_INIT;
1402 :
1403 98710 : e->counters.calls[kind] += 1;
1404 98710 : e->counters.total_time[kind] += total_time;
1405 :
1406 98710 : if (e->counters.calls[kind] == 1)
1407 : {
1408 52110 : e->counters.min_time[kind] = total_time;
1409 52110 : e->counters.max_time[kind] = total_time;
1410 52110 : e->counters.mean_time[kind] = total_time;
1411 : }
1412 : else
1413 : {
1414 : /*
1415 : * Welford's method for accurately computing variance. See
1416 : * <http://www.johndcook.com/blog/standard_deviation/>
1417 : */
1418 46600 : double old_mean = e->counters.mean_time[kind];
1419 :
1420 46600 : e->counters.mean_time[kind] +=
1421 46600 : (total_time - old_mean) / e->counters.calls[kind];
1422 46600 : e->counters.sum_var_time[kind] +=
1423 46600 : (total_time - old_mean) * (total_time - e->counters.mean_time[kind]);
1424 :
1425 : /*
1426 : * Calculate min and max time. min = 0 and max = 0 means that the
1427 : * min/max statistics were reset
1428 : */
1429 46600 : if (e->counters.min_time[kind] == 0
1430 4 : && e->counters.max_time[kind] == 0)
1431 : {
1432 4 : e->counters.min_time[kind] = total_time;
1433 4 : e->counters.max_time[kind] = total_time;
1434 : }
1435 : else
1436 : {
1437 46596 : if (e->counters.min_time[kind] > total_time)
1438 11844 : e->counters.min_time[kind] = total_time;
1439 46596 : if (e->counters.max_time[kind] < total_time)
1440 5368 : e->counters.max_time[kind] = total_time;
1441 : }
1442 : }
1443 98710 : e->counters.rows += rows;
1444 98710 : e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1445 98710 : e->counters.shared_blks_read += bufusage->shared_blks_read;
1446 98710 : e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1447 98710 : e->counters.shared_blks_written += bufusage->shared_blks_written;
1448 98710 : e->counters.local_blks_hit += bufusage->local_blks_hit;
1449 98710 : e->counters.local_blks_read += bufusage->local_blks_read;
1450 98710 : e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1451 98710 : e->counters.local_blks_written += bufusage->local_blks_written;
1452 98710 : e->counters.temp_blks_read += bufusage->temp_blks_read;
1453 98710 : e->counters.temp_blks_written += bufusage->temp_blks_written;
1454 98710 : e->counters.shared_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_read_time);
1455 98710 : e->counters.shared_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_write_time);
1456 98710 : e->counters.local_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_read_time);
1457 98710 : e->counters.local_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_write_time);
1458 98710 : e->counters.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time);
1459 98710 : e->counters.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time);
1460 98710 : e->counters.usage += USAGE_EXEC(total_time);
1461 98710 : e->counters.wal_records += walusage->wal_records;
1462 98710 : e->counters.wal_fpi += walusage->wal_fpi;
1463 98710 : e->counters.wal_bytes += walusage->wal_bytes;
1464 98710 : if (jitusage)
1465 : {
1466 196 : e->counters.jit_functions += jitusage->created_functions;
1467 196 : e->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
1468 :
1469 196 : if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1470 180 : e->counters.jit_deform_count++;
1471 196 : e->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
1472 :
1473 196 : if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
1474 134 : e->counters.jit_inlining_count++;
1475 196 : e->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
1476 :
1477 196 : if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
1478 192 : e->counters.jit_optimization_count++;
1479 196 : e->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
1480 :
1481 196 : if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
1482 192 : e->counters.jit_emission_count++;
1483 196 : e->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
1484 : }
1485 :
1486 98710 : SpinLockRelease(&e->mutex);
1487 : }
1488 :
1489 53786 : done:
1490 152496 : LWLockRelease(pgss->lock);
1491 :
1492 : /* We postpone this clean-up until we're out of the lock */
1493 152496 : if (norm_query)
1494 19312 : pfree(norm_query);
1495 : }
1496 :
1497 : /*
1498 : * Reset statement statistics corresponding to userid, dbid, and queryid.
1499 : */
1500 : Datum
1501 2 : pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
1502 : {
1503 : Oid userid;
1504 : Oid dbid;
1505 : uint64 queryid;
1506 :
1507 2 : userid = PG_GETARG_OID(0);
1508 2 : dbid = PG_GETARG_OID(1);
1509 2 : queryid = (uint64) PG_GETARG_INT64(2);
1510 :
1511 2 : entry_reset(userid, dbid, queryid, false);
1512 :
1513 2 : PG_RETURN_VOID();
1514 : }
1515 :
1516 : Datum
1517 98 : pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
1518 : {
1519 : Oid userid;
1520 : Oid dbid;
1521 : uint64 queryid;
1522 : bool minmax_only;
1523 :
1524 98 : userid = PG_GETARG_OID(0);
1525 98 : dbid = PG_GETARG_OID(1);
1526 98 : queryid = (uint64) PG_GETARG_INT64(2);
1527 98 : minmax_only = PG_GETARG_BOOL(3);
1528 :
1529 98 : PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1530 : }
1531 :
1532 : /*
1533 : * Reset statement statistics.
1534 : */
1535 : Datum
1536 2 : pg_stat_statements_reset(PG_FUNCTION_ARGS)
1537 : {
1538 2 : entry_reset(0, 0, 0, false);
1539 :
1540 2 : PG_RETURN_VOID();
1541 : }
1542 :
/*
 * Number of output columns produced by pg_stat_statements() for each
 * supported API version.
 */
#define PG_STAT_STATEMENTS_COLS_V1_0	14
#define PG_STAT_STATEMENTS_COLS_V1_1	18
#define PG_STAT_STATEMENTS_COLS_V1_2	19
#define PG_STAT_STATEMENTS_COLS_V1_3	23
#define PG_STAT_STATEMENTS_COLS_V1_8	32
#define PG_STAT_STATEMENTS_COLS_V1_9	33
#define PG_STAT_STATEMENTS_COLS_V1_10	43
#define PG_STAT_STATEMENTS_COLS_V1_11	49

/* Largest of the counts above (the newest layout is the widest) */
#define PG_STAT_STATEMENTS_COLS			PG_STAT_STATEMENTS_COLS_V1_11
1553 :
1554 : /*
1555 : * Retrieve statement statistics.
1556 : *
1557 : * The SQL API of this function has changed multiple times, and will likely
1558 : * do so again in future. To support the case where a newer version of this
1559 : * loadable module is being used with an old SQL declaration of the function,
1560 : * we continue to support the older API versions. For 1.2 and later, the
1561 : * expected API version is identified by embedding it in the C name of the
1562 : * function. Unfortunately we weren't bright enough to do that for 1.1.
1563 : */
1564 : Datum
1565 116 : pg_stat_statements_1_11(PG_FUNCTION_ARGS)
1566 : {
1567 116 : bool showtext = PG_GETARG_BOOL(0);
1568 :
1569 116 : pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1570 :
1571 116 : return (Datum) 0;
1572 : }
1573 :
1574 : Datum
1575 2 : pg_stat_statements_1_10(PG_FUNCTION_ARGS)
1576 : {
1577 2 : bool showtext = PG_GETARG_BOOL(0);
1578 :
1579 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1580 :
1581 2 : return (Datum) 0;
1582 : }
1583 :
1584 : Datum
1585 2 : pg_stat_statements_1_9(PG_FUNCTION_ARGS)
1586 : {
1587 2 : bool showtext = PG_GETARG_BOOL(0);
1588 :
1589 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1590 :
1591 2 : return (Datum) 0;
1592 : }
1593 :
1594 : Datum
1595 2 : pg_stat_statements_1_8(PG_FUNCTION_ARGS)
1596 : {
1597 2 : bool showtext = PG_GETARG_BOOL(0);
1598 :
1599 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1600 :
1601 2 : return (Datum) 0;
1602 : }
1603 :
1604 : Datum
1605 2 : pg_stat_statements_1_3(PG_FUNCTION_ARGS)
1606 : {
1607 2 : bool showtext = PG_GETARG_BOOL(0);
1608 :
1609 2 : pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1610 :
1611 2 : return (Datum) 0;
1612 : }
1613 :
1614 : Datum
1615 0 : pg_stat_statements_1_2(PG_FUNCTION_ARGS)
1616 : {
1617 0 : bool showtext = PG_GETARG_BOOL(0);
1618 :
1619 0 : pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1620 :
1621 0 : return (Datum) 0;
1622 : }
1623 :
1624 : /*
1625 : * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1626 : * This can be removed someday, perhaps.
1627 : */
1628 : Datum
1629 0 : pg_stat_statements(PG_FUNCTION_ARGS)
1630 : {
1631 : /* If it's really API 1.1, we'll figure that out below */
1632 0 : pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1633 :
1634 0 : return (Datum) 0;
1635 : }
1636 :
1637 : /* Common code for all versions of pg_stat_statements() */
1638 : static void
1639 124 : pg_stat_statements_internal(FunctionCallInfo fcinfo,
1640 : pgssVersion api_version,
1641 : bool showtext)
1642 : {
1643 124 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1644 124 : Oid userid = GetUserId();
1645 124 : bool is_allowed_role = false;
1646 124 : char *qbuffer = NULL;
1647 124 : Size qbuffer_size = 0;
1648 124 : Size extent = 0;
1649 124 : int gc_count = 0;
1650 : HASH_SEQ_STATUS hash_seq;
1651 : pgssEntry *entry;
1652 :
1653 : /*
1654 : * Superusers or roles with the privileges of pg_read_all_stats members
1655 : * are allowed
1656 : */
1657 124 : is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
1658 :
1659 : /* hash table must exist already */
1660 124 : if (!pgss || !pgss_hash)
1661 0 : ereport(ERROR,
1662 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1663 : errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1664 :
1665 124 : InitMaterializedSRF(fcinfo, 0);
1666 :
1667 : /*
1668 : * Check we have the expected number of output arguments. Aside from
1669 : * being a good safety check, we need a kluge here to detect API version
1670 : * 1.1, which was wedged into the code in an ill-considered way.
1671 : */
1672 124 : switch (rsinfo->setDesc->natts)
1673 : {
1674 0 : case PG_STAT_STATEMENTS_COLS_V1_0:
1675 0 : if (api_version != PGSS_V1_0)
1676 0 : elog(ERROR, "incorrect number of output arguments");
1677 0 : break;
1678 0 : case PG_STAT_STATEMENTS_COLS_V1_1:
1679 : /* pg_stat_statements() should have told us 1.0 */
1680 0 : if (api_version != PGSS_V1_0)
1681 0 : elog(ERROR, "incorrect number of output arguments");
1682 0 : api_version = PGSS_V1_1;
1683 0 : break;
1684 0 : case PG_STAT_STATEMENTS_COLS_V1_2:
1685 0 : if (api_version != PGSS_V1_2)
1686 0 : elog(ERROR, "incorrect number of output arguments");
1687 0 : break;
1688 2 : case PG_STAT_STATEMENTS_COLS_V1_3:
1689 2 : if (api_version != PGSS_V1_3)
1690 0 : elog(ERROR, "incorrect number of output arguments");
1691 2 : break;
1692 2 : case PG_STAT_STATEMENTS_COLS_V1_8:
1693 2 : if (api_version != PGSS_V1_8)
1694 0 : elog(ERROR, "incorrect number of output arguments");
1695 2 : break;
1696 2 : case PG_STAT_STATEMENTS_COLS_V1_9:
1697 2 : if (api_version != PGSS_V1_9)
1698 0 : elog(ERROR, "incorrect number of output arguments");
1699 2 : break;
1700 2 : case PG_STAT_STATEMENTS_COLS_V1_10:
1701 2 : if (api_version != PGSS_V1_10)
1702 0 : elog(ERROR, "incorrect number of output arguments");
1703 2 : break;
1704 116 : case PG_STAT_STATEMENTS_COLS_V1_11:
1705 116 : if (api_version != PGSS_V1_11)
1706 0 : elog(ERROR, "incorrect number of output arguments");
1707 116 : break;
1708 0 : default:
1709 0 : elog(ERROR, "incorrect number of output arguments");
1710 : }
1711 :
1712 : /*
1713 : * We'd like to load the query text file (if needed) while not holding any
1714 : * lock on pgss->lock. In the worst case we'll have to do this again
1715 : * after we have the lock, but it's unlikely enough to make this a win
1716 : * despite occasional duplicated work. We need to reload if anybody
1717 : * writes to the file (either a retail qtext_store(), or a garbage
1718 : * collection) between this point and where we've gotten shared lock. If
1719 : * a qtext_store is actually in progress when we look, we might as well
1720 : * skip the speculative load entirely.
1721 : */
1722 124 : if (showtext)
1723 : {
1724 : int n_writers;
1725 :
1726 : /* Take the mutex so we can examine variables */
1727 : {
1728 124 : volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1729 :
1730 124 : SpinLockAcquire(&s->mutex);
1731 124 : extent = s->extent;
1732 124 : n_writers = s->n_writers;
1733 124 : gc_count = s->gc_count;
1734 124 : SpinLockRelease(&s->mutex);
1735 : }
1736 :
1737 : /* No point in loading file now if there are active writers */
1738 124 : if (n_writers == 0)
1739 124 : qbuffer = qtext_load_file(&qbuffer_size);
1740 : }
1741 :
1742 : /*
1743 : * Get shared lock, load or reload the query text file if we must, and
1744 : * iterate over the hashtable entries.
1745 : *
1746 : * With a large hash table, we might be holding the lock rather longer
1747 : * than one could wish. However, this only blocks creation of new hash
1748 : * table entries, and the larger the hash table the less likely that is to
1749 : * be needed. So we can hope this is okay. Perhaps someday we'll decide
1750 : * we need to partition the hash table to limit the time spent holding any
1751 : * one lock.
1752 : */
1753 124 : LWLockAcquire(pgss->lock, LW_SHARED);
1754 :
1755 124 : if (showtext)
1756 : {
1757 : /*
1758 : * Here it is safe to examine extent and gc_count without taking the
1759 : * mutex. Note that although other processes might change
1760 : * pgss->extent just after we look at it, the strings they then write
1761 : * into the file cannot yet be referenced in the hashtable, so we
1762 : * don't care whether we see them or not.
1763 : *
1764 : * If qtext_load_file fails, we just press on; we'll return NULL for
1765 : * every query text.
1766 : */
1767 124 : if (qbuffer == NULL ||
1768 124 : pgss->extent != extent ||
1769 124 : pgss->gc_count != gc_count)
1770 : {
1771 0 : free(qbuffer);
1772 0 : qbuffer = qtext_load_file(&qbuffer_size);
1773 : }
1774 : }
1775 :
1776 124 : hash_seq_init(&hash_seq, pgss_hash);
1777 51990 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
1778 : {
1779 : Datum values[PG_STAT_STATEMENTS_COLS];
1780 : bool nulls[PG_STAT_STATEMENTS_COLS];
1781 51866 : int i = 0;
1782 : Counters tmp;
1783 : double stddev;
1784 51866 : int64 queryid = entry->key.queryid;
1785 : TimestampTz stats_since;
1786 : TimestampTz minmax_stats_since;
1787 :
1788 51866 : memset(values, 0, sizeof(values));
1789 51866 : memset(nulls, 0, sizeof(nulls));
1790 :
1791 51866 : values[i++] = ObjectIdGetDatum(entry->key.userid);
1792 51866 : values[i++] = ObjectIdGetDatum(entry->key.dbid);
1793 51866 : if (api_version >= PGSS_V1_9)
1794 51840 : values[i++] = BoolGetDatum(entry->key.toplevel);
1795 :
1796 51866 : if (is_allowed_role || entry->key.userid == userid)
1797 : {
1798 51866 : if (api_version >= PGSS_V1_2)
1799 51866 : values[i++] = Int64GetDatumFast(queryid);
1800 :
1801 51866 : if (showtext)
1802 : {
1803 51866 : char *qstr = qtext_fetch(entry->query_offset,
1804 : entry->query_len,
1805 : qbuffer,
1806 : qbuffer_size);
1807 :
1808 51866 : if (qstr)
1809 : {
1810 : char *enc;
1811 :
1812 51866 : enc = pg_any_to_server(qstr,
1813 : entry->query_len,
1814 : entry->encoding);
1815 :
1816 51866 : values[i++] = CStringGetTextDatum(enc);
1817 :
1818 51866 : if (enc != qstr)
1819 0 : pfree(enc);
1820 : }
1821 : else
1822 : {
1823 : /* Just return a null if we fail to find the text */
1824 0 : nulls[i++] = true;
1825 : }
1826 : }
1827 : else
1828 : {
1829 : /* Query text not requested */
1830 0 : nulls[i++] = true;
1831 : }
1832 : }
1833 : else
1834 : {
1835 : /* Don't show queryid */
1836 0 : if (api_version >= PGSS_V1_2)
1837 0 : nulls[i++] = true;
1838 :
1839 : /*
1840 : * Don't show query text, but hint as to the reason for not doing
1841 : * so if it was requested
1842 : */
1843 0 : if (showtext)
1844 0 : values[i++] = CStringGetTextDatum("<insufficient privilege>");
1845 : else
1846 0 : nulls[i++] = true;
1847 : }
1848 :
1849 : /* copy counters to a local variable to keep locking time short */
1850 : {
1851 51866 : volatile pgssEntry *e = (volatile pgssEntry *) entry;
1852 :
1853 51866 : SpinLockAcquire(&e->mutex);
1854 51866 : tmp = e->counters;
1855 51866 : stats_since = e->stats_since;
1856 51866 : minmax_stats_since = e->minmax_stats_since;
1857 51866 : SpinLockRelease(&e->mutex);
1858 : }
1859 :
1860 : /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1861 51866 : if (IS_STICKY(tmp))
1862 1336 : continue;
1863 :
1864 : /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1865 151590 : for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1866 : {
1867 101060 : if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1868 : {
1869 101052 : values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1870 101052 : values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1871 : }
1872 :
1873 101060 : if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1874 : api_version >= PGSS_V1_8)
1875 : {
1876 101052 : values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1877 101052 : values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1878 101052 : values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1879 :
1880 : /*
1881 : * Note we are calculating the population variance here, not
1882 : * the sample variance, as we have data for the whole
1883 : * population, so Bessel's correction is not used, and we
1884 : * don't divide by tmp.calls - 1.
1885 : */
1886 101052 : if (tmp.calls[kind] > 1)
1887 9018 : stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1888 : else
1889 92034 : stddev = 0.0;
1890 101052 : values[i++] = Float8GetDatumFast(stddev);
1891 : }
1892 : }
1893 50530 : values[i++] = Int64GetDatumFast(tmp.rows);
1894 50530 : values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1895 50530 : values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1896 50530 : if (api_version >= PGSS_V1_1)
1897 50530 : values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1898 50530 : values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1899 50530 : values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1900 50530 : values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1901 50530 : if (api_version >= PGSS_V1_1)
1902 50530 : values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1903 50530 : values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1904 50530 : values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1905 50530 : values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1906 50530 : if (api_version >= PGSS_V1_1)
1907 : {
1908 50530 : values[i++] = Float8GetDatumFast(tmp.shared_blk_read_time);
1909 50530 : values[i++] = Float8GetDatumFast(tmp.shared_blk_write_time);
1910 : }
1911 50530 : if (api_version >= PGSS_V1_11)
1912 : {
1913 50468 : values[i++] = Float8GetDatumFast(tmp.local_blk_read_time);
1914 50468 : values[i++] = Float8GetDatumFast(tmp.local_blk_write_time);
1915 : }
1916 50530 : if (api_version >= PGSS_V1_10)
1917 : {
1918 50490 : values[i++] = Float8GetDatumFast(tmp.temp_blk_read_time);
1919 50490 : values[i++] = Float8GetDatumFast(tmp.temp_blk_write_time);
1920 : }
1921 50530 : if (api_version >= PGSS_V1_8)
1922 : {
1923 : char buf[256];
1924 : Datum wal_bytes;
1925 :
1926 50522 : values[i++] = Int64GetDatumFast(tmp.wal_records);
1927 50522 : values[i++] = Int64GetDatumFast(tmp.wal_fpi);
1928 :
1929 50522 : snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1930 :
1931 : /* Convert to numeric. */
1932 50522 : wal_bytes = DirectFunctionCall3(numeric_in,
1933 : CStringGetDatum(buf),
1934 : ObjectIdGetDatum(0),
1935 : Int32GetDatum(-1));
1936 50522 : values[i++] = wal_bytes;
1937 : }
1938 50530 : if (api_version >= PGSS_V1_10)
1939 : {
1940 50490 : values[i++] = Int64GetDatumFast(tmp.jit_functions);
1941 50490 : values[i++] = Float8GetDatumFast(tmp.jit_generation_time);
1942 50490 : values[i++] = Int64GetDatumFast(tmp.jit_inlining_count);
1943 50490 : values[i++] = Float8GetDatumFast(tmp.jit_inlining_time);
1944 50490 : values[i++] = Int64GetDatumFast(tmp.jit_optimization_count);
1945 50490 : values[i++] = Float8GetDatumFast(tmp.jit_optimization_time);
1946 50490 : values[i++] = Int64GetDatumFast(tmp.jit_emission_count);
1947 50490 : values[i++] = Float8GetDatumFast(tmp.jit_emission_time);
1948 : }
1949 50530 : if (api_version >= PGSS_V1_11)
1950 : {
1951 50468 : values[i++] = Int64GetDatumFast(tmp.jit_deform_count);
1952 50468 : values[i++] = Float8GetDatumFast(tmp.jit_deform_time);
1953 50468 : values[i++] = TimestampTzGetDatum(stats_since);
1954 50468 : values[i++] = TimestampTzGetDatum(minmax_stats_since);
1955 : }
1956 :
1957 : Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
1958 : api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
1959 : api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
1960 : api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
1961 : api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
1962 : api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
1963 : api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
1964 : api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
1965 : -1 /* fail if you forget to update this assert */ ));
1966 :
1967 50530 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1968 : }
1969 :
1970 124 : LWLockRelease(pgss->lock);
1971 :
1972 124 : free(qbuffer);
1973 124 : }
1974 :
1975 : /* Number of output arguments (columns) for pg_stat_statements_info */
1976 : #define PG_STAT_STATEMENTS_INFO_COLS 2
1977 :
1978 : /*
1979 : * Return statistics of pg_stat_statements.
1980 : */
1981 : Datum
1982 4 : pg_stat_statements_info(PG_FUNCTION_ARGS)
1983 : {
1984 : pgssGlobalStats stats;
1985 : TupleDesc tupdesc;
1986 4 : Datum values[PG_STAT_STATEMENTS_INFO_COLS] = {0};
1987 4 : bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
1988 :
1989 4 : if (!pgss || !pgss_hash)
1990 0 : ereport(ERROR,
1991 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1992 : errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1993 :
1994 : /* Build a tuple descriptor for our result type */
1995 4 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1996 0 : elog(ERROR, "return type must be a row type");
1997 :
1998 : /* Read global statistics for pg_stat_statements */
1999 : {
2000 4 : volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2001 :
2002 4 : SpinLockAcquire(&s->mutex);
2003 4 : stats = s->stats;
2004 4 : SpinLockRelease(&s->mutex);
2005 : }
2006 :
2007 4 : values[0] = Int64GetDatum(stats.dealloc);
2008 4 : values[1] = TimestampTzGetDatum(stats.stats_reset);
2009 :
2010 4 : PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
2011 : }
2012 :
2013 : /*
2014 : * Estimate shared memory space needed.
2015 : */
2016 : static Size
2017 12 : pgss_memsize(void)
2018 : {
2019 : Size size;
2020 :
2021 12 : size = MAXALIGN(sizeof(pgssSharedState));
2022 12 : size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
2023 :
2024 12 : return size;
2025 : }
2026 :
2027 : /*
2028 : * Allocate a new hashtable entry.
2029 : * caller must hold an exclusive lock on pgss->lock
2030 : *
2031 : * "query" need not be null-terminated; we rely on query_len instead
2032 : *
2033 : * If "sticky" is true, make the new entry artificially sticky so that it will
2034 : * probably still be there when the query finishes execution. We do this by
2035 : * giving it a median usage value rather than the normal value. (Strictly
2036 : * speaking, query strings are normalized on a best effort basis, though it
2037 : * would be difficult to demonstrate this even under artificial conditions.)
2038 : *
2039 : * Note: despite needing exclusive lock, it's not an error for the target
2040 : * entry to already exist. This is because pgss_store releases and
2041 : * reacquires lock after failing to find a match; so someone else could
2042 : * have made the entry while we waited to get exclusive lock.
2043 : */
2044 : static pgssEntry *
2045 53318 : entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2046 : bool sticky)
2047 : {
2048 : pgssEntry *entry;
2049 : bool found;
2050 :
2051 : /* Make space if needed */
2052 53318 : while (hash_get_num_entries(pgss_hash) >= pgss_max)
2053 0 : entry_dealloc();
2054 :
2055 : /* Find or create an entry with desired hash code */
2056 53318 : entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2057 :
2058 53318 : if (!found)
2059 : {
2060 : /* New entry, initialize it */
2061 :
2062 : /* reset the statistics */
2063 53318 : memset(&entry->counters, 0, sizeof(Counters));
2064 : /* set the appropriate initial usage count */
2065 53318 : entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
2066 : /* re-initialize the mutex each time ... we assume no one using it */
2067 53318 : SpinLockInit(&entry->mutex);
2068 : /* ... and don't forget the query text metadata */
2069 : Assert(query_len >= 0);
2070 53318 : entry->query_offset = query_offset;
2071 53318 : entry->query_len = query_len;
2072 53318 : entry->encoding = encoding;
2073 53318 : entry->stats_since = GetCurrentTimestamp();
2074 53318 : entry->minmax_stats_since = entry->stats_since;
2075 : }
2076 :
2077 53318 : return entry;
2078 : }
2079 :
2080 : /*
2081 : * qsort comparator for sorting into increasing usage order
2082 : */
2083 : static int
2084 0 : entry_cmp(const void *lhs, const void *rhs)
2085 : {
2086 0 : double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2087 0 : double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2088 :
2089 0 : if (l_usage < r_usage)
2090 0 : return -1;
2091 0 : else if (l_usage > r_usage)
2092 0 : return +1;
2093 : else
2094 0 : return 0;
2095 : }
2096 :
2097 : /*
2098 : * Deallocate least-used entries.
2099 : *
2100 : * Caller must hold an exclusive lock on pgss->lock.
2101 : */
2102 : static void
2103 0 : entry_dealloc(void)
2104 : {
2105 : HASH_SEQ_STATUS hash_seq;
2106 : pgssEntry **entries;
2107 : pgssEntry *entry;
2108 : int nvictims;
2109 : int i;
2110 : Size tottextlen;
2111 : int nvalidtexts;
2112 :
2113 : /*
2114 : * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2115 : * While we're scanning the table, apply the decay factor to the usage
2116 : * values, and update the mean query length.
2117 : *
2118 : * Note that the mean query length is almost immediately obsolete, since
2119 : * we compute it before not after discarding the least-used entries.
2120 : * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2121 : * making two passes to get a more current result. Likewise, the new
2122 : * cur_median_usage includes the entries we're about to zap.
2123 : */
2124 :
2125 0 : entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2126 :
2127 0 : i = 0;
2128 0 : tottextlen = 0;
2129 0 : nvalidtexts = 0;
2130 :
2131 0 : hash_seq_init(&hash_seq, pgss_hash);
2132 0 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
2133 : {
2134 0 : entries[i++] = entry;
2135 : /* "Sticky" entries get a different usage decay rate. */
2136 0 : if (IS_STICKY(entry->counters))
2137 0 : entry->counters.usage *= STICKY_DECREASE_FACTOR;
2138 : else
2139 0 : entry->counters.usage *= USAGE_DECREASE_FACTOR;
2140 : /* In the mean length computation, ignore dropped texts. */
2141 0 : if (entry->query_len >= 0)
2142 : {
2143 0 : tottextlen += entry->query_len + 1;
2144 0 : nvalidtexts++;
2145 : }
2146 : }
2147 :
2148 : /* Sort into increasing order by usage */
2149 0 : qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2150 :
2151 : /* Record the (approximate) median usage */
2152 0 : if (i > 0)
2153 0 : pgss->cur_median_usage = entries[i / 2]->counters.usage;
2154 : /* Record the mean query length */
2155 0 : if (nvalidtexts > 0)
2156 0 : pgss->mean_query_len = tottextlen / nvalidtexts;
2157 : else
2158 0 : pgss->mean_query_len = ASSUMED_LENGTH_INIT;
2159 :
2160 : /* Now zap an appropriate fraction of lowest-usage entries */
2161 0 : nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2162 0 : nvictims = Min(nvictims, i);
2163 :
2164 0 : for (i = 0; i < nvictims; i++)
2165 : {
2166 0 : hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2167 : }
2168 :
2169 0 : pfree(entries);
2170 :
2171 : /* Increment the number of times entries are deallocated */
2172 : {
2173 0 : volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2174 :
2175 0 : SpinLockAcquire(&s->mutex);
2176 0 : s->stats.dealloc += 1;
2177 0 : SpinLockRelease(&s->mutex);
2178 : }
2179 0 : }
2180 :
2181 : /*
2182 : * Given a query string (not necessarily null-terminated), allocate a new
2183 : * entry in the external query text file and store the string there.
2184 : *
2185 : * If successful, returns true, and stores the new entry's offset in the file
2186 : * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2187 : * number of garbage collections that have occurred so far.
2188 : *
2189 : * On failure, returns false.
2190 : *
2191 : * At least a shared lock on pgss->lock must be held by the caller, so as
2192 : * to prevent a concurrent garbage collection. Share-lock-holding callers
2193 : * should pass a gc_count pointer to obtain the number of garbage collections,
2194 : * so that they can recheck the count after obtaining exclusive lock to
2195 : * detect whether a garbage collection occurred (and removed this entry).
2196 : */
2197 : static bool
2198 53310 : qtext_store(const char *query, int query_len,
2199 : Size *query_offset, int *gc_count)
2200 : {
2201 : Size off;
2202 : int fd;
2203 :
2204 : /*
2205 : * We use a spinlock to protect extent/n_writers/gc_count, so that
2206 : * multiple processes may execute this function concurrently.
2207 : */
2208 : {
2209 53310 : volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2210 :
2211 53310 : SpinLockAcquire(&s->mutex);
2212 53310 : off = s->extent;
2213 53310 : s->extent += query_len + 1;
2214 53310 : s->n_writers++;
2215 53310 : if (gc_count)
2216 53310 : *gc_count = s->gc_count;
2217 53310 : SpinLockRelease(&s->mutex);
2218 : }
2219 :
2220 53310 : *query_offset = off;
2221 :
2222 : /*
2223 : * Don't allow the file to grow larger than what qtext_load_file can
2224 : * (theoretically) handle. This has been seen to be reachable on 32-bit
2225 : * platforms.
2226 : */
2227 53310 : if (unlikely(query_len >= MaxAllocHugeSize - off))
2228 : {
2229 0 : errno = EFBIG; /* not quite right, but it'll do */
2230 0 : fd = -1;
2231 0 : goto error;
2232 : }
2233 :
2234 : /* Now write the data into the successfully-reserved part of the file */
2235 53310 : fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
2236 53310 : if (fd < 0)
2237 0 : goto error;
2238 :
2239 53310 : if (pg_pwrite(fd, query, query_len, off) != query_len)
2240 0 : goto error;
2241 53310 : if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2242 0 : goto error;
2243 :
2244 53310 : CloseTransientFile(fd);
2245 :
2246 : /* Mark our write complete */
2247 : {
2248 53310 : volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2249 :
2250 53310 : SpinLockAcquire(&s->mutex);
2251 53310 : s->n_writers--;
2252 53310 : SpinLockRelease(&s->mutex);
2253 : }
2254 :
2255 53310 : return true;
2256 :
2257 0 : error:
2258 0 : ereport(LOG,
2259 : (errcode_for_file_access(),
2260 : errmsg("could not write file \"%s\": %m",
2261 : PGSS_TEXT_FILE)));
2262 :
2263 0 : if (fd >= 0)
2264 0 : CloseTransientFile(fd);
2265 :
2266 : /* Mark our write complete */
2267 : {
2268 0 : volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2269 :
2270 0 : SpinLockAcquire(&s->mutex);
2271 0 : s->n_writers--;
2272 0 : SpinLockRelease(&s->mutex);
2273 : }
2274 :
2275 0 : return false;
2276 : }
2277 :
2278 : /*
2279 : * Read the external query text file into a malloc'd buffer.
2280 : *
2281 : * Returns NULL (without throwing an error) if unable to read, eg
2282 : * file not there or insufficient memory.
2283 : *
2284 : * On success, the buffer size is also returned into *buffer_size.
2285 : *
2286 : * This can be called without any lock on pgss->lock, but in that case
2287 : * the caller is responsible for verifying that the result is sane.
2288 : */
2289 : static char *
2290 132 : qtext_load_file(Size *buffer_size)
2291 : {
2292 : char *buf;
2293 : int fd;
2294 : struct stat stat;
2295 : Size nread;
2296 :
2297 132 : fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
2298 132 : if (fd < 0)
2299 : {
2300 0 : if (errno != ENOENT)
2301 0 : ereport(LOG,
2302 : (errcode_for_file_access(),
2303 : errmsg("could not read file \"%s\": %m",
2304 : PGSS_TEXT_FILE)));
2305 0 : return NULL;
2306 : }
2307 :
2308 : /* Get file length */
2309 132 : if (fstat(fd, &stat))
2310 : {
2311 0 : ereport(LOG,
2312 : (errcode_for_file_access(),
2313 : errmsg("could not stat file \"%s\": %m",
2314 : PGSS_TEXT_FILE)));
2315 0 : CloseTransientFile(fd);
2316 0 : return NULL;
2317 : }
2318 :
2319 : /* Allocate buffer; beware that off_t might be wider than size_t */
2320 132 : if (stat.st_size <= MaxAllocHugeSize)
2321 132 : buf = (char *) malloc(stat.st_size);
2322 : else
2323 0 : buf = NULL;
2324 132 : if (buf == NULL)
2325 : {
2326 0 : ereport(LOG,
2327 : (errcode(ERRCODE_OUT_OF_MEMORY),
2328 : errmsg("out of memory"),
2329 : errdetail("Could not allocate enough memory to read file \"%s\".",
2330 : PGSS_TEXT_FILE)));
2331 0 : CloseTransientFile(fd);
2332 0 : return NULL;
2333 : }
2334 :
2335 : /*
2336 : * OK, slurp in the file. Windows fails if we try to read more than
2337 : * INT_MAX bytes at once, and other platforms might not like that either,
2338 : * so read a very large file in 1GB segments.
2339 : */
2340 132 : nread = 0;
2341 262 : while (nread < stat.st_size)
2342 : {
2343 130 : int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2344 :
2345 : /*
2346 : * If we get a short read and errno doesn't get set, the reason is
2347 : * probably that garbage collection truncated the file since we did
2348 : * the fstat(), so we don't log a complaint --- but we don't return
2349 : * the data, either, since it's most likely corrupt due to concurrent
2350 : * writes from garbage collection.
2351 : */
2352 130 : errno = 0;
2353 130 : if (read(fd, buf + nread, toread) != toread)
2354 : {
2355 0 : if (errno)
2356 0 : ereport(LOG,
2357 : (errcode_for_file_access(),
2358 : errmsg("could not read file \"%s\": %m",
2359 : PGSS_TEXT_FILE)));
2360 0 : free(buf);
2361 0 : CloseTransientFile(fd);
2362 0 : return NULL;
2363 : }
2364 130 : nread += toread;
2365 : }
2366 :
2367 132 : if (CloseTransientFile(fd) != 0)
2368 0 : ereport(LOG,
2369 : (errcode_for_file_access(),
2370 : errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2371 :
2372 132 : *buffer_size = nread;
2373 132 : return buf;
2374 : }
2375 :
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset/query_len are the text's position and length (excluding the
 * terminating null) within "buffer", which holds buffer_size bytes.
 *
 * We validate the given offset/length, and return NULL if bogus, if the
 * buffer itself is NULL, or if the byte following the text is not a null
 * terminator.  Otherwise, the result points to a null-terminated string
 * within the buffer.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/*
	 * Bogus offset/length?  The text plus its terminator must fit inside the
	 * buffer.  Compare against the space remaining after the offset rather
	 * than computing query_offset + query_len, which could wrap around for a
	 * corrupt offset and slip past the check.
	 */
	if (query_len < 0 ||
		query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;

	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;

	/* Looks OK */
	return buffer + query_offset;
}
2399 :
2400 : /*
2401 : * Do we need to garbage-collect the external query text file?
2402 : *
2403 : * Caller should hold at least a shared lock on pgss->lock.
2404 : */
2405 : static bool
2406 53310 : need_gc_qtexts(void)
2407 : {
2408 : Size extent;
2409 :
2410 : /* Read shared extent pointer */
2411 : {
2412 53310 : volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2413 :
2414 53310 : SpinLockAcquire(&s->mutex);
2415 53310 : extent = s->extent;
2416 53310 : SpinLockRelease(&s->mutex);
2417 : }
2418 :
2419 : /*
2420 : * Don't proceed if file does not exceed 512 bytes per possible entry.
2421 : *
2422 : * Here and in the next test, 32-bit machines have overflow hazards if
2423 : * pgss_max and/or mean_query_len are large. Force the multiplications
2424 : * and comparisons to be done in uint64 arithmetic to forestall trouble.
2425 : */
2426 53310 : if ((uint64) extent < (uint64) 512 * pgss_max)
2427 53310 : return false;
2428 :
2429 : /*
2430 : * Don't proceed if file is less than about 50% bloat. Nothing can or
2431 : * should be done in the event of unusually large query texts accounting
2432 : * for file's large size. We go to the trouble of maintaining the mean
2433 : * query length in order to prevent garbage collection from thrashing
2434 : * uselessly.
2435 : */
2436 0 : if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2437 0 : return false;
2438 :
2439 0 : return true;
2440 : }
2441 :
2442 : /*
2443 : * Garbage-collect orphaned query texts in external file.
2444 : *
2445 : * This won't be called often in the typical case, since it's likely that
2446 : * there won't be too much churn, and besides, a similar compaction process
2447 : * occurs when serializing to disk at shutdown or as part of resetting.
2448 : * Despite this, it seems prudent to plan for the edge case where the file
2449 : * becomes unreasonably large, with no other method of compaction likely to
2450 : * occur in the foreseeable future.
2451 : *
2452 : * The caller must hold an exclusive lock on pgss->lock.
2453 : *
2454 : * At the first sign of trouble we unlink the query text file to get a clean
2455 : * slate (although existing statistics are retained), rather than risk
2456 : * thrashing by allowing the same problem case to recur indefinitely.
2457 : */
2458 : static void
2459 0 : gc_qtexts(void)
2460 : {
2461 : char *qbuffer;
2462 : Size qbuffer_size;
2463 0 : FILE *qfile = NULL;
2464 : HASH_SEQ_STATUS hash_seq;
2465 : pgssEntry *entry;
2466 : Size extent;
2467 : int nentries;
2468 :
2469 : /*
2470 : * When called from pgss_store, some other session might have proceeded
2471 : * with garbage collection in the no-lock-held interim of lock strength
2472 : * escalation. Check once more that this is actually necessary.
2473 : */
2474 0 : if (!need_gc_qtexts())
2475 0 : return;
2476 :
2477 : /*
2478 : * Load the old texts file. If we fail (out of memory, for instance),
2479 : * invalidate query texts. Hopefully this is rare. It might seem better
2480 : * to leave things alone on an OOM failure, but the problem is that the
2481 : * file is only going to get bigger; hoping for a future non-OOM result is
2482 : * risky and can easily lead to complete denial of service.
2483 : */
2484 0 : qbuffer = qtext_load_file(&qbuffer_size);
2485 0 : if (qbuffer == NULL)
2486 0 : goto gc_fail;
2487 :
2488 : /*
2489 : * We overwrite the query texts file in place, so as to reduce the risk of
2490 : * an out-of-disk-space failure. Since the file is guaranteed not to get
2491 : * larger, this should always work on traditional filesystems; though we
2492 : * could still lose on copy-on-write filesystems.
2493 : */
2494 0 : qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
2495 0 : if (qfile == NULL)
2496 : {
2497 0 : ereport(LOG,
2498 : (errcode_for_file_access(),
2499 : errmsg("could not write file \"%s\": %m",
2500 : PGSS_TEXT_FILE)));
2501 0 : goto gc_fail;
2502 : }
2503 :
2504 0 : extent = 0;
2505 0 : nentries = 0;
2506 :
2507 0 : hash_seq_init(&hash_seq, pgss_hash);
2508 0 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
2509 : {
2510 0 : int query_len = entry->query_len;
2511 0 : char *qry = qtext_fetch(entry->query_offset,
2512 : query_len,
2513 : qbuffer,
2514 : qbuffer_size);
2515 :
2516 0 : if (qry == NULL)
2517 : {
2518 : /* Trouble ... drop the text */
2519 0 : entry->query_offset = 0;
2520 0 : entry->query_len = -1;
2521 : /* entry will not be counted in mean query length computation */
2522 0 : continue;
2523 : }
2524 :
2525 0 : if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2526 : {
2527 0 : ereport(LOG,
2528 : (errcode_for_file_access(),
2529 : errmsg("could not write file \"%s\": %m",
2530 : PGSS_TEXT_FILE)));
2531 0 : hash_seq_term(&hash_seq);
2532 0 : goto gc_fail;
2533 : }
2534 :
2535 0 : entry->query_offset = extent;
2536 0 : extent += query_len + 1;
2537 0 : nentries++;
2538 : }
2539 :
2540 : /*
2541 : * Truncate away any now-unused space. If this fails for some odd reason,
2542 : * we log it, but there's no need to fail.
2543 : */
2544 0 : if (ftruncate(fileno(qfile), extent) != 0)
2545 0 : ereport(LOG,
2546 : (errcode_for_file_access(),
2547 : errmsg("could not truncate file \"%s\": %m",
2548 : PGSS_TEXT_FILE)));
2549 :
2550 0 : if (FreeFile(qfile))
2551 : {
2552 0 : ereport(LOG,
2553 : (errcode_for_file_access(),
2554 : errmsg("could not write file \"%s\": %m",
2555 : PGSS_TEXT_FILE)));
2556 0 : qfile = NULL;
2557 0 : goto gc_fail;
2558 : }
2559 :
2560 0 : elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2561 : pgss->extent, extent);
2562 :
2563 : /* Reset the shared extent pointer */
2564 0 : pgss->extent = extent;
2565 :
2566 : /*
2567 : * Also update the mean query length, to be sure that need_gc_qtexts()
2568 : * won't still think we have a problem.
2569 : */
2570 0 : if (nentries > 0)
2571 0 : pgss->mean_query_len = extent / nentries;
2572 : else
2573 0 : pgss->mean_query_len = ASSUMED_LENGTH_INIT;
2574 :
2575 0 : free(qbuffer);
2576 :
2577 : /*
2578 : * OK, count a garbage collection cycle. (Note: even though we have
2579 : * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2580 : * other processes may examine gc_count while holding only the mutex.
2581 : * Also, we have to advance the count *after* we've rewritten the file,
2582 : * else other processes might not realize they read a stale file.)
2583 : */
2584 0 : record_gc_qtexts();
2585 :
2586 0 : return;
2587 :
2588 0 : gc_fail:
2589 : /* clean up resources */
2590 0 : if (qfile)
2591 0 : FreeFile(qfile);
2592 0 : free(qbuffer);
2593 :
2594 : /*
2595 : * Since the contents of the external file are now uncertain, mark all
2596 : * hashtable entries as having invalid texts.
2597 : */
2598 0 : hash_seq_init(&hash_seq, pgss_hash);
2599 0 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
2600 : {
2601 0 : entry->query_offset = 0;
2602 0 : entry->query_len = -1;
2603 : }
2604 :
2605 : /*
2606 : * Destroy the query text file and create a new, empty one
2607 : */
2608 0 : (void) unlink(PGSS_TEXT_FILE);
2609 0 : qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
2610 0 : if (qfile == NULL)
2611 0 : ereport(LOG,
2612 : (errcode_for_file_access(),
2613 : errmsg("could not recreate file \"%s\": %m",
2614 : PGSS_TEXT_FILE)));
2615 : else
2616 0 : FreeFile(qfile);
2617 :
2618 : /* Reset the shared extent pointer */
2619 0 : pgss->extent = 0;
2620 :
2621 : /* Reset mean_query_len to match the new state */
2622 0 : pgss->mean_query_len = ASSUMED_LENGTH_INIT;
2623 :
2624 : /*
2625 : * Bump the GC count even though we failed.
2626 : *
2627 : * This is needed to make concurrent readers of file without any lock on
2628 : * pgss->lock notice existence of new version of file. Once readers
2629 : * subsequently observe a change in GC count with pgss->lock held, that
2630 : * forces a safe reopen of file. Writers also require that we bump here,
2631 : * of course. (As required by locking protocol, readers and writers don't
2632 : * trust earlier file contents until gc_count is found unchanged after
2633 : * pgss->lock acquired in shared or exclusive mode respectively.)
2634 : */
2635 0 : record_gc_qtexts();
2636 : }
2637 :
/*
 * Reset one hashtable entry "e" (a pgssEntry pointer; NULL is ignored).
 *
 * This is a helper for entry_reset() and relies on names in the expansion
 * scope: minmax_only selects between zeroing only the min/max timings
 * (stamping minmax_stats_since with stats_reset) and removing the entry
 * from pgss_hash (incrementing num_remove).
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, e.g. in an unbraced if/else.  Note that "e" is
 * evaluated more than once, so it must be free of side effects.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
	if (e) \
	{ \
		if (minmax_only) \
		{ \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				(e)->counters.max_time[kind] = 0; \
				(e)->counters.min_time[kind] = 0; \
			} \
			(e)->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	} \
} while (0)
2656 :
2657 : /*
2658 : * Reset entries corresponding to parameters passed.
2659 : */
2660 : static TimestampTz
2661 102 : entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only)
2662 : {
2663 : HASH_SEQ_STATUS hash_seq;
2664 : pgssEntry *entry;
2665 : FILE *qfile;
2666 : long num_entries;
2667 102 : long num_remove = 0;
2668 : pgssHashKey key;
2669 : TimestampTz stats_reset;
2670 :
2671 102 : if (!pgss || !pgss_hash)
2672 0 : ereport(ERROR,
2673 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2674 : errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
2675 :
2676 102 : LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
2677 102 : num_entries = hash_get_num_entries(pgss_hash);
2678 :
2679 102 : stats_reset = GetCurrentTimestamp();
2680 :
2681 102 : if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
2682 : {
2683 : /* If all the parameters are available, use the fast path. */
2684 2 : memset(&key, 0, sizeof(pgssHashKey));
2685 2 : key.userid = userid;
2686 2 : key.dbid = dbid;
2687 2 : key.queryid = queryid;
2688 :
2689 : /*
2690 : * Reset the entry if it exists, starting with the non-top-level
2691 : * entry.
2692 : */
2693 2 : key.toplevel = false;
2694 2 : entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2695 :
2696 2 : SINGLE_ENTRY_RESET(entry);
2697 :
2698 : /* Also reset the top-level entry if it exists. */
2699 2 : key.toplevel = true;
2700 2 : entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2701 :
2702 2 : SINGLE_ENTRY_RESET(entry);
2703 : }
2704 100 : else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
2705 : {
2706 : /* Reset entries corresponding to valid parameters. */
2707 8 : hash_seq_init(&hash_seq, pgss_hash);
2708 96 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
2709 : {
2710 88 : if ((!userid || entry->key.userid == userid) &&
2711 68 : (!dbid || entry->key.dbid == dbid) &&
2712 64 : (!queryid || entry->key.queryid == queryid))
2713 : {
2714 14 : SINGLE_ENTRY_RESET(entry);
2715 : }
2716 : }
2717 : }
2718 : else
2719 : {
2720 : /* Reset all entries. */
2721 92 : hash_seq_init(&hash_seq, pgss_hash);
2722 840 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
2723 : {
2724 792 : SINGLE_ENTRY_RESET(entry);
2725 : }
2726 : }
2727 :
2728 : /* All entries are removed? */
2729 102 : if (num_entries != num_remove)
2730 12 : goto release_lock;
2731 :
2732 : /*
2733 : * Reset global statistics for pg_stat_statements since all entries are
2734 : * removed.
2735 : */
2736 : {
2737 90 : volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2738 :
2739 90 : SpinLockAcquire(&s->mutex);
2740 90 : s->stats.dealloc = 0;
2741 90 : s->stats.stats_reset = stats_reset;
2742 90 : SpinLockRelease(&s->mutex);
2743 : }
2744 :
2745 : /*
2746 : * Write new empty query file, perhaps even creating a new one to recover
2747 : * if the file was missing.
2748 : */
2749 90 : qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
2750 90 : if (qfile == NULL)
2751 : {
2752 0 : ereport(LOG,
2753 : (errcode_for_file_access(),
2754 : errmsg("could not create file \"%s\": %m",
2755 : PGSS_TEXT_FILE)));
2756 0 : goto done;
2757 : }
2758 :
2759 : /* If ftruncate fails, log it, but it's not a fatal problem */
2760 90 : if (ftruncate(fileno(qfile), 0) != 0)
2761 0 : ereport(LOG,
2762 : (errcode_for_file_access(),
2763 : errmsg("could not truncate file \"%s\": %m",
2764 : PGSS_TEXT_FILE)));
2765 :
2766 90 : FreeFile(qfile);
2767 :
2768 90 : done:
2769 90 : pgss->extent = 0;
2770 : /* This counts as a query text garbage collection for our purposes */
2771 90 : record_gc_qtexts();
2772 :
2773 102 : release_lock:
2774 102 : LWLockRelease(pgss->lock);
2775 :
2776 102 : return stats_reset;
2777 : }
2778 :
2779 : /*
2780 : * Generate a normalized version of the query string that will be used to
2781 : * represent all similar queries.
2782 : *
2783 : * Note that the normalized representation may well vary depending on
2784 : * just which "equivalent" query is used to create the hashtable entry.
2785 : * We assume this is OK.
2786 : *
2787 : * If query_loc > 0, then "query" has been advanced by that much compared to
2788 : * the original string start, so we need to translate the provided locations
2789 : * to compensate. (This lets us avoid re-scanning statements before the one
2790 : * of interest, so it's worth doing.)
2791 : *
2792 : * *query_len_p contains the input string length, and is updated with
2793 : * the result string length on exit. The resulting string might be longer
2794 : * or shorter depending on what happens with replacement of constants.
2795 : *
2796 : * Returns a palloc'd string.
2797 : */
2798 : static char *
2799 19312 : generate_normalized_query(JumbleState *jstate, const char *query,
2800 : int query_loc, int *query_len_p)
2801 : {
2802 : char *norm_query;
2803 19312 : int query_len = *query_len_p;
2804 : int i,
2805 : norm_query_buflen, /* Space allowed for norm_query */
2806 : len_to_wrt, /* Length (in bytes) to write */
2807 19312 : quer_loc = 0, /* Source query byte location */
2808 19312 : n_quer_loc = 0, /* Normalized query byte location */
2809 19312 : last_off = 0, /* Offset from start for previous tok */
2810 19312 : last_tok_len = 0; /* Length (in bytes) of that tok */
2811 :
2812 : /*
2813 : * Get constants' lengths (core system only gives us locations). Note
2814 : * this also ensures the items are sorted by location.
2815 : */
2816 19312 : fill_in_constant_lengths(jstate, query, query_loc);
2817 :
2818 : /*
2819 : * Allow for $n symbols to be longer than the constants they replace.
2820 : * Constants must take at least one byte in text form, while a $n symbol
2821 : * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2822 : * could refine that limit based on the max value of n for the current
2823 : * query, but it hardly seems worth any extra effort to do so.
2824 : */
2825 19312 : norm_query_buflen = query_len + jstate->clocations_count * 10;
2826 :
2827 : /* Allocate result buffer */
2828 19312 : norm_query = palloc(norm_query_buflen + 1);
2829 :
2830 80230 : for (i = 0; i < jstate->clocations_count; i++)
2831 : {
2832 : int off, /* Offset from start for cur tok */
2833 : tok_len; /* Length (in bytes) of that tok */
2834 :
2835 60918 : off = jstate->clocations[i].location;
2836 : /* Adjust recorded location if we're dealing with partial string */
2837 60918 : off -= query_loc;
2838 :
2839 60918 : tok_len = jstate->clocations[i].length;
2840 :
2841 60918 : if (tok_len < 0)
2842 432 : continue; /* ignore any duplicates */
2843 :
2844 : /* Copy next chunk (what precedes the next constant) */
2845 60486 : len_to_wrt = off - last_off;
2846 60486 : len_to_wrt -= last_tok_len;
2847 :
2848 : Assert(len_to_wrt >= 0);
2849 60486 : memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2850 60486 : n_quer_loc += len_to_wrt;
2851 :
2852 : /* And insert a param symbol in place of the constant token */
2853 120972 : n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
2854 60486 : i + 1 + jstate->highest_extern_param_id);
2855 :
2856 60486 : quer_loc = off + tok_len;
2857 60486 : last_off = off;
2858 60486 : last_tok_len = tok_len;
2859 : }
2860 :
2861 : /*
2862 : * We've copied up until the last ignorable constant. Copy over the
2863 : * remaining bytes of the original query string.
2864 : */
2865 19312 : len_to_wrt = query_len - quer_loc;
2866 :
2867 : Assert(len_to_wrt >= 0);
2868 19312 : memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2869 19312 : n_quer_loc += len_to_wrt;
2870 :
2871 : Assert(n_quer_loc <= norm_query_buflen);
2872 19312 : norm_query[n_quer_loc] = '\0';
2873 :
2874 19312 : *query_len_p = n_quer_loc;
2875 19312 : return norm_query;
2876 : }
2877 :
2878 : /*
2879 : * Given a valid SQL string and an array of constant-location records,
2880 : * fill in the textual lengths of those constants.
2881 : *
2882 : * The constants may use any allowed constant syntax, such as float literals,
2883 : * bit-strings, single-quoted strings and dollar-quoted strings. This is
2884 : * accomplished by using the public API for the core scanner.
2885 : *
2886 : * It is the caller's job to ensure that the string is a valid SQL statement
2887 : * with constants at the indicated locations. Since in practice the string
2888 : * has already been parsed, and the locations that the caller provides will
2889 : * have originated from within the authoritative parser, this should not be
2890 : * a problem.
2891 : *
2892 : * Duplicate constant pointers are possible, and will have their lengths
2893 : * marked as '-1', so that they are later ignored. (Actually, we assume the
2894 : * lengths were initialized as -1 to start with, and don't change them here.)
2895 : *
2896 : * If query_loc > 0, then "query" has been advanced by that much compared to
2897 : * the original string start, so we need to translate the provided locations
2898 : * to compensate. (This lets us avoid re-scanning statements before the one
2899 : * of interest, so it's worth doing.)
2900 : *
2901 : * N.B. There is an assumption that a '-' character at a Const location begins
2902 : * a negative numeric constant. This precludes there ever being another
2903 : * reason for a constant to start with a '-'.
2904 : */
2905 : static void
2906 19312 : fill_in_constant_lengths(JumbleState *jstate, const char *query,
2907 : int query_loc)
2908 : {
2909 : LocationLen *locs;
2910 : core_yyscan_t yyscanner;
2911 : core_yy_extra_type yyextra;
2912 : core_YYSTYPE yylval;
2913 : YYLTYPE yylloc;
2914 19312 : int last_loc = -1;
2915 : int i;
2916 :
2917 : /*
2918 : * Sort the records by location so that we can process them in order while
2919 : * scanning the query text.
2920 : */
2921 19312 : if (jstate->clocations_count > 1)
2922 12812 : qsort(jstate->clocations, jstate->clocations_count,
2923 : sizeof(LocationLen), comp_location);
2924 19312 : locs = jstate->clocations;
2925 :
2926 : /* initialize the flex scanner --- should match raw_parser() */
2927 19312 : yyscanner = scanner_init(query,
2928 : &yyextra,
2929 : &ScanKeywords,
2930 : ScanKeywordTokens);
2931 :
2932 : /* we don't want to re-emit any escape string warnings */
2933 19312 : yyextra.escape_string_warning = false;
2934 :
2935 : /* Search for each constant, in sequence */
2936 80230 : for (i = 0; i < jstate->clocations_count; i++)
2937 : {
2938 60918 : int loc = locs[i].location;
2939 : int tok;
2940 :
2941 : /* Adjust recorded location if we're dealing with partial string */
2942 60918 : loc -= query_loc;
2943 :
2944 : Assert(loc >= 0);
2945 :
2946 60918 : if (loc <= last_loc)
2947 432 : continue; /* Duplicate constant, ignore */
2948 :
2949 : /* Lex tokens until we find the desired constant */
2950 : for (;;)
2951 : {
2952 455414 : tok = core_yylex(&yylval, &yylloc, yyscanner);
2953 :
2954 : /* We should not hit end-of-string, but if we do, behave sanely */
2955 455414 : if (tok == 0)
2956 0 : break; /* out of inner for-loop */
2957 :
2958 : /*
2959 : * We should find the token position exactly, but if we somehow
2960 : * run past it, work with that.
2961 : */
2962 455414 : if (yylloc >= loc)
2963 : {
2964 60486 : if (query[loc] == '-')
2965 : {
2966 : /*
2967 : * It's a negative value - this is the one and only case
2968 : * where we replace more than a single token.
2969 : *
2970 : * Do not compensate for the core system's special-case
2971 : * adjustment of location to that of the leading '-'
2972 : * operator in the event of a negative constant. It is
2973 : * also useful for our purposes to start from the minus
2974 : * symbol. In this way, queries like "select * from foo
2975 : * where bar = 1" and "select * from foo where bar = -2"
2976 : * will have identical normalized query strings.
2977 : */
2978 804 : tok = core_yylex(&yylval, &yylloc, yyscanner);
2979 804 : if (tok == 0)
2980 0 : break; /* out of inner for-loop */
2981 : }
2982 :
2983 : /*
2984 : * We now rely on the assumption that flex has placed a zero
2985 : * byte after the text of the current token in scanbuf.
2986 : */
2987 60486 : locs[i].length = strlen(yyextra.scanbuf + loc);
2988 60486 : break; /* out of inner for-loop */
2989 : }
2990 : }
2991 :
2992 : /* If we hit end-of-string, give up, leaving remaining lengths -1 */
2993 60486 : if (tok == 0)
2994 0 : break;
2995 :
2996 60486 : last_loc = loc;
2997 : }
2998 :
2999 19312 : scanner_finish(yyscanner);
3000 19312 : }
3001 :
3002 : /*
3003 : * comp_location: comparator for qsorting LocationLen structs by location
3004 : */
3005 : static int
3006 73044 : comp_location(const void *a, const void *b)
3007 : {
3008 73044 : int l = ((const LocationLen *) a)->location;
3009 73044 : int r = ((const LocationLen *) b)->location;
3010 :
3011 73044 : return pg_cmp_s32(l, r);
3012 : }
|