Line data Source code
1 : /*
2 : * psql - the PostgreSQL interactive terminal
3 : *
4 : * Copyright (c) 2000-2026, PostgreSQL Global Development Group
5 : *
6 : * src/bin/psql/crosstabview.c
7 : */
8 : #include "postgres_fe.h"
9 :
10 : #include "common.h"
11 : #include "common/int.h"
12 : #include "common/logging.h"
13 : #include "crosstabview.h"
14 : #include "pqexpbuffer.h"
15 : #include "psqlscanslash.h"
16 : #include "settings.h"
17 :
18 : /*
19 : * Value/position from the resultset that goes into the horizontal or vertical
20 : * crosstabview header.
21 : */
22 : typedef struct _pivot_field
23 : {
24 : /*
25 : * Pointer obtained from PQgetvalue() for colV or colH. Each distinct
26 : * value becomes an entry in the vertical header (colV), or horizontal
27 : * header (colH). A Null value is represented by a NULL pointer.
28 : */
29 : char *name;
30 :
31 : /*
32 : * When a sort is requested on an alternative column, this holds
33 : * PQgetvalue() for the sort column corresponding to <name>. If <name>
34 : * appear multiple times, it's the first value in the order of the results
35 : * that is kept. A Null value is represented by a NULL pointer.
36 : */
37 : char *sort_value;
38 :
39 : /*
40 : * Rank of this value, starting at 0. Initially, it's the relative
41 : * position of the first appearance of <name> in the resultset. For
42 : * example, if successive rows contain B,A,C,A,D then it's B:0,A:1,C:2,D:3
43 : * When a sort column is specified, ranks get updated in a final pass to
44 : * reflect the desired order.
45 : */
46 : int rank;
47 : } pivot_field;
48 :
49 : /* Node in avl_tree */
50 : typedef struct _avl_node
51 : {
52 : /* Node contents */
53 : pivot_field field;
54 :
55 : /*
56 : * Height of this node in the tree (number of nodes on the longest path to
57 : * a leaf).
58 : */
59 : int height;
60 :
61 : /*
62 : * Child nodes. [0] points to left subtree, [1] to right subtree. Never
63 : * NULL, points to the empty node avl_tree.end when no left or right
64 : * value.
65 : */
66 : struct _avl_node *children[2];
67 : } avl_node;
68 :
69 : /*
70 : * Control structure for the AVL tree (binary search tree kept
71 : * balanced with the AVL algorithm)
72 : */
73 : typedef struct _avl_tree
74 : {
75 : int count; /* Total number of nodes */
76 : avl_node *root; /* root of the tree */
77 : avl_node *end; /* Immutable dereferenceable empty tree */
78 : } avl_tree;
79 :
80 :
81 : static bool printCrosstab(const PGresult *result,
82 : int num_columns, pivot_field *piv_columns, int field_for_columns,
83 : int num_rows, pivot_field *piv_rows, int field_for_rows,
84 : int field_for_data);
85 : static void avlInit(avl_tree *tree);
86 : static void avlMergeValue(avl_tree *tree, char *name, char *sort_value);
87 : static int avlCollectFields(avl_tree *tree, avl_node *node,
88 : pivot_field *fields, int idx);
89 : static void avlFree(avl_tree *tree, avl_node *node);
90 : static void rankSort(int num_columns, pivot_field *piv_columns);
91 : static int indexOfColumn(char *arg, const PGresult *res);
92 : static int pivotFieldCompare(const void *a, const void *b);
93 : static int rankCompare(const void *a, const void *b);
94 :
95 :
96 : /*
97 : * Main entry point to this module.
98 : *
99 : * Process the data from *res according to the options in pset (global),
100 : * to generate the horizontal and vertical headers contents,
101 : * then call printCrosstab() for the actual output.
102 : */
103 : bool
104 66 : PrintResultInCrosstab(const PGresult *res)
105 : {
106 66 : bool retval = false;
107 : avl_tree piv_columns;
108 : avl_tree piv_rows;
109 66 : pivot_field *array_columns = NULL;
110 66 : pivot_field *array_rows = NULL;
111 66 : int num_columns = 0;
112 66 : int num_rows = 0;
113 : int field_for_rows;
114 : int field_for_columns;
115 : int field_for_data;
116 : int sort_field_for_columns;
117 : int rn;
118 :
119 66 : avlInit(&piv_rows);
120 66 : avlInit(&piv_columns);
121 :
122 66 : if (PQresultStatus(res) != PGRES_TUPLES_OK)
123 : {
124 0 : pg_log_error("\\crosstabview: statement did not return a result set");
125 0 : goto error_return;
126 : }
127 :
128 66 : if (PQnfields(res) < 3)
129 : {
130 6 : pg_log_error("\\crosstabview: query must return at least three columns");
131 6 : goto error_return;
132 : }
133 :
134 : /* Process first optional arg (vertical header column) */
135 60 : if (pset.ctv_args[0] == NULL)
136 15 : field_for_rows = 0;
137 : else
138 : {
139 45 : field_for_rows = indexOfColumn(pset.ctv_args[0], res);
140 45 : if (field_for_rows < 0)
141 0 : goto error_return;
142 : }
143 :
144 : /* Process second optional arg (horizontal header column) */
145 60 : if (pset.ctv_args[1] == NULL)
146 15 : field_for_columns = 1;
147 : else
148 : {
149 45 : field_for_columns = indexOfColumn(pset.ctv_args[1], res);
150 45 : if (field_for_columns < 0)
151 3 : goto error_return;
152 : }
153 :
154 : /* Insist that header columns be distinct */
155 57 : if (field_for_columns == field_for_rows)
156 : {
157 3 : pg_log_error("\\crosstabview: vertical and horizontal headers must be different columns");
158 3 : goto error_return;
159 : }
160 :
161 : /* Process third optional arg (data column) */
162 54 : if (pset.ctv_args[2] == NULL)
163 : {
164 : int i;
165 :
166 : /*
167 : * If the data column was not specified, we search for the one not
168 : * used as either vertical or horizontal headers. Must be exactly
169 : * three columns, or this won't be unique.
170 : */
171 15 : if (PQnfields(res) != 3)
172 : {
173 0 : pg_log_error("\\crosstabview: data column must be specified when query returns more than three columns");
174 0 : goto error_return;
175 : }
176 :
177 15 : field_for_data = -1;
178 45 : for (i = 0; i < PQnfields(res); i++)
179 : {
180 45 : if (i != field_for_rows && i != field_for_columns)
181 : {
182 15 : field_for_data = i;
183 15 : break;
184 : }
185 : }
186 : Assert(field_for_data >= 0);
187 : }
188 : else
189 : {
190 39 : field_for_data = indexOfColumn(pset.ctv_args[2], res);
191 39 : if (field_for_data < 0)
192 9 : goto error_return;
193 : }
194 :
195 : /* Process fourth optional arg (horizontal header sort column) */
196 45 : if (pset.ctv_args[3] == NULL)
197 30 : sort_field_for_columns = -1; /* no sort column */
198 : else
199 : {
200 15 : sort_field_for_columns = indexOfColumn(pset.ctv_args[3], res);
201 15 : if (sort_field_for_columns < 0)
202 0 : goto error_return;
203 : }
204 :
205 : /*
206 : * First part: accumulate the names that go into the vertical and
207 : * horizontal headers, each into an AVL binary tree to build the set of
208 : * DISTINCT values.
209 : */
210 :
211 5061 : for (rn = 0; rn < PQntuples(res); rn++)
212 : {
213 : char *val;
214 : char *val1;
215 :
216 : /* horizontal */
217 5019 : val = PQgetisnull(res, rn, field_for_columns) ? NULL :
218 4998 : PQgetvalue(res, rn, field_for_columns);
219 5019 : val1 = NULL;
220 :
221 5094 : if (sort_field_for_columns >= 0 &&
222 75 : !PQgetisnull(res, rn, sort_field_for_columns))
223 75 : val1 = PQgetvalue(res, rn, sort_field_for_columns);
224 :
225 5019 : avlMergeValue(&piv_columns, val, val1);
226 :
227 5019 : if (piv_columns.count > CROSSTABVIEW_MAX_COLUMNS)
228 : {
229 3 : pg_log_error("\\crosstabview: maximum number of columns (%d) exceeded",
230 : CROSSTABVIEW_MAX_COLUMNS);
231 3 : goto error_return;
232 : }
233 :
234 : /* vertical */
235 5016 : val = PQgetisnull(res, rn, field_for_rows) ? NULL :
236 5010 : PQgetvalue(res, rn, field_for_rows);
237 :
238 5016 : avlMergeValue(&piv_rows, val, NULL);
239 : }
240 :
241 : /*
242 : * Second part: Generate sorted arrays from the AVL trees.
243 : */
244 :
245 42 : num_columns = piv_columns.count;
246 42 : num_rows = piv_rows.count;
247 :
248 42 : array_columns = pg_malloc_array(pivot_field, num_columns);
249 :
250 42 : array_rows = pg_malloc_array(pivot_field, num_rows);
251 :
252 42 : avlCollectFields(&piv_columns, piv_columns.root, array_columns, 0);
253 42 : avlCollectFields(&piv_rows, piv_rows.root, array_rows, 0);
254 :
255 : /*
256 : * Third part: optionally, process the ranking data for the horizontal
257 : * header
258 : */
259 42 : if (sort_field_for_columns >= 0)
260 15 : rankSort(num_columns, array_columns);
261 :
262 : /*
263 : * Fourth part: print the crosstab'ed result.
264 : */
265 42 : retval = printCrosstab(res,
266 : num_columns, array_columns, field_for_columns,
267 : num_rows, array_rows, field_for_rows,
268 : field_for_data);
269 :
270 66 : error_return:
271 66 : avlFree(&piv_columns, piv_columns.root);
272 66 : avlFree(&piv_rows, piv_rows.root);
273 66 : pg_free(array_columns);
274 66 : pg_free(array_rows);
275 :
276 66 : return retval;
277 : }
278 :
279 : /*
280 : * Output the pivoted resultset with the printTable* functions. Return true
281 : * if successful, false otherwise.
282 : */
283 : static bool
284 42 : printCrosstab(const PGresult *result,
285 : int num_columns, pivot_field *piv_columns, int field_for_columns,
286 : int num_rows, pivot_field *piv_rows, int field_for_rows,
287 : int field_for_data)
288 : {
289 42 : printQueryOpt popt = pset.popt;
290 : printTableContent cont;
291 : int i,
292 : rn;
293 : char col_align;
294 : int *horiz_map;
295 42 : bool retval = false;
296 :
297 42 : printTableInit(&cont, &popt.topt, popt.title, num_columns + 1, num_rows);
298 :
299 : /* Step 1: set target column names (horizontal header) */
300 :
301 : /* The name of the first column is kept unchanged by the pivoting */
302 42 : printTableAddHeader(&cont,
303 : PQfname(result, field_for_rows),
304 : false,
305 42 : column_type_alignment(PQftype(result,
306 : field_for_rows)));
307 :
308 : /*
309 : * To iterate over piv_columns[] by piv_columns[].rank, create a reverse
310 : * map associating each piv_columns[].rank to its index in piv_columns.
311 : * This avoids an O(N^2) loop later.
312 : */
313 42 : horiz_map = pg_malloc_array(int, num_columns);
314 231 : for (i = 0; i < num_columns; i++)
315 189 : horiz_map[piv_columns[i].rank] = i;
316 :
317 : /*
318 : * The display alignment depends on its PQftype().
319 : */
320 42 : col_align = column_type_alignment(PQftype(result, field_for_data));
321 :
322 231 : for (i = 0; i < num_columns; i++)
323 : {
324 : char *colname;
325 :
326 378 : colname = piv_columns[horiz_map[i]].name ?
327 210 : piv_columns[horiz_map[i]].name :
328 21 : (popt.nullPrint ? popt.nullPrint : "");
329 :
330 189 : printTableAddHeader(&cont, colname, false, col_align);
331 : }
332 42 : pg_free(horiz_map);
333 :
334 : /* Step 2: set row names in the first output column (vertical header) */
335 153 : for (i = 0; i < num_rows; i++)
336 : {
337 111 : int k = piv_rows[i].rank;
338 :
339 111 : cont.cells[k * (num_columns + 1)] = piv_rows[i].name ?
340 117 : piv_rows[i].name :
341 6 : (popt.nullPrint ? popt.nullPrint : "");
342 : }
343 42 : cont.cellsadded = num_rows * (num_columns + 1);
344 :
345 : /*
346 : * Step 3: fill in the content cells.
347 : */
348 255 : for (rn = 0; rn < PQntuples(result); rn++)
349 : {
350 : int row_number;
351 : int col_number;
352 : pivot_field *rp,
353 : *cp;
354 : pivot_field elt;
355 :
356 : /* Find target row */
357 216 : if (!PQgetisnull(result, rn, field_for_rows))
358 210 : elt.name = PQgetvalue(result, rn, field_for_rows);
359 : else
360 6 : elt.name = NULL;
361 216 : rp = (pivot_field *) bsearch(&elt,
362 : piv_rows,
363 : num_rows,
364 : sizeof(pivot_field),
365 : pivotFieldCompare);
366 : Assert(rp != NULL);
367 216 : row_number = rp->rank;
368 :
369 : /* Find target column */
370 216 : if (!PQgetisnull(result, rn, field_for_columns))
371 195 : elt.name = PQgetvalue(result, rn, field_for_columns);
372 : else
373 21 : elt.name = NULL;
374 :
375 216 : cp = (pivot_field *) bsearch(&elt,
376 : piv_columns,
377 : num_columns,
378 : sizeof(pivot_field),
379 : pivotFieldCompare);
380 : Assert(cp != NULL);
381 216 : col_number = cp->rank;
382 :
383 : /* Place value into cell */
384 216 : if (col_number >= 0 && row_number >= 0)
385 : {
386 : int idx;
387 :
388 : /* index into the cont.cells array */
389 216 : idx = 1 + col_number + row_number * (num_columns + 1);
390 :
391 : /*
392 : * If the cell already contains a value, raise an error.
393 : */
394 216 : if (cont.cells[idx] != NULL)
395 : {
396 3 : pg_log_error("\\crosstabview: query result contains multiple data values for row \"%s\", column \"%s\"",
397 : rp->name ? rp->name :
398 : (popt.nullPrint ? popt.nullPrint : "(null)"),
399 : cp->name ? cp->name :
400 : (popt.nullPrint ? popt.nullPrint : "(null)"));
401 3 : goto error;
402 : }
403 :
404 426 : cont.cells[idx] = !PQgetisnull(result, rn, field_for_data) ?
405 219 : PQgetvalue(result, rn, field_for_data) :
406 6 : (popt.nullPrint ? popt.nullPrint : "");
407 : }
408 : }
409 :
410 : /*
411 : * The non-initialized cells must be set to an empty string for the print
412 : * functions
413 : */
414 576 : for (i = 0; i < cont.cellsadded; i++)
415 : {
416 537 : if (cont.cells[i] == NULL)
417 246 : cont.cells[i] = "";
418 : }
419 :
420 39 : printTable(&cont, pset.queryFout, false, pset.logfile);
421 39 : retval = true;
422 :
423 42 : error:
424 42 : printTableCleanup(&cont);
425 :
426 42 : return retval;
427 : }
428 :
429 : /*
430 : * The avl* functions below provide a minimalistic implementation of AVL binary
431 : * trees, to efficiently collect the distinct values that will form the horizontal
432 : * and vertical headers. It only supports adding new values, no removal or even
433 : * search.
434 : */
435 : static void
436 132 : avlInit(avl_tree *tree)
437 : {
438 132 : tree->end = pg_malloc0_object(avl_node);
439 132 : tree->end->children[0] = tree->end->children[1] = tree->end;
440 132 : tree->count = 0;
441 132 : tree->root = tree->end;
442 132 : }
443 :
444 : /* Deallocate recursively an AVL tree, starting from node */
445 : static void
446 9945 : avlFree(avl_tree *tree, avl_node *node)
447 : {
448 9945 : if (node->children[0] != tree->end)
449 : {
450 5028 : avlFree(tree, node->children[0]);
451 5028 : pg_free(node->children[0]);
452 : }
453 9945 : if (node->children[1] != tree->end)
454 : {
455 4785 : avlFree(tree, node->children[1]);
456 4785 : pg_free(node->children[1]);
457 : }
458 9945 : if (node == tree->root)
459 : {
460 : /* free the root separately as it's not child of anything */
461 132 : if (node != tree->end)
462 90 : pg_free(node);
463 : /* free the tree->end struct only once and when all else is freed */
464 132 : pg_free(tree->end);
465 : }
466 9945 : }
467 :
468 : /* Set the height to 1 plus the greatest of left and right heights */
469 : static void
470 112743 : avlUpdateHeight(avl_node *n)
471 : {
472 112743 : n->height = 1 + (n->children[0]->height > n->children[1]->height ?
473 112743 : n->children[0]->height :
474 : n->children[1]->height);
475 112743 : }
476 :
477 : /* Rotate a subtree left (dir=0) or right (dir=1). Not recursive */
478 : static avl_node *
479 12105 : avlRotate(avl_node **current, int dir)
480 : {
481 12105 : avl_node *before = *current;
482 12105 : avl_node *after = (*current)->children[dir];
483 :
484 12105 : *current = after;
485 12105 : before->children[dir] = after->children[!dir];
486 12105 : avlUpdateHeight(before);
487 12105 : after->children[!dir] = before;
488 :
489 12105 : return after;
490 : }
491 :
492 : static int
493 109551 : avlBalance(avl_node *n)
494 : {
495 109551 : return n->children[0]->height - n->children[1]->height;
496 : }
497 :
498 : /*
499 : * After an insertion, possibly rebalance the tree so that the left and right
500 : * node heights don't differ by more than 1.
501 : * May update *node.
502 : */
503 : static void
504 100638 : avlAdjustBalance(avl_tree *tree, avl_node **node)
505 : {
506 100638 : avl_node *current = *node;
507 100638 : int b = avlBalance(current) / 2;
508 :
509 100638 : if (b != 0)
510 : {
511 8913 : int dir = (1 - b) / 2;
512 :
513 8913 : if (avlBalance(current->children[dir]) == -b)
514 3192 : avlRotate(¤t->children[dir], !dir);
515 8913 : current = avlRotate(node, dir);
516 : }
517 100638 : if (current != tree->end)
518 100638 : avlUpdateHeight(current);
519 100638 : }
520 :
521 : /*
522 : * Insert a new value/field, starting from *node, reaching the correct position
523 : * in the tree by recursion. Possibly rebalance the tree and possibly update
524 : * *node. Do nothing if the value is already present in the tree.
525 : */
526 : static void
527 110673 : avlInsertNode(avl_tree *tree, avl_node **node, pivot_field field)
528 : {
529 110673 : avl_node *current = *node;
530 :
531 110673 : if (current == tree->end)
532 : {
533 9903 : avl_node *new_node = pg_malloc_object(avl_node);
534 :
535 9903 : new_node->height = 1;
536 9903 : new_node->field = field;
537 9903 : new_node->children[0] = new_node->children[1] = tree->end;
538 9903 : tree->count++;
539 9903 : *node = new_node;
540 : }
541 : else
542 : {
543 100770 : int cmp = pivotFieldCompare(&field, ¤t->field);
544 :
545 100770 : if (cmp != 0)
546 : {
547 100638 : avlInsertNode(tree,
548 : cmp > 0 ? ¤t->children[1] : ¤t->children[0],
549 : field);
550 100638 : avlAdjustBalance(tree, node);
551 : }
552 : }
553 110673 : }
554 :
555 : /* Insert the value into the AVL tree, if it does not preexist */
556 : static void
557 10035 : avlMergeValue(avl_tree *tree, char *name, char *sort_value)
558 : {
559 : pivot_field field;
560 :
561 10035 : field.name = name;
562 10035 : field.rank = tree->count;
563 10035 : field.sort_value = sort_value;
564 10035 : avlInsertNode(tree, &tree->root, field);
565 10035 : }
566 :
567 : /*
568 : * Recursively extract node values into the names array, in sorted order with a
569 : * left-to-right tree traversal.
570 : * Return the next candidate offset to write into the names array.
571 : * fields[] must be preallocated to hold tree->count entries
572 : */
573 : static int
574 684 : avlCollectFields(avl_tree *tree, avl_node *node, pivot_field *fields, int idx)
575 : {
576 684 : if (node == tree->end)
577 384 : return idx;
578 :
579 300 : idx = avlCollectFields(tree, node->children[0], fields, idx);
580 300 : fields[idx] = node->field;
581 300 : return avlCollectFields(tree, node->children[1], fields, idx + 1);
582 : }
583 :
584 : static void
585 15 : rankSort(int num_columns, pivot_field *piv_columns)
586 : {
587 : int *hmap; /* [[offset in piv_columns, rank], ...for
588 : * every header entry] */
589 : int i;
590 :
591 15 : hmap = pg_malloc_array(int, num_columns * 2);
592 78 : for (i = 0; i < num_columns; i++)
593 : {
594 63 : char *val = piv_columns[i].sort_value;
595 :
596 : /* ranking information is valid if non null and matches /^-?\d+$/ */
597 63 : if (val &&
598 63 : ((*val == '-' &&
599 0 : strspn(val + 1, "0123456789") == strlen(val + 1)) ||
600 63 : strspn(val, "0123456789") == strlen(val)))
601 : {
602 63 : hmap[i * 2] = atoi(val);
603 63 : hmap[i * 2 + 1] = i;
604 : }
605 : else
606 : {
607 : /* invalid rank information ignored (equivalent to rank 0) */
608 0 : hmap[i * 2] = 0;
609 0 : hmap[i * 2 + 1] = i;
610 : }
611 : }
612 :
613 15 : qsort(hmap, num_columns, sizeof(int) * 2, rankCompare);
614 :
615 78 : for (i = 0; i < num_columns; i++)
616 : {
617 63 : piv_columns[hmap[i * 2 + 1]].rank = i;
618 : }
619 :
620 15 : pg_free(hmap);
621 15 : }
622 :
623 : /*
624 : * Look up a column reference, which can be either:
625 : * - a number from 1 to PQnfields(res)
626 : * - a column name matching one of PQfname(res,...)
627 : *
628 : * Returns zero-based column number, or -1 if not found or ambiguous.
629 : *
630 : * Note: may modify contents of "arg" string.
631 : */
632 : static int
633 144 : indexOfColumn(char *arg, const PGresult *res)
634 : {
635 : int idx;
636 :
637 144 : if (arg[0] && strspn(arg, "0123456789") == strlen(arg))
638 : {
639 : /* if arg contains only digits, it's a column number */
640 48 : idx = atoi(arg) - 1;
641 48 : if (idx < 0 || idx >= PQnfields(res))
642 : {
643 3 : pg_log_error("\\crosstabview: column number %d is out of range 1..%d",
644 : idx + 1, PQnfields(res));
645 3 : return -1;
646 : }
647 : }
648 : else
649 : {
650 : int i;
651 :
652 : /*
653 : * Dequote and downcase the column name. By checking for all-digits
654 : * before doing this, we can ensure that a quoted name is treated as a
655 : * name even if it's all digits.
656 : */
657 96 : dequote_downcase_identifier(arg, true, pset.encoding);
658 :
659 : /* Now look for match(es) among res' column names */
660 96 : idx = -1;
661 456 : for (i = 0; i < PQnfields(res); i++)
662 : {
663 360 : if (strcmp(arg, PQfname(res, i)) == 0)
664 : {
665 87 : if (idx >= 0)
666 : {
667 : /* another idx was already found for the same name */
668 0 : pg_log_error("\\crosstabview: ambiguous column name: \"%s\"", arg);
669 0 : return -1;
670 : }
671 87 : idx = i;
672 : }
673 : }
674 96 : if (idx == -1)
675 : {
676 9 : pg_log_error("\\crosstabview: column name not found: \"%s\"", arg);
677 9 : return -1;
678 : }
679 : }
680 :
681 132 : return idx;
682 : }
683 :
684 : /*
685 : * Value comparator for vertical and horizontal headers
686 : * used for deduplication only.
687 : * - null values are considered equal
688 : * - non-null < null
689 : * - non-null values are compared with strcmp()
690 : */
691 : static int
692 101568 : pivotFieldCompare(const void *a, const void *b)
693 : {
694 101568 : const pivot_field *pa = (const pivot_field *) a;
695 101568 : const pivot_field *pb = (const pivot_field *) b;
696 :
697 : /* test null values */
698 101568 : if (!pb->name)
699 66 : return pa->name ? -1 : 0;
700 101502 : else if (!pa->name)
701 51 : return 1;
702 :
703 : /* non-null values */
704 101451 : return strcmp(pa->name, pb->name);
705 : }
706 :
707 : static int
708 72 : rankCompare(const void *a, const void *b)
709 : {
710 72 : return pg_cmp_s32(*(const int *) a, *(const int *) b);
711 : }
|