Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsquery_util.c
4 : * Utilities for tsquery datatype
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsquery_util.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "miscadmin.h"
18 : #include "tsearch/ts_utils.h"
19 : #include "varatt.h"
20 :
21 : /*
22 : * Build QTNode tree for a tsquery given in QueryItem array format.
23 : */
24 : QTNode *
25 8850 : QT2QTN(QueryItem *in, char *operand)
26 : {
27 8850 : QTNode *node = (QTNode *) palloc0(sizeof(QTNode));
28 :
29 : /* since this function recurses, it could be driven to stack overflow. */
30 8850 : check_stack_depth();
31 :
32 8850 : node->valnode = in;
33 :
34 8850 : if (in->type == QI_OPR)
35 : {
36 3338 : node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
37 3338 : node->child[0] = QT2QTN(in + 1, operand);
38 3338 : node->sign = node->child[0]->sign;
39 3338 : if (in->qoperator.oper == OP_NOT)
40 42 : node->nchild = 1;
41 : else
42 : {
43 3296 : node->nchild = 2;
44 3296 : node->child[1] = QT2QTN(in + in->qoperator.left, operand);
45 3296 : node->sign |= node->child[1]->sign;
46 : }
47 : }
48 5512 : else if (operand)
49 : {
50 5512 : node->word = operand + in->qoperand.distance;
51 5512 : node->sign = ((uint32) 1) << (((unsigned int) in->qoperand.valcrc) % 32);
52 : }
53 :
54 8850 : return node;
55 : }
56 :
57 : /*
58 : * Free a QTNode tree.
59 : *
60 : * Referenced "word" and "valnode" items are freed if marked as transient
61 : * by flags.
62 : */
63 : void
64 9780 : QTNFree(QTNode *in)
65 : {
66 9780 : if (!in)
67 12 : return;
68 :
69 : /* since this function recurses, it could be driven to stack overflow. */
70 9768 : check_stack_depth();
71 :
72 9768 : if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
73 612 : pfree(in->word);
74 :
75 9768 : if (in->valnode->type == QI_OPR)
76 : {
77 : int i;
78 :
79 10998 : for (i = 0; i < in->nchild; i++)
80 7354 : QTNFree(in->child[i]);
81 : }
82 9768 : if (in->child)
83 3644 : pfree(in->child);
84 :
85 9768 : if (in->flags & QTN_NEEDFREE)
86 1152 : pfree(in->valnode);
87 :
88 9768 : pfree(in);
89 : }
90 :
91 : /*
92 : * Sort comparator for QTNodes.
93 : *
94 : * The sort order is somewhat arbitrary.
95 : */
96 : int
97 3984 : QTNodeCompare(QTNode *an, QTNode *bn)
98 : {
99 : /* since this function recurses, it could be driven to stack overflow. */
100 3984 : check_stack_depth();
101 :
102 3984 : if (an->valnode->type != bn->valnode->type)
103 1182 : return (an->valnode->type > bn->valnode->type) ? -1 : 1;
104 :
105 2802 : if (an->valnode->type == QI_OPR)
106 : {
107 544 : QueryOperator *ao = &an->valnode->qoperator;
108 544 : QueryOperator *bo = &bn->valnode->qoperator;
109 :
110 544 : if (ao->oper != bo->oper)
111 48 : return (ao->oper > bo->oper) ? -1 : 1;
112 :
113 496 : if (an->nchild != bn->nchild)
114 108 : return (an->nchild > bn->nchild) ? -1 : 1;
115 :
116 : {
117 : int i,
118 : res;
119 :
120 648 : for (i = 0; i < an->nchild; i++)
121 518 : if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
122 258 : return res;
123 : }
124 :
125 130 : if (ao->oper == OP_PHRASE && ao->distance != bo->distance)
126 6 : return (ao->distance > bo->distance) ? -1 : 1;
127 :
128 124 : return 0;
129 : }
130 2258 : else if (an->valnode->type == QI_VAL)
131 : {
132 2258 : QueryOperand *ao = &an->valnode->qoperand;
133 2258 : QueryOperand *bo = &bn->valnode->qoperand;
134 :
135 2258 : if (ao->valcrc != bo->valcrc)
136 : {
137 1752 : return (ao->valcrc > bo->valcrc) ? -1 : 1;
138 : }
139 :
140 506 : return tsCompareString(an->word, ao->length, bn->word, bo->length, false);
141 : }
142 : else
143 : {
144 0 : elog(ERROR, "unrecognized QueryItem type: %d", an->valnode->type);
145 : return 0; /* keep compiler quiet */
146 : }
147 : }
148 :
149 : /*
150 : * qsort comparator for QTNode pointers.
151 : */
152 : static int
153 3036 : cmpQTN(const void *a, const void *b)
154 : {
155 3036 : return QTNodeCompare(*(QTNode *const *) a, *(QTNode *const *) b);
156 : }
157 :
158 : /*
159 : * Canonicalize a QTNode tree by sorting the children of AND/OR nodes
160 : * into an arbitrary but well-defined order.
161 : */
162 : void
163 9084 : QTNSort(QTNode *in)
164 : {
165 : int i;
166 :
167 : /* since this function recurses, it could be driven to stack overflow. */
168 9084 : check_stack_depth();
169 :
170 9084 : if (in->valnode->type != QI_OPR)
171 5916 : return;
172 :
173 10398 : for (i = 0; i < in->nchild; i++)
174 7230 : QTNSort(in->child[i]);
175 3168 : if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE)
176 1848 : qsort(in->child, in->nchild, sizeof(QTNode *), cmpQTN);
177 : }
178 :
179 : /*
180 : * Are two QTNode trees equal according to QTNodeCompare?
181 : */
182 : bool
183 198 : QTNEq(QTNode *a, QTNode *b)
184 : {
185 198 : uint32 sign = a->sign & b->sign;
186 :
187 198 : if (!(sign == a->sign && sign == b->sign))
188 6 : return false;
189 :
190 192 : return (QTNodeCompare(a, b) == 0);
191 : }
192 :
193 : /*
194 : * Remove unnecessary intermediate nodes. For example:
195 : *
196 : * OR OR
197 : * a OR -> a b c
198 : * b c
199 : */
200 : void
201 8748 : QTNTernary(QTNode *in)
202 : {
203 : int i;
204 :
205 : /* since this function recurses, it could be driven to stack overflow. */
206 8748 : check_stack_depth();
207 :
208 8748 : if (in->valnode->type != QI_OPR)
209 5538 : return;
210 :
211 10146 : for (i = 0; i < in->nchild; i++)
212 6936 : QTNTernary(in->child[i]);
213 :
214 : /* Only AND and OR are associative, so don't flatten other node types */
215 3210 : if (in->valnode->qoperator.oper != OP_AND &&
216 2016 : in->valnode->qoperator.oper != OP_OR)
217 1248 : return;
218 :
219 6426 : for (i = 0; i < in->nchild; i++)
220 : {
221 4464 : QTNode *cc = in->child[i];
222 :
223 4464 : if (cc->valnode->type == QI_OPR &&
224 1554 : in->valnode->qoperator.oper == cc->valnode->qoperator.oper)
225 : {
226 330 : int oldnchild = in->nchild;
227 :
228 330 : in->nchild += cc->nchild - 1;
229 330 : in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *));
230 :
231 330 : if (i + 1 != oldnchild)
232 36 : memmove(in->child + i + cc->nchild, in->child + i + 1,
233 36 : (oldnchild - i - 1) * sizeof(QTNode *));
234 :
235 330 : memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
236 330 : i += cc->nchild - 1;
237 :
238 330 : if (cc->flags & QTN_NEEDFREE)
239 108 : pfree(cc->valnode);
240 330 : pfree(cc);
241 : }
242 : }
243 : }
244 :
245 : /*
246 : * Convert a tree to binary tree by inserting intermediate nodes.
247 : * (Opposite of QTNTernary)
248 : */
249 : void
250 1182 : QTNBinary(QTNode *in)
251 : {
252 : int i;
253 :
254 : /* since this function recurses, it could be driven to stack overflow. */
255 1182 : check_stack_depth();
256 :
257 1182 : if (in->valnode->type != QI_OPR)
258 726 : return;
259 :
260 1464 : for (i = 0; i < in->nchild; i++)
261 1008 : QTNBinary(in->child[i]);
262 :
263 576 : while (in->nchild > 2)
264 : {
265 120 : QTNode *nn = (QTNode *) palloc0(sizeof(QTNode));
266 :
267 120 : nn->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
268 120 : nn->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
269 :
270 120 : nn->nchild = 2;
271 120 : nn->flags = QTN_NEEDFREE;
272 :
273 120 : nn->child[0] = in->child[0];
274 120 : nn->child[1] = in->child[1];
275 120 : nn->sign = nn->child[0]->sign | nn->child[1]->sign;
276 :
277 120 : nn->valnode->type = in->valnode->type;
278 120 : nn->valnode->qoperator.oper = in->valnode->qoperator.oper;
279 :
280 120 : in->child[0] = nn;
281 120 : in->child[1] = in->child[in->nchild - 1];
282 120 : in->nchild--;
283 : }
284 : }
285 :
286 : /*
287 : * Count the total length of operand strings in tree (including '\0'-
288 : * terminators) and the total number of nodes.
289 : * Caller must initialize *sumlen and *nnode to zeroes.
290 : */
291 : static void
292 1986 : cntsize(QTNode *in, int *sumlen, int *nnode)
293 : {
294 : /* since this function recurses, it could be driven to stack overflow. */
295 1986 : check_stack_depth();
296 :
297 1986 : *nnode += 1;
298 1986 : if (in->valnode->type == QI_OPR)
299 : {
300 : int i;
301 :
302 2562 : for (i = 0; i < in->nchild; i++)
303 1692 : cntsize(in->child[i], sumlen, nnode);
304 : }
305 : else
306 : {
307 1116 : *sumlen += in->valnode->qoperand.length + 1;
308 : }
309 1986 : }
310 :
311 : typedef struct
312 : {
313 : QueryItem *curitem;
314 : char *operand;
315 : char *curoperand;
316 : } QTN2QTState;
317 :
318 : /*
319 : * Recursively convert a QTNode tree into flat tsquery format.
320 : * Caller must have allocated arrays of the correct size.
321 : */
322 : static void
323 1986 : fillQT(QTN2QTState *state, QTNode *in)
324 : {
325 : /* since this function recurses, it could be driven to stack overflow. */
326 1986 : check_stack_depth();
327 :
328 1986 : if (in->valnode->type == QI_VAL)
329 : {
330 1116 : memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
331 :
332 1116 : memcpy(state->curoperand, in->word, in->valnode->qoperand.length);
333 1116 : state->curitem->qoperand.distance = state->curoperand - state->operand;
334 1116 : state->curoperand[in->valnode->qoperand.length] = '\0';
335 1116 : state->curoperand += in->valnode->qoperand.length + 1;
336 1116 : state->curitem++;
337 : }
338 : else
339 : {
340 870 : QueryItem *curitem = state->curitem;
341 :
342 : Assert(in->valnode->type == QI_OPR);
343 :
344 870 : memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
345 :
346 : Assert(in->nchild <= 2);
347 870 : state->curitem++;
348 :
349 870 : fillQT(state, in->child[0]);
350 :
351 870 : if (in->nchild == 2)
352 : {
353 822 : curitem->qoperator.left = state->curitem - curitem;
354 822 : fillQT(state, in->child[1]);
355 : }
356 : }
357 1986 : }
358 :
359 : /*
360 : * Build flat tsquery from a QTNode tree.
361 : */
362 : TSQuery
363 294 : QTN2QT(QTNode *in)
364 : {
365 : TSQuery out;
366 : int len;
367 294 : int sumlen = 0,
368 294 : nnode = 0;
369 : QTN2QTState state;
370 :
371 294 : cntsize(in, &sumlen, &nnode);
372 :
373 294 : if (TSQUERY_TOO_BIG(nnode, sumlen))
374 0 : ereport(ERROR,
375 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
376 : errmsg("tsquery is too large")));
377 294 : len = COMPUTESIZE(nnode, sumlen);
378 :
379 294 : out = (TSQuery) palloc0(len);
380 294 : SET_VARSIZE(out, len);
381 294 : out->size = nnode;
382 :
383 294 : state.curitem = GETQUERY(out);
384 294 : state.operand = state.curoperand = GETOPERAND(out);
385 :
386 294 : fillQT(&state, in);
387 294 : return out;
388 : }
389 :
390 : /*
391 : * Copy a QTNode tree.
392 : *
393 : * Modifiable copies of the words and valnodes are made, too.
394 : */
395 : QTNode *
396 1020 : QTNCopy(QTNode *in)
397 : {
398 : QTNode *out;
399 :
400 : /* since this function recurses, it could be driven to stack overflow. */
401 1020 : check_stack_depth();
402 :
403 1020 : out = (QTNode *) palloc(sizeof(QTNode));
404 :
405 1020 : *out = *in;
406 1020 : out->valnode = (QueryItem *) palloc(sizeof(QueryItem));
407 1020 : *(out->valnode) = *(in->valnode);
408 1020 : out->flags |= QTN_NEEDFREE;
409 :
410 1020 : if (in->valnode->type == QI_VAL)
411 : {
412 612 : out->word = palloc(in->valnode->qoperand.length + 1);
413 612 : memcpy(out->word, in->word, in->valnode->qoperand.length);
414 612 : out->word[in->valnode->qoperand.length] = '\0';
415 612 : out->flags |= QTN_WORDFREE;
416 : }
417 : else
418 : {
419 : int i;
420 :
421 408 : out->child = (QTNode **) palloc(sizeof(QTNode *) * in->nchild);
422 :
423 1218 : for (i = 0; i < in->nchild; i++)
424 810 : out->child[i] = QTNCopy(in->child[i]);
425 : }
426 :
427 1020 : return out;
428 : }
429 :
430 : /*
431 : * Clear the specified flag bit(s) in all nodes of a QTNode tree.
432 : */
433 : void
434 5244 : QTNClearFlags(QTNode *in, uint32 flags)
435 : {
436 : /* since this function recurses, it could be driven to stack overflow. */
437 5244 : check_stack_depth();
438 :
439 5244 : in->flags &= ~flags;
440 :
441 5244 : if (in->valnode->type != QI_VAL)
442 : {
443 : int i;
444 :
445 6408 : for (i = 0; i < in->nchild; i++)
446 4452 : QTNClearFlags(in->child[i], flags);
447 : }
448 5244 : }
|