Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * execPartition.c
4 : * Support routines for partitioning.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/executor/execPartition.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/table.h"
17 : #include "access/tableam.h"
18 : #include "catalog/partition.h"
19 : #include "executor/execPartition.h"
20 : #include "executor/executor.h"
21 : #include "executor/nodeModifyTable.h"
22 : #include "foreign/fdwapi.h"
23 : #include "mb/pg_wchar.h"
24 : #include "miscadmin.h"
25 : #include "partitioning/partbounds.h"
26 : #include "partitioning/partdesc.h"
27 : #include "partitioning/partprune.h"
28 : #include "rewrite/rewriteManip.h"
29 : #include "utils/acl.h"
30 : #include "utils/lsyscache.h"
31 : #include "utils/partcache.h"
32 : #include "utils/rls.h"
33 : #include "utils/ruleutils.h"
34 :
35 :
36 : /*-----------------------
37 : * PartitionTupleRouting - Encapsulates all information required to
38 : * route a tuple inserted into a partitioned table to one of its leaf
39 : * partitions.
40 : *
41 : * partition_root
42 : * The partitioned table that's the target of the command.
43 : *
44 : * partition_dispatch_info
45 : * Array of 'max_dispatch' elements containing a pointer to a
46 : * PartitionDispatch object for every partitioned table touched by tuple
47 : * routing. The entry for the target partitioned table is *always*
48 : * present in the 0th element of this array. See comment for
49 : * PartitionDispatchData->indexes for details on how this array is
50 : * indexed.
51 : *
52 : * nonleaf_partitions
53 : * Array of 'max_dispatch' elements containing pointers to fake
54 : * ResultRelInfo objects for nonleaf partitions, useful for checking
55 : * the partition constraint.
56 : *
57 : * num_dispatch
58 : * The current number of items stored in the 'partition_dispatch_info'
59 : * array. Also serves as the index of the next free array element for
60 : * new PartitionDispatch objects that need to be stored.
61 : *
62 : * max_dispatch
63 : * The current allocated size of the 'partition_dispatch_info' array.
64 : *
65 : * partitions
66 : * Array of 'max_partitions' elements containing a pointer to a
67 : * ResultRelInfo for every leaf partition touched by tuple routing.
68 : * Some of these are pointers to ResultRelInfos which are borrowed out of
69 : * the owning ModifyTableState node. The remainder have been built
70 : * especially for tuple routing. See comment for
71 : * PartitionDispatchData->indexes for details on how this array is
72 : * indexed.
73 : *
74 : * is_borrowed_rel
75 : * Array of 'max_partitions' booleans recording whether a given entry
76 : * in 'partitions' is a ResultRelInfo pointer borrowed from the owning
77 : * ModifyTableState node, rather than being built here.
78 : *
79 : * num_partitions
80 : * The current number of items stored in the 'partitions' array. Also
81 : * serves as the index of the next free array element for new
82 : * ResultRelInfo objects that need to be stored.
83 : *
84 : * max_partitions
85 : * The current allocated size of the 'partitions' array.
86 : *
87 : * memcxt
88 : * Memory context used to allocate subsidiary structs.
89 : *-----------------------
90 : */
91 : struct PartitionTupleRouting
92 : {
93 : Relation partition_root;
94 : PartitionDispatch *partition_dispatch_info;
95 : ResultRelInfo **nonleaf_partitions;
96 : int num_dispatch;
97 : int max_dispatch;
98 : ResultRelInfo **partitions;
99 : bool *is_borrowed_rel;
100 : int num_partitions;
101 : int max_partitions;
102 : MemoryContext memcxt;
103 : };
104 :
105 : /*-----------------------
106 : * PartitionDispatch - information about one partitioned table in a partition
107 : * hierarchy required to route a tuple to any of its partitions. A
108 : * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
109 : * struct and stored inside its 'partition_dispatch_info' array.
110 : *
111 : * reldesc
112 : * Relation descriptor of the table
113 : *
114 : * key
115 : * Partition key information of the table
116 : *
117 : * keystate
118 : * Execution state required for expressions in the partition key
119 : *
120 : * partdesc
121 : * Partition descriptor of the table
122 : *
123 : * tupslot
124 : * A standalone TupleTableSlot initialized with this table's tuple
125 : * descriptor, or NULL if no tuple conversion from the parent's
126 : * rowtype is required.
127 : *
128 : * tupmap
129 : * TupleConversionMap to convert from the parent's rowtype to this table's
130 : * rowtype (when extracting the partition key of a tuple just before
131 : * routing it through this table). A NULL value is stored if no tuple
132 : * conversion is required.
133 : *
134 : * indexes
135 : * Array of partdesc->nparts elements. For leaf partitions the index
136 : * corresponds to the partition's ResultRelInfo in the encapsulating
137 : * PartitionTupleRouting's partitions array. For sub-partitioned partitions,
138 : * the index corresponds to the PartitionDispatch for it in its
139 : * partition_dispatch_info array. -1 indicates we've not yet allocated
140 : * anything in PartitionTupleRouting for the partition.
141 : *-----------------------
142 : */
143 : typedef struct PartitionDispatchData
144 : {
145 : Relation reldesc;
146 : PartitionKey key;
147 : List *keystate; /* list of ExprState */
148 : PartitionDesc partdesc;
149 : TupleTableSlot *tupslot;
150 : AttrMap *tupmap;
151 : int indexes[FLEXIBLE_ARRAY_MEMBER];
152 : } PartitionDispatchData;
153 :
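/*
 * Illustrative sketch only (kept compiled out): how a PartitionDispatch's
 * indexes[] array resolves into the arrays of the encapsulating
 * PartitionTupleRouting, per the comments above.  The helper name
 * resolve_partition_route() is hypothetical and not part of this file's API.
 */
#ifdef NOT_USED
static void *
resolve_partition_route(PartitionTupleRouting *proute,
						PartitionDispatch dispatch, int partidx)
{
	int			index = dispatch->indexes[partidx];

	if (index < 0)
		return NULL;			/* nothing built for this partition yet */

	if (dispatch->partdesc->is_leaf[partidx])
		return proute->partitions[index];	/* leaf: a ResultRelInfo */
	else
		return proute->partition_dispatch_info[index];	/* next level down */
}
#endif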
154 :
155 : static ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
156 : EState *estate, PartitionTupleRouting *proute,
157 : PartitionDispatch dispatch,
158 : ResultRelInfo *rootResultRelInfo,
159 : int partidx);
160 : static void ExecInitRoutingInfo(ModifyTableState *mtstate,
161 : EState *estate,
162 : PartitionTupleRouting *proute,
163 : PartitionDispatch dispatch,
164 : ResultRelInfo *partRelInfo,
165 : int partidx,
166 : bool is_borrowed_rel);
167 : static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate,
168 : PartitionTupleRouting *proute,
169 : Oid partoid, PartitionDispatch parent_pd,
170 : int partidx, ResultRelInfo *rootResultRelInfo);
171 : static void FormPartitionKeyDatum(PartitionDispatch pd,
172 : TupleTableSlot *slot,
173 : EState *estate,
174 : Datum *values,
175 : bool *isnull);
176 : static int get_partition_for_tuple(PartitionDispatch pd, const Datum *values,
177 : const bool *isnull);
178 : static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
179 : const Datum *values,
180 : const bool *isnull,
181 : int maxfieldlen);
182 : static List *adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri);
183 : static List *adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap);
184 : static PartitionPruneState *CreatePartitionPruneState(EState *estate,
185 : PartitionPruneInfo *pruneinfo,
186 : Bitmapset **all_leafpart_rtis);
187 : static void InitPartitionPruneContext(PartitionPruneContext *context,
188 : List *pruning_steps,
189 : PartitionDesc partdesc,
190 : PartitionKey partkey,
191 : PlanState *planstate,
192 : ExprContext *econtext);
193 : static void InitExecPartitionPruneContexts(PartitionPruneState *prunestate,
194 : PlanState *parent_plan,
195 : Bitmapset *initially_valid_subplans,
196 : int n_total_subplans);
197 : static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
198 : PartitionedRelPruningData *pprune,
199 : bool initial_prune,
200 : Bitmapset **validsubplans,
201 : Bitmapset **validsubplan_rtis);
202 :
203 :
204 : /*
205 : * ExecSetupPartitionTupleRouting - sets up information needed during
206 : * tuple routing for partitioned tables, encapsulates it in
207 : * PartitionTupleRouting, and returns it.
208 : *
209 : * Callers must use the returned PartitionTupleRouting during calls to
210 : * ExecFindPartition(). The actual ResultRelInfo for a partition is only
211 : * allocated when the partition is found for the first time.
212 : *
213 : * The current memory context is used to allocate this struct and all
214 : * subsidiary structs that will be allocated from it later on. Typically
215 : * it should be estate->es_query_cxt.
216 : */
217 : PartitionTupleRouting *
218 5160 : ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
219 : {
220 : PartitionTupleRouting *proute;
221 :
222 : /*
223 : * Here we attempt to expend as little effort as possible in setting up
224 : * the PartitionTupleRouting. Each partition's ResultRelInfo is built on
225 : * demand, only when we actually need to route a tuple to that partition.
226 : * The reason for this is that a common case is for INSERT to insert a
227 : * single tuple into a partitioned table and this must be fast.
228 : */
229 5160 : proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
230 5160 : proute->partition_root = rel;
231 5160 : proute->memcxt = CurrentMemoryContext;
232 : /* Rest of members initialized by zeroing */
233 :
234 : /*
235 : * Initialize this table's PartitionDispatch object. Here we pass in the
236 : * parent as NULL as we don't need to care about any parent of the target
237 : * partitioned table.
238 : */
239 5160 : ExecInitPartitionDispatchInfo(estate, proute, RelationGetRelid(rel),
240 : NULL, 0, NULL);
241 :
242 5160 : return proute;
243 : }
244 :
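/*
 * Hedged usage sketch (a compiled-out fragment, not a complete function):
 * the typical lifecycle of a PartitionTupleRouting as driven by callers
 * such as nodeModifyTable.c or COPY FROM.  Variable names are illustrative
 * only.
 */
#ifdef NOT_USED
	PartitionTupleRouting *proute;
	ResultRelInfo *rri;

	proute = ExecSetupPartitionTupleRouting(estate, rootRel);
	for (;;)
	{
		/* ... fetch the next tuple into 'slot', break when done ... */
		rri = ExecFindPartition(mtstate, rootResultRelInfo, proute,
								slot, estate);
		/* ... insert the tuple into the partition described by 'rri' ... */
	}
	ExecCleanupTupleRouting(mtstate, proute);
#endif
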
245 : /*
246 : * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
247 : * the tuple contained in *slot should belong to.
248 : *
249 : * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
250 : * one up or reuse one from mtstate's resultRelInfo array. When reusing a
251 : * ResultRelInfo from the mtstate we verify that the relation is a valid
252 : * target for INSERTs and initialize tuple routing information.
253 : *
254 : * rootResultRelInfo is the relation named in the query.
255 : *
256 : * estate must be non-NULL; we'll need it to compute any expressions in the
257 : * partition keys. Also, its per-tuple contexts are used as evaluation
258 : * scratch space.
259 : *
260 : * If no leaf partition is found, this routine errors out with the appropriate
261 : * error message. An error may also be raised if the found target partition
262 : * is not a valid target for an INSERT.
263 : */
264 : ResultRelInfo *
265 1031672 : ExecFindPartition(ModifyTableState *mtstate,
266 : ResultRelInfo *rootResultRelInfo,
267 : PartitionTupleRouting *proute,
268 : TupleTableSlot *slot, EState *estate)
269 : {
270 1031672 : PartitionDispatch *pd = proute->partition_dispatch_info;
271 : Datum values[PARTITION_MAX_KEYS];
272 : bool isnull[PARTITION_MAX_KEYS];
273 : Relation rel;
274 : PartitionDispatch dispatch;
275 : PartitionDesc partdesc;
276 1031672 : ExprContext *ecxt = GetPerTupleExprContext(estate);
277 1031672 : TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
278 1031672 : TupleTableSlot *rootslot = slot;
279 1031672 : TupleTableSlot *myslot = NULL;
280 : MemoryContext oldcxt;
281 1031672 : ResultRelInfo *rri = NULL;
282 :
283 : /* use per-tuple context here to avoid leaking memory */
284 1031672 : oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
285 :
286 : /*
287 : * First check the root table's partition constraint, if any. No point in
288 : * routing the tuple if it doesn't belong in the root table itself.
289 : */
290 1031672 : if (rootResultRelInfo->ri_RelationDesc->rd_rel->relispartition)
291 4496 : ExecPartitionCheck(rootResultRelInfo, slot, estate, true);
292 :
293 : /* start with the root partitioned table */
294 1031640 : dispatch = pd[0];
295 2179584 : while (dispatch != NULL)
296 : {
297 1148136 : int partidx = -1;
298 : bool is_leaf;
299 :
300 1148136 : CHECK_FOR_INTERRUPTS();
301 :
302 1148136 : rel = dispatch->reldesc;
303 1148136 : partdesc = dispatch->partdesc;
304 :
305 : /*
306 : * Extract partition key from tuple. Expression evaluation machinery
307 : * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
308 : * point to the correct tuple slot. The slot might have changed from
309 : * what was used for the parent table if the table of the current
310 : * partitioning level has a different tuple descriptor from the parent.
311 : * So update ecxt_scantuple accordingly.
312 : */
313 1148136 : ecxt->ecxt_scantuple = slot;
314 1148136 : FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);
315 :
316 : /*
317 : * If this partitioned table has no partitions or no partition for
318 : * these values, error out.
319 : */
320 2296218 : if (partdesc->nparts == 0 ||
321 1148094 : (partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
322 : {
323 : char *val_desc;
324 :
325 154 : val_desc = ExecBuildSlotPartitionKeyDescription(rel,
326 : values, isnull, 64);
327 : Assert(OidIsValid(RelationGetRelid(rel)));
328 154 : ereport(ERROR,
329 : (errcode(ERRCODE_CHECK_VIOLATION),
330 : errmsg("no partition of relation \"%s\" found for row",
331 : RelationGetRelationName(rel)),
332 : val_desc ?
333 : errdetail("Partition key of the failing row contains %s.",
334 : val_desc) : 0,
335 : errtable(rel)));
336 : }
337 :
338 1147970 : is_leaf = partdesc->is_leaf[partidx];
339 1147970 : if (is_leaf)
340 : {
341 : /*
342 : * We've reached the leaf -- hurray, we're done. Look to see if
343 : * we've already got a ResultRelInfo for this partition.
344 : */
345 1031472 : if (likely(dispatch->indexes[partidx] >= 0))
346 : {
347 : /* ResultRelInfo already built */
348 : Assert(dispatch->indexes[partidx] < proute->num_partitions);
349 1024440 : rri = proute->partitions[dispatch->indexes[partidx]];
350 : }
351 : else
352 : {
353 : /*
354 : * If the partition is known in the owning ModifyTableState
355 : * node, we can re-use that ResultRelInfo instead of creating
356 : * a new one with ExecInitPartitionInfo().
357 : */
358 7032 : rri = ExecLookupResultRelByOid(mtstate,
359 7032 : partdesc->oids[partidx],
360 : true, false);
361 7032 : if (rri)
362 : {
363 508 : ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
364 :
365 : /* Verify this ResultRelInfo allows INSERTs */
366 508 : CheckValidResultRel(rri, CMD_INSERT,
367 : node ? node->onConflictAction : ONCONFLICT_NONE,
368 : NIL);
369 :
370 : /*
371 : * Initialize information needed to insert this and
372 : * subsequent tuples routed to this partition.
373 : */
374 508 : ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
375 : rri, partidx, true);
376 : }
377 : else
378 : {
379 : /* We need to create a new one. */
380 6524 : rri = ExecInitPartitionInfo(mtstate, estate, proute,
381 : dispatch,
382 : rootResultRelInfo, partidx);
383 : }
384 : }
385 : Assert(rri != NULL);
386 :
387 : /* Signal to terminate the loop */
388 1031448 : dispatch = NULL;
389 : }
390 : else
391 : {
392 : /*
393 : * Partition is a sub-partitioned table; get the PartitionDispatch
394 : */
395 116498 : if (likely(dispatch->indexes[partidx] >= 0))
396 : {
397 : /* Already built. */
398 : Assert(dispatch->indexes[partidx] < proute->num_dispatch);
399 :
400 115310 : rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
401 :
402 : /*
403 : * Move down to the next partition level and search again
404 : * until we find a leaf partition that matches this tuple
405 : */
406 115310 : dispatch = pd[dispatch->indexes[partidx]];
407 : }
408 : else
409 : {
410 : /* Not yet built. Do that now. */
411 : PartitionDispatch subdispatch;
412 :
413 : /*
414 : * Create the new PartitionDispatch. We pass the current one
415 : * in as the parent PartitionDispatch
416 : */
417 1188 : subdispatch = ExecInitPartitionDispatchInfo(estate,
418 : proute,
419 1188 : partdesc->oids[partidx],
420 : dispatch, partidx,
421 : mtstate->rootResultRelInfo);
422 : Assert(dispatch->indexes[partidx] >= 0 &&
423 : dispatch->indexes[partidx] < proute->num_dispatch);
424 :
425 1188 : rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
426 1188 : dispatch = subdispatch;
427 : }
428 :
429 : /*
430 : * Convert the tuple to the new parent's layout, if different from
431 : * the previous parent.
432 : */
433 116498 : if (dispatch->tupslot)
434 : {
435 61716 : AttrMap *map = dispatch->tupmap;
436 61716 : TupleTableSlot *tempslot = myslot;
437 :
438 61716 : myslot = dispatch->tupslot;
439 61716 : slot = execute_attr_map_slot(map, slot, myslot);
440 :
441 61716 : if (tempslot != NULL)
442 294 : ExecClearTuple(tempslot);
443 : }
444 : }
445 :
446 : /*
447 : * If this partition is the default one, we must check its partition
448 : * constraint now, which may have changed concurrently due to
449 : * partitions being added to the parent.
450 : *
451 : * (We do this here, and do not rely on ExecInsert doing it, because
452 : * we don't want to miss doing it for non-leaf partitions.)
453 : */
454 1147946 : if (partidx == partdesc->boundinfo->default_index)
455 : {
456 : /*
457 : * The tuple must match the partition's layout for the constraint
458 : * expression to be evaluated successfully. If the partition is
459 : * sub-partitioned, that would already be the case due to the code
460 : * above, but for a leaf partition the tuple still matches the
461 : * parent's layout.
462 : *
463 : * Note that we have a map to convert from root to current
464 : * partition, but not from immediate parent to current partition.
465 : * So if we have to convert, do it from the root slot; if not, use
466 : * the root slot as-is.
467 : */
468 596 : if (is_leaf)
469 : {
470 552 : TupleConversionMap *map = ExecGetRootToChildMap(rri, estate);
471 :
472 552 : if (map)
473 162 : slot = execute_attr_map_slot(map->attrMap, rootslot,
474 : rri->ri_PartitionTupleSlot);
475 : else
476 390 : slot = rootslot;
477 : }
478 :
479 596 : ExecPartitionCheck(rri, slot, estate, true);
480 : }
481 : }
482 :
483 : /* Release the tuple in the lowest parent's dedicated slot. */
484 1031448 : if (myslot != NULL)
485 61384 : ExecClearTuple(myslot);
486 : /* and restore ecxt's scantuple */
487 1031448 : ecxt->ecxt_scantuple = ecxt_scantuple_saved;
488 1031448 : MemoryContextSwitchTo(oldcxt);
489 :
490 1031448 : return rri;
491 : }
492 :
493 : /*
494 : * ExecInitPartitionInfo
495 : * Lock the partition and initialize ResultRelInfo. Also setup other
496 : * information for the partition and store it in the next empty slot in
497 : * the proute->partitions array.
498 : *
499 : * Returns the ResultRelInfo
500 : */
501 : static ResultRelInfo *
502 6524 : ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
503 : PartitionTupleRouting *proute,
504 : PartitionDispatch dispatch,
505 : ResultRelInfo *rootResultRelInfo,
506 : int partidx)
507 : {
508 6524 : ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
509 6524 : Oid partOid = dispatch->partdesc->oids[partidx];
510 : Relation partrel;
511 6524 : int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
512 6524 : Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
513 : ResultRelInfo *leaf_part_rri;
514 : MemoryContext oldcxt;
515 6524 : AttrMap *part_attmap = NULL;
516 : bool found_whole_row;
517 :
518 6524 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
519 :
520 6524 : partrel = table_open(partOid, RowExclusiveLock);
521 :
522 6524 : leaf_part_rri = makeNode(ResultRelInfo);
523 6524 : InitResultRelInfo(leaf_part_rri,
524 : partrel,
525 : 0,
526 : rootResultRelInfo,
527 : estate->es_instrument);
528 :
529 : /*
530 : * Verify result relation is a valid target for an INSERT. An UPDATE of a
531 : * partition-key becomes a DELETE+INSERT operation, so this check is still
532 : * required when the operation is CMD_UPDATE.
533 : */
534 6524 : CheckValidResultRel(leaf_part_rri, CMD_INSERT,
535 : node ? node->onConflictAction : ONCONFLICT_NONE, NIL);
536 :
537 : /*
538 : * Open partition indices. The user may have asked to check for conflicts
539 : * within this leaf partition and do "nothing" instead of throwing an
540 : * error. Be prepared in that case by initializing the index information
541 : * needed by ExecInsert() to perform speculative insertions.
542 : */
543 6512 : if (partrel->rd_rel->relhasindex &&
544 1764 : leaf_part_rri->ri_IndexRelationDescs == NULL)
545 1764 : ExecOpenIndices(leaf_part_rri,
546 3322 : (node != NULL &&
547 1558 : node->onConflictAction != ONCONFLICT_NONE));
548 :
549 : /*
550 : * Build WITH CHECK OPTION constraints for the partition. Note that we
551 : * didn't build the withCheckOptionList for partitions within the planner,
552 : * but simple translation of varattnos will suffice. This only occurs for
553 : * the INSERT case or in the case of UPDATE/MERGE tuple routing where we
554 : * didn't find a result rel to reuse.
555 : */
556 6512 : if (node && node->withCheckOptionLists != NIL)
557 : {
558 : List *wcoList;
559 96 : List *wcoExprs = NIL;
560 : ListCell *ll;
561 :
562 : /*
563 : * In the case of INSERT on a partitioned table, there is only one
564 : * plan. Likewise, there is only one WCO list, not one per partition.
565 : * For UPDATE/MERGE, there are as many WCO lists as there are plans.
566 : */
567 : Assert((node->operation == CMD_INSERT &&
568 : list_length(node->withCheckOptionLists) == 1 &&
569 : list_length(node->resultRelations) == 1) ||
570 : (node->operation == CMD_UPDATE &&
571 : list_length(node->withCheckOptionLists) ==
572 : list_length(node->resultRelations)) ||
573 : (node->operation == CMD_MERGE &&
574 : list_length(node->withCheckOptionLists) ==
575 : list_length(node->resultRelations)));
576 :
577 : /*
578 : * Use the WCO list of the first plan as a reference to calculate
579 : * attno's for the WCO list of this partition. In the INSERT case,
580 : * that refers to the root partitioned table, whereas in the UPDATE
581 : * tuple routing case, that refers to the first partition in the
582 : * mtstate->resultRelInfo array. In any case, both that relation and
583 : * this partition should have the same columns, so we should be able
584 : * to map attributes successfully.
585 : */
586 96 : wcoList = linitial(node->withCheckOptionLists);
587 :
588 : /*
589 : * Convert Vars in it to contain this partition's attribute numbers.
590 : */
591 : part_attmap =
592 96 : build_attrmap_by_name(RelationGetDescr(partrel),
593 : RelationGetDescr(firstResultRel),
594 : false);
595 : wcoList = (List *)
596 96 : map_variable_attnos((Node *) wcoList,
597 : firstVarno, 0,
598 : part_attmap,
599 96 : RelationGetForm(partrel)->reltype,
600 : &found_whole_row);
601 : /* We ignore the value of found_whole_row. */
602 :
603 270 : foreach(ll, wcoList)
604 : {
605 174 : WithCheckOption *wco = lfirst_node(WithCheckOption, ll);
606 174 : ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
607 : &mtstate->ps);
608 :
609 174 : wcoExprs = lappend(wcoExprs, wcoExpr);
610 : }
611 :
612 96 : leaf_part_rri->ri_WithCheckOptions = wcoList;
613 96 : leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
614 : }
615 :
616 : /*
617 : * Build the RETURNING projection for the partition. Note that we didn't
618 : * build the returningList for partitions within the planner, but simple
619 : * translation of varattnos will suffice. This only occurs for the INSERT
620 : * case or in the case of UPDATE/MERGE tuple routing where we didn't find
621 : * a result rel to reuse.
622 : */
623 6512 : if (node && node->returningLists != NIL)
624 : {
625 : TupleTableSlot *slot;
626 : ExprContext *econtext;
627 : List *returningList;
628 :
629 : /* See the comment above for WCO lists. */
630 : Assert((node->operation == CMD_INSERT &&
631 : list_length(node->returningLists) == 1 &&
632 : list_length(node->resultRelations) == 1) ||
633 : (node->operation == CMD_UPDATE &&
634 : list_length(node->returningLists) ==
635 : list_length(node->resultRelations)) ||
636 : (node->operation == CMD_MERGE &&
637 : list_length(node->returningLists) ==
638 : list_length(node->resultRelations)));
639 :
640 : /*
641 : * Use the RETURNING list of the first plan as a reference to
642 : * calculate attno's for the RETURNING list of this partition. See
643 : * the comment above for WCO lists for more details on why this is
644 : * okay.
645 : */
646 212 : returningList = linitial(node->returningLists);
647 :
648 : /*
649 : * Convert Vars in it to contain this partition's attribute numbers.
650 : */
651 212 : if (part_attmap == NULL)
652 : part_attmap =
653 212 : build_attrmap_by_name(RelationGetDescr(partrel),
654 : RelationGetDescr(firstResultRel),
655 : false);
656 : returningList = (List *)
657 212 : map_variable_attnos((Node *) returningList,
658 : firstVarno, 0,
659 : part_attmap,
660 212 : RelationGetForm(partrel)->reltype,
661 : &found_whole_row);
662 : /* We ignore the value of found_whole_row. */
663 :
664 212 : leaf_part_rri->ri_returningList = returningList;
665 :
666 : /*
667 : * Initialize the projection itself.
668 : *
669 : * Use the slot and the expression context that would have been set up
670 : * in ExecInitModifyTable() for projection's output.
671 : */
672 : Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
673 212 : slot = mtstate->ps.ps_ResultTupleSlot;
674 : Assert(mtstate->ps.ps_ExprContext != NULL);
675 212 : econtext = mtstate->ps.ps_ExprContext;
676 212 : leaf_part_rri->ri_projectReturning =
677 212 : ExecBuildProjectionInfo(returningList, econtext, slot,
678 : &mtstate->ps, RelationGetDescr(partrel));
679 : }
680 :
681 : /* Set up information needed for routing tuples to the partition. */
682 6512 : ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
683 : leaf_part_rri, partidx, false);
684 :
685 : /*
686 : * If there is an ON CONFLICT clause, initialize state for it.
687 : */
688 6512 : if (node && node->onConflictAction != ONCONFLICT_NONE)
689 : {
690 228 : TupleDesc partrelDesc = RelationGetDescr(partrel);
691 228 : ExprContext *econtext = mtstate->ps.ps_ExprContext;
692 : ListCell *lc;
693 228 : List *arbiterIndexes = NIL;
694 :
695 : /*
696 : * If there is a list of arbiter indexes, map it to a list of indexes
697 : * in the partition. We do that by scanning the partition's index
698 : * list and searching for ancestry relationships to each index in the
699 : * ancestor table.
700 : */
701 228 : if (rootResultRelInfo->ri_onConflictArbiterIndexes != NIL)
702 : {
703 : List *childIdxs;
704 :
705 172 : childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);
706 :
707 356 : foreach(lc, childIdxs)
708 : {
709 184 : Oid childIdx = lfirst_oid(lc);
710 : List *ancestors;
711 : ListCell *lc2;
712 :
713 184 : ancestors = get_partition_ancestors(childIdx);
714 368 : foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
715 : {
716 184 : if (list_member_oid(ancestors, lfirst_oid(lc2)))
717 172 : arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
718 : }
719 184 : list_free(ancestors);
720 : }
721 : }
722 :
723 : /*
724 : * If the resulting lists are of inequal length, something is wrong.
725 : * XXX This may happen because we don't match the lists correctly when
726 : * a partitioned index is being processed by REINDEX CONCURRENTLY.
727 : * FIXME later.
728 : */
729 456 : if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
730 228 : list_length(arbiterIndexes))
731 0 : elog(ERROR, "invalid arbiter index list");
732 228 : leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
733 :
734 : /*
735 : * In the DO UPDATE case, we have some more state to initialize.
736 : */
737 228 : if (node->onConflictAction == ONCONFLICT_UPDATE)
738 : {
739 166 : OnConflictSetState *onconfl = makeNode(OnConflictSetState);
740 : TupleConversionMap *map;
741 :
742 166 : map = ExecGetRootToChildMap(leaf_part_rri, estate);
743 :
744 : Assert(node->onConflictSet != NIL);
745 : Assert(rootResultRelInfo->ri_onConflict != NULL);
746 :
747 166 : leaf_part_rri->ri_onConflict = onconfl;
748 :
749 : /*
750 : * Need a separate existing slot for each partition, as the
751 : * partition could be of a different AM, even if the tuple
752 : * descriptors match.
753 : */
754 166 : onconfl->oc_Existing =
755 166 : table_slot_create(leaf_part_rri->ri_RelationDesc,
756 166 : &mtstate->ps.state->es_tupleTable);
757 :
758 : /*
759 : * If the partition's tuple descriptor matches exactly the root
760 : * parent (the common case), we can re-use most of the parent's ON
761 : * CONFLICT SET state, skipping a bunch of work. Otherwise, we
762 : * need to create state specific to this partition.
763 : */
764 166 : if (map == NULL)
765 : {
766 : /*
767 : * It's safe to reuse these from the partition root, as we
768 : * only process one tuple at a time (therefore we won't
769 : * overwrite needed data in slots), and the results of
770 : * projections are independent of the underlying storage.
771 : * Projections and where clauses themselves don't store state
772 : * / are independent of the underlying storage.
773 : */
774 90 : onconfl->oc_ProjSlot =
775 90 : rootResultRelInfo->ri_onConflict->oc_ProjSlot;
776 90 : onconfl->oc_ProjInfo =
777 90 : rootResultRelInfo->ri_onConflict->oc_ProjInfo;
778 90 : onconfl->oc_WhereClause =
779 90 : rootResultRelInfo->ri_onConflict->oc_WhereClause;
780 : }
781 : else
782 : {
783 : List *onconflset;
784 : List *onconflcols;
785 :
786 : /*
787 : * Translate expressions in onConflictSet to account for
788 : * different attribute numbers. For that, map partition
789 : * varattnos twice: first to catch the EXCLUDED
790 : * pseudo-relation (INNER_VAR), and second to handle the main
791 : * target relation (firstVarno).
792 : */
793 76 : onconflset = copyObject(node->onConflictSet);
794 76 : if (part_attmap == NULL)
795 : part_attmap =
796 70 : build_attrmap_by_name(RelationGetDescr(partrel),
797 : RelationGetDescr(firstResultRel),
798 : false);
799 : onconflset = (List *)
800 76 : map_variable_attnos((Node *) onconflset,
801 : INNER_VAR, 0,
802 : part_attmap,
803 76 : RelationGetForm(partrel)->reltype,
804 : &found_whole_row);
805 : /* We ignore the value of found_whole_row. */
806 : onconflset = (List *)
807 76 : map_variable_attnos((Node *) onconflset,
808 : firstVarno, 0,
809 : part_attmap,
810 76 : RelationGetForm(partrel)->reltype,
811 : &found_whole_row);
812 : /* We ignore the value of found_whole_row. */
813 :
814 : /* Finally, adjust the target colnos to match the partition. */
815 76 : onconflcols = adjust_partition_colnos(node->onConflictCols,
816 : leaf_part_rri);
817 :
818 : /* create the tuple slot for the UPDATE SET projection */
819 76 : onconfl->oc_ProjSlot =
820 76 : table_slot_create(partrel,
821 76 : &mtstate->ps.state->es_tupleTable);
822 :
823 : /* build UPDATE SET projection state */
824 76 : onconfl->oc_ProjInfo =
825 76 : ExecBuildUpdateProjection(onconflset,
826 : true,
827 : onconflcols,
828 : partrelDesc,
829 : econtext,
830 : onconfl->oc_ProjSlot,
831 : &mtstate->ps);
832 :
833 : /*
834 : * If there is a WHERE clause, initialize state where it will
835 : * be evaluated, mapping the attribute numbers appropriately.
836 : * As with onConflictSet, we need to map partition varattnos
837 : * to the partition's tupdesc.
838 : */
839 76 : if (node->onConflictWhere)
840 : {
841 : List *clause;
842 :
843 30 : clause = copyObject((List *) node->onConflictWhere);
844 : clause = (List *)
845 30 : map_variable_attnos((Node *) clause,
846 : INNER_VAR, 0,
847 : part_attmap,
848 30 : RelationGetForm(partrel)->reltype,
849 : &found_whole_row);
850 : /* We ignore the value of found_whole_row. */
851 : clause = (List *)
852 30 : map_variable_attnos((Node *) clause,
853 : firstVarno, 0,
854 : part_attmap,
855 30 : RelationGetForm(partrel)->reltype,
856 : &found_whole_row);
857 : /* We ignore the value of found_whole_row. */
858 30 : onconfl->oc_WhereClause =
859 30 : ExecInitQual((List *) clause, &mtstate->ps);
860 : }
861 : }
862 : }
863 : }
864 :
865 : /*
866 : * Since we've just initialized this ResultRelInfo, it's not in any list
867 : * attached to the estate as yet. Add it, so that it can be found later.
868 : *
869 : * Note that the entries in this list appear in no predetermined order,
870 : * because partition result rels are initialized as and when they're
871 : * needed.
872 : */
873 6512 : MemoryContextSwitchTo(estate->es_query_cxt);
874 6512 : estate->es_tuple_routing_result_relations =
875 6512 : lappend(estate->es_tuple_routing_result_relations,
876 : leaf_part_rri);
877 :
878 : /*
879 : * Initialize information about this partition that's needed to handle
880 : * MERGE. We take the "first" result relation's mergeActionList as
881 : * a reference and make a copy for this relation, converting anything that
882 : * references attribute numbers to match this relation's.
883 : *
884 : * This duplicates much of the logic in ExecInitMerge(), so if something
885 : * changes there, look here too.
886 : */
887 6512 : if (node && node->operation == CMD_MERGE)
888 : {
889 24 : List *firstMergeActionList = linitial(node->mergeActionLists);
890 : ListCell *lc;
891 24 : ExprContext *econtext = mtstate->ps.ps_ExprContext;
892 : Node *joinCondition;
893 :
894 24 : if (part_attmap == NULL)
895 : part_attmap =
896 12 : build_attrmap_by_name(RelationGetDescr(partrel),
897 : RelationGetDescr(firstResultRel),
898 : false);
899 :
900 24 : if (unlikely(!leaf_part_rri->ri_projectNewInfoValid))
901 24 : ExecInitMergeTupleSlots(mtstate, leaf_part_rri);
902 :
903 : /* Initialize state for join condition checking. */
904 : joinCondition =
905 24 : map_variable_attnos(linitial(node->mergeJoinConditions),
906 : firstVarno, 0,
907 : part_attmap,
908 24 : RelationGetForm(partrel)->reltype,
909 : &found_whole_row);
910 : /* We ignore the value of found_whole_row. */
911 24 : leaf_part_rri->ri_MergeJoinCondition =
912 24 : ExecInitQual((List *) joinCondition, &mtstate->ps);
913 :
914 60 : foreach(lc, firstMergeActionList)
915 : {
916 : /* Make a copy for this relation to be safe. */
917 36 : MergeAction *action = copyObject(lfirst(lc));
918 : MergeActionState *action_state;
919 :
920 : /* Generate the action's state for this relation */
921 36 : action_state = makeNode(MergeActionState);
922 36 : action_state->mas_action = action;
923 :
924 : /* And put the action in the appropriate list */
925 72 : leaf_part_rri->ri_MergeActions[action->matchKind] =
926 36 : lappend(leaf_part_rri->ri_MergeActions[action->matchKind],
927 : action_state);
928 :
929 36 : switch (action->commandType)
930 : {
931 12 : case CMD_INSERT:
932 :
933 : /*
934 : * ExecCheckPlanOutput() was already done on the targetlist
935 : * when the "first" result relation was initialized, and it
936 : * is the same for all result relations.
937 : */
938 12 : action_state->mas_proj =
939 12 : ExecBuildProjectionInfo(action->targetList, econtext,
940 : leaf_part_rri->ri_newTupleSlot,
941 : &mtstate->ps,
942 : RelationGetDescr(partrel));
943 12 : break;
944 18 : case CMD_UPDATE:
945 :
946 : /*
947 : * Convert updateColnos from "first" result relation
948 : * attribute numbers to this result rel's.
949 : */
950 18 : if (part_attmap)
951 18 : action->updateColnos =
952 18 : adjust_partition_colnos_using_map(action->updateColnos,
953 : part_attmap);
954 18 : action_state->mas_proj =
955 18 : ExecBuildUpdateProjection(action->targetList,
956 : true,
957 : action->updateColnos,
958 18 : RelationGetDescr(leaf_part_rri->ri_RelationDesc),
959 : econtext,
960 : leaf_part_rri->ri_newTupleSlot,
961 : NULL);
962 18 : break;
963 6 : case CMD_DELETE:
964 : case CMD_NOTHING:
965 : /* Nothing to do */
966 6 : break;
967 :
968 0 : default:
969 0 : elog(ERROR, "unknown action in MERGE WHEN clause");
970 : }
971 :
972 : /* found_whole_row intentionally ignored. */
973 36 : action->qual =
974 36 : map_variable_attnos(action->qual,
975 : firstVarno, 0,
976 : part_attmap,
977 36 : RelationGetForm(partrel)->reltype,
978 : &found_whole_row);
979 36 : action_state->mas_whenqual =
980 36 : ExecInitQual((List *) action->qual, &mtstate->ps);
981 : }
982 : }
983 6512 : MemoryContextSwitchTo(oldcxt);
984 :
985 6512 : return leaf_part_rri;
986 : }
987 :
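/*
 * Worked example (illustrative, not from this file) of the attribute-number
 * translation performed above.  Given
 *
 *     CREATE TABLE root (a int, b text) PARTITION BY LIST (a);
 *     CREATE TABLE part1 (b text, a int);
 *     ALTER TABLE root ATTACH PARTITION part1 FOR VALUES IN (1);
 *
 * part1's attnos are b=1, a=2 while root's are a=1, b=2, so the
 * build_attrmap_by_name() / map_variable_attnos() calls above must rewrite
 * the Vars in the WCO, RETURNING, ON CONFLICT and MERGE expressions from
 * the root's numbering to part1's before they can be initialized for
 * execution against the partition.
 */
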
988 : /*
989 : * ExecInitRoutingInfo
990 : * Set up information needed for translating tuples between root
991 : * partitioned table format and partition format, and keep track of it
992 : * in PartitionTupleRouting.
993 : */
994 : static void
995 7020 : ExecInitRoutingInfo(ModifyTableState *mtstate,
996 : EState *estate,
997 : PartitionTupleRouting *proute,
998 : PartitionDispatch dispatch,
999 : ResultRelInfo *partRelInfo,
1000 : int partidx,
1001 : bool is_borrowed_rel)
1002 : {
1003 : MemoryContext oldcxt;
1004 : int rri_index;
1005 :
1006 7020 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
1007 :
1008 : /*
1009 : * Set up tuple conversion between root parent and the partition if the
1010 : * two have different rowtypes. If conversion is indeed required, also
1011 : * initialize a slot dedicated to storing this partition's converted
1012 : * tuples. Various operations that are applied to tuples after routing,
1013 : * such as checking constraints, will refer to this slot.
1014 : */
1015 7020 : if (ExecGetRootToChildMap(partRelInfo, estate) != NULL)
1016 : {
1017 1318 : Relation partrel = partRelInfo->ri_RelationDesc;
1018 :
1019 : /*
1020 : * This pins the partition's TupleDesc, which will be released at the
1021 : * end of the command.
1022 : */
1023 1318 : partRelInfo->ri_PartitionTupleSlot =
1024 1318 : table_slot_create(partrel, &estate->es_tupleTable);
1025 : }
1026 : else
1027 5702 : partRelInfo->ri_PartitionTupleSlot = NULL;
1028 :
1029 : /*
1030 : * If the partition is a foreign table, let the FDW init itself for
1031 : * routing tuples to the partition.
1032 : */
1033 7020 : if (partRelInfo->ri_FdwRoutine != NULL &&
1034 92 : partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
1035 92 : partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);
1036 :
1037 : /*
1038 : * Determine if the FDW supports batch insert and determine the batch size
1039 : * (an FDW may support batching, but it may be disabled for the
1040 : * server/table or for this particular query).
1041 : *
1042 : * If the FDW does not support batching, we set the batch size to 1.
1043 : */
1044 7008 : if (partRelInfo->ri_FdwRoutine != NULL &&
1045 80 : partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize &&
1046 80 : partRelInfo->ri_FdwRoutine->ExecForeignBatchInsert)
1047 80 : partRelInfo->ri_BatchSize =
1048 80 : partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo);
1049 : else
1050 6928 : partRelInfo->ri_BatchSize = 1;
1051 :
1052 : Assert(partRelInfo->ri_BatchSize >= 1);
1053 :
1054 7008 : partRelInfo->ri_CopyMultiInsertBuffer = NULL;
1055 :
1056 : /*
1057 : * Keep track of it in the PartitionTupleRouting->partitions array.
1058 : */
1059 : Assert(dispatch->indexes[partidx] == -1);
1060 :
1061 7008 : rri_index = proute->num_partitions++;
1062 :
1063 : /* Allocate or enlarge the array, as needed */
1064 7008 : if (proute->num_partitions >= proute->max_partitions)
1065 : {
1066 4846 : if (proute->max_partitions == 0)
1067 : {
1068 4834 : proute->max_partitions = 8;
1069 4834 : proute->partitions = (ResultRelInfo **)
1070 4834 : palloc(sizeof(ResultRelInfo *) * proute->max_partitions);
1071 4834 : proute->is_borrowed_rel = (bool *)
1072 4834 : palloc(sizeof(bool) * proute->max_partitions);
1073 : }
1074 : else
1075 : {
1076 12 : proute->max_partitions *= 2;
1077 12 : proute->partitions = (ResultRelInfo **)
1078 12 : repalloc(proute->partitions, sizeof(ResultRelInfo *) *
1079 12 : proute->max_partitions);
1080 12 : proute->is_borrowed_rel = (bool *)
1081 12 : repalloc(proute->is_borrowed_rel, sizeof(bool) *
1082 12 : proute->max_partitions);
1083 : }
1084 : }
1085 :
1086 7008 : proute->partitions[rri_index] = partRelInfo;
1087 7008 : proute->is_borrowed_rel[rri_index] = is_borrowed_rel;
1088 7008 : dispatch->indexes[partidx] = rri_index;
1089 :
1090 7008 : MemoryContextSwitchTo(oldcxt);
1091 7008 : }
1092 :
1093 : /*
1094 : * ExecInitPartitionDispatchInfo
1095 : * Lock the partitioned table (if not locked already), initialize a
1096 : * PartitionDispatch for it, and store it in the next available slot
1097 : * in the proute->partition_dispatch_info array. Also, record the
1098 : * index into this array in element partidx of the parent_pd->indexes[]
1099 : * array so that we can properly retrieve the newly created
1100 : * PartitionDispatch later.
1101 : */
1102 : static PartitionDispatch
1103 6348 : ExecInitPartitionDispatchInfo(EState *estate,
1104 : PartitionTupleRouting *proute, Oid partoid,
1105 : PartitionDispatch parent_pd, int partidx,
1106 : ResultRelInfo *rootResultRelInfo)
1107 : {
1108 : Relation rel;
1109 : PartitionDesc partdesc;
1110 : PartitionDispatch pd;
1111 : int dispatchidx;
1112 : MemoryContext oldcxt;
1113 :
1114 : /*
1115 : * For data modification, it is better that the executor does not include
1116 : * partitions being detached, except when running in snapshot-isolation
1117 : * mode. This means that a read-committed transaction immediately gets a
1118 : * "no partition for tuple" error when a tuple is inserted into a
1119 : * partition that's being detached concurrently, but a transaction in
1120 : * repeatable-read mode can still use such a partition.
1121 : */
1122 6348 : if (estate->es_partition_directory == NULL)
1123 5124 : estate->es_partition_directory =
1124 5124 : CreatePartitionDirectory(estate->es_query_cxt,
1125 : !IsolationUsesXactSnapshot());
1126 :
1127 6348 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
1128 :
1129 : /*
1130 : * Only sub-partitioned tables need to be locked here. The root
1131 : * partitioned table will already have been locked as it's referenced in
1132 : * the query's rtable.
1133 : */
1134 6348 : if (partoid != RelationGetRelid(proute->partition_root))
1135 1188 : rel = table_open(partoid, RowExclusiveLock);
1136 : else
1137 5160 : rel = proute->partition_root;
1138 6348 : partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);
1139 :
1140 6348 : pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
1141 6348 : partdesc->nparts * sizeof(int));
1142 6348 : pd->reldesc = rel;
1143 6348 : pd->key = RelationGetPartitionKey(rel);
1144 6348 : pd->keystate = NIL;
1145 6348 : pd->partdesc = partdesc;
1146 6348 : if (parent_pd != NULL)
1147 : {
1148 1188 : TupleDesc tupdesc = RelationGetDescr(rel);
1149 :
1150 : /*
1151 : * For sub-partitioned tables where the column order differs from its
1152 : * direct parent partitioned table, we must store a tuple table slot
1153 : * initialized with its tuple descriptor and a tuple conversion map to
1154 : * convert a tuple from its parent's rowtype to its own. This is to
1155 : * make sure that we are looking at the correct row using the correct
1156 : * tuple descriptor when computing its partition key for tuple
1157 : * routing.
1158 : */
1159 1188 : pd->tupmap = build_attrmap_by_name_if_req(RelationGetDescr(parent_pd->reldesc),
1160 : tupdesc,
1161 : false);
1162 1188 : pd->tupslot = pd->tupmap ?
1163 1188 : MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual) : NULL;
1164 : }
1165 : else
1166 : {
1167 : /* Not required for the root partitioned table */
1168 5160 : pd->tupmap = NULL;
1169 5160 : pd->tupslot = NULL;
1170 : }
1171 :
1172 : /*
1173 : * Initialize with -1 to signify that the corresponding partition's
1174 : * ResultRelInfo or PartitionDispatch has not been created yet.
1175 : */
1176 6348 : memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);
1177 :
1178 : /* Track in PartitionTupleRouting for later use */
1179 6348 : dispatchidx = proute->num_dispatch++;
1180 :
1181 : /* Allocate or enlarge the array, as needed */
1182 6348 : if (proute->num_dispatch >= proute->max_dispatch)
1183 : {
1184 5160 : if (proute->max_dispatch == 0)
1185 : {
1186 5160 : proute->max_dispatch = 4;
1187 5160 : proute->partition_dispatch_info = (PartitionDispatch *)
1188 5160 : palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
1189 5160 : proute->nonleaf_partitions = (ResultRelInfo **)
1190 5160 : palloc(sizeof(ResultRelInfo *) * proute->max_dispatch);
1191 : }
1192 : else
1193 : {
1194 0 : proute->max_dispatch *= 2;
1195 0 : proute->partition_dispatch_info = (PartitionDispatch *)
1196 0 : repalloc(proute->partition_dispatch_info,
1197 0 : sizeof(PartitionDispatch) * proute->max_dispatch);
1198 0 : proute->nonleaf_partitions = (ResultRelInfo **)
1199 0 : repalloc(proute->nonleaf_partitions,
1200 0 : sizeof(ResultRelInfo *) * proute->max_dispatch);
1201 : }
1202 : }
1203 6348 : proute->partition_dispatch_info[dispatchidx] = pd;
1204 :
1205 : /*
1206 : * If setting up a PartitionDispatch for a sub-partitioned table, we may
1207 : * also need a minimally valid ResultRelInfo for checking the partition
1208 : * constraint later; set that up now.
1209 : */
1210 6348 : if (parent_pd)
1211 : {
1212 1188 : ResultRelInfo *rri = makeNode(ResultRelInfo);
1213 :
1214 1188 : InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
1215 1188 : proute->nonleaf_partitions[dispatchidx] = rri;
1216 : }
1217 : else
1218 5160 : proute->nonleaf_partitions[dispatchidx] = NULL;
1219 :
1220 : /*
1221 : * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
1222 : * install a downlink in the parent to allow quick descent.
1223 : */
1224 6348 : if (parent_pd)
1225 : {
1226 : Assert(parent_pd->indexes[partidx] == -1);
1227 1188 : parent_pd->indexes[partidx] = dispatchidx;
1228 : }
1229 :
1230 6348 : MemoryContextSwitchTo(oldcxt);
1231 :
1232 6348 : return pd;
1233 : }
1234 :
1235 : /*
1236 : * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
1237 : * routing.
1238 : *
1239 : * Close all the partitioned tables, leaf partitions, and their indices.
1240 : */
1241 : void
1242 4346 : ExecCleanupTupleRouting(ModifyTableState *mtstate,
1243 : PartitionTupleRouting *proute)
1244 : {
1245 : int i;
1246 :
1247 : /*
1248 : * Remember, proute->partition_dispatch_info[0] corresponds to the root
1249 : * partitioned table, which we must not try to close, because it is the
1250 : * main target table of the query that will be closed by callers such as
1251 : * ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
1252 : * partitioned table.
1253 : */
1254 5310 : for (i = 1; i < proute->num_dispatch; i++)
1255 : {
1256 964 : PartitionDispatch pd = proute->partition_dispatch_info[i];
1257 :
1258 964 : table_close(pd->reldesc, NoLock);
1259 :
1260 964 : if (pd->tupslot)
1261 454 : ExecDropSingleTupleTableSlot(pd->tupslot);
1262 : }
1263 :
1264 10780 : for (i = 0; i < proute->num_partitions; i++)
1265 : {
1266 6434 : ResultRelInfo *resultRelInfo = proute->partitions[i];
1267 :
1268 : /* Allow any FDWs to shut down */
1269 6434 : if (resultRelInfo->ri_FdwRoutine != NULL &&
1270 68 : resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
1271 68 : resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
1272 : resultRelInfo);
1273 :
1274 : /*
1275 : * Close it if it's not one of the result relations borrowed from the
1276 : * owning ModifyTableState; those will be closed by ExecEndPlan().
1277 : */
1278 6434 : if (proute->is_borrowed_rel[i])
1279 460 : continue;
1280 :
1281 5974 : ExecCloseIndices(resultRelInfo);
1282 5974 : table_close(resultRelInfo->ri_RelationDesc, NoLock);
1283 : }
1284 4346 : }
1285 :
1286 : /* ----------------
1287 : * FormPartitionKeyDatum
1288 : * Construct values[] and isnull[] arrays for the partition key
1289 : * of a tuple.
1290 : *
1291 : * pd Partition dispatch object of the partitioned table
1292 : * slot Heap tuple from which to extract partition key
1293 : * estate executor state for evaluating any partition key
1294 : * expressions (must be non-NULL)
1295 : * values Array of partition key Datums (output area)
1296 : * isnull Array of is-null indicators (output area)
1297 : *
1298 : * the ecxt_scantuple slot of estate's per-tuple expr context must point to
1299 : * the heap tuple passed in.
1300 : * ----------------
1301 : */
1302 : static void
1303 1148136 : FormPartitionKeyDatum(PartitionDispatch pd,
1304 : TupleTableSlot *slot,
1305 : EState *estate,
1306 : Datum *values,
1307 : bool *isnull)
1308 : {
1309 : ListCell *partexpr_item;
1310 : int i;
1311 :
1312 1148136 : if (pd->key->partexprs != NIL && pd->keystate == NIL)
1313 : {
1314 : /* Check caller has set up context correctly */
1315 : Assert(estate != NULL &&
1316 : GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1317 :
1318 : /* First time through, set up expression evaluation state */
1319 534 : pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
1320 : }
1321 :
1322 1148136 : partexpr_item = list_head(pd->keystate);
1323 2319084 : for (i = 0; i < pd->key->partnatts; i++)
1324 : {
1325 1170948 : AttrNumber keycol = pd->key->partattrs[i];
1326 : Datum datum;
1327 : bool isNull;
1328 :
1329 1170948 : if (keycol != 0)
1330 : {
1331 : /* Plain column; get the value directly from the heap tuple */
1332 1083324 : datum = slot_getattr(slot, keycol, &isNull);
1333 : }
1334 : else
1335 : {
1336 : /* Expression; need to evaluate it */
1337 87624 : if (partexpr_item == NULL)
1338 0 : elog(ERROR, "wrong number of partition key expressions");
1339 87624 : datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
1340 87624 : GetPerTupleExprContext(estate),
1341 : &isNull);
1342 87624 : partexpr_item = lnext(pd->keystate, partexpr_item);
1343 : }
1344 1170948 : values[i] = datum;
1345 1170948 : isnull[i] = isNull;
1346 : }
1347 :
1348 1148136 : if (partexpr_item != NULL)
1349 0 : elog(ERROR, "wrong number of partition key expressions");
1350 1148136 : }
1351 :
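/*
 * Illustrative example: for a table created with
 *
 *     CREATE TABLE t (a int, b int) PARTITION BY RANGE (a, (a + b));
 *
 * pd->key->partattrs is {1, 0}.  The first key column is fetched straight
 * from the slot with slot_getattr(), while the 0 in the second position
 * signals that the value comes from evaluating the (a + b) expression in
 * pd->key->partexprs (compiled into pd->keystate above).
 */
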
1352 : /*
1353 : * The number of times the same partition must be found in a row before we
1354 : * switch from a binary search for the given values to just checking if the
1355 : * values belong to the last found partition. This must be above 0.
1356 : */
1357 : #define PARTITION_CACHED_FIND_THRESHOLD 16
1358 :
1359 : /*
1360 : * get_partition_for_tuple
1361 : * Finds partition of relation which accepts the partition key specified
1362 : * in values and isnull.
1363 : *
1364 : * Calling this function can be quite expensive when LIST and RANGE
1365 : * partitioned tables have many partitions. This is due to the binary search
1366 : * that's done to find the correct partition. Many of the use cases for LIST
1367 : * and RANGE partitioned tables make it likely that the same partition is
1368 : * found in subsequent ExecFindPartition() calls. This is especially true for
1369 : * cases such as RANGE partitioned tables on a TIMESTAMP column where the
1370 : * partition key is the current time. When asked to find a partition for a
1371 : * RANGE or LIST partitioned table, we record the partition index and datum
1372 : * offset we've found for the given 'values' in the PartitionDesc (which is
1373 : * stored in relcache), and if we keep finding the same partition
1374 : * PARTITION_CACHED_FIND_THRESHOLD times in a row, then we'll enable caching
1375 : * logic and instead of performing a binary search to find the correct
1376 : * partition, we'll just double-check that 'values' still belong to the last
1377 : * found partition, and if so, we'll return that partition index, thus
1378 : * skipping the need for the binary search. If we fail to match the last
1379 : * partition when double checking, then we fall back on doing a binary search.
1380 : * In this case, unless we find 'values' belong to the DEFAULT partition,
1381 : * we'll reset the number of times we've hit the same partition so that we
1382 : * don't attempt to use the cache again until we've found that partition at
1383 : * least PARTITION_CACHED_FIND_THRESHOLD times in a row.
1384 : *
1385 : * For cases where the partition changes on each lookup, the amount of
1386 : * additional work required just amounts to recording the last found partition
1387 : * and bound offset then resetting the found counter. This is cheap and does
1388 : * not appear to cause any meaningful slowdowns for such cases.
1389 : *
1390 : * No caching of partitions is done when the last found partition is the
1391 : * DEFAULT or NULL partition. For the case of the DEFAULT partition, there
1392 : * is no bound offset storing the matching datum, so we cannot confirm the
1393 : * indexes match. For the NULL partition, this is just so cheap, there's no
1394 : * sense in caching.
1395 : *
1396 : * Return value is index of the partition (>= 0 and < partdesc->nparts) if one
1397 : * found or -1 if none found.
1398 : */
1399 : static int
1400 1148094 : get_partition_for_tuple(PartitionDispatch pd, const Datum *values, const bool *isnull)
1401 : {
1402 1148094 : int bound_offset = -1;
1403 1148094 : int part_index = -1;
1404 1148094 : PartitionKey key = pd->key;
1405 1148094 : PartitionDesc partdesc = pd->partdesc;
1406 1148094 : PartitionBoundInfo boundinfo = partdesc->boundinfo;
1407 :
1408 : /*
1409 : * In the switch statement below, when we perform a cached lookup for
1410 : * RANGE and LIST partitioned tables, if we find that the last found
1411 : * partition matches the 'values', we return the partition index right
1412 : * away. We do this instead of breaking out of the switch as we don't
1413 : * want to execute the code about the DEFAULT partition or do any updates
1414 : * for any of the cache-related fields. That would be a waste of effort
1415 : * as we already know it's not the DEFAULT partition and have no need to
1416 : * increment the number of times we found the same partition any higher
1417 : * than PARTITION_CACHED_FIND_THRESHOLD.
1418 : */
1419 :
1420 : /* Route as appropriate based on partitioning strategy. */
1421 1148094 : switch (key->strategy)
1422 : {
1423 210738 : case PARTITION_STRATEGY_HASH:
1424 : {
1425 : uint64 rowHash;
1426 :
1427 : /* hash partitioning is too cheap to bother caching */
1428 210738 : rowHash = compute_partition_hash_value(key->partnatts,
1429 : key->partsupfunc,
1430 210738 : key->partcollation,
1431 : values, isnull);
1432 :
1433 : /*
1434 : * HASH partitions can't have a DEFAULT partition and we don't
1435 : * do any caching work for them, so just return the part index
1436 : */
1437 210726 : return boundinfo->indexes[rowHash % boundinfo->nindexes];
1438 : }
1439 :
1440 171030 : case PARTITION_STRATEGY_LIST:
1441 171030 : if (isnull[0])
1442 : {
1443 : /* this is far too cheap to bother doing any caching */
1444 132 : if (partition_bound_accepts_nulls(boundinfo))
1445 : {
1446 : /*
1447 : * When there is a NULL partition we just return that
1448 : * directly. We don't have a bound_offset so it's not
1449 : * valid to drop into the code after the switch which
1450 : * checks and updates the cache fields. We perhaps should
1451 : * be invalidating the details of the last cached
1452 : * partition but there's no real need to. Keeping those
1453 : * fields set gives a chance at matching to the cached
1454 : * partition on the next lookup.
1455 : */
1456 102 : return boundinfo->null_index;
1457 : }
1458 : }
1459 : else
1460 : {
1461 : bool equal;
1462 :
1463 170898 : if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
1464 : {
1465 23892 : int last_datum_offset = partdesc->last_found_datum_index;
1466 23892 : Datum lastDatum = boundinfo->datums[last_datum_offset][0];
1467 : int32 cmpval;
1468 :
1469 : /* does the last found datum index match this datum? */
1470 23892 : cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
1471 23892 : key->partcollation[0],
1472 : lastDatum,
1473 : values[0]));
1474 :
1475 23892 : if (cmpval == 0)
1476 23538 : return boundinfo->indexes[last_datum_offset];
1477 :
1478 : /* fall-through and do a manual lookup */
1479 : }
1480 :
1481 147360 : bound_offset = partition_list_bsearch(key->partsupfunc,
1482 : key->partcollation,
1483 : boundinfo,
1484 : values[0], &equal);
1485 147360 : if (bound_offset >= 0 && equal)
1486 146960 : part_index = boundinfo->indexes[bound_offset];
1487 : }
1488 147390 : break;
1489 :
1490 766326 : case PARTITION_STRATEGY_RANGE:
1491 : {
1492 766326 : bool equal = false,
1493 766326 : range_partkey_has_null = false;
1494 : int i;
1495 :
1496 : /*
1497 : * No range includes NULL, so this will be accepted by the
1498 : * default partition if there is one, and otherwise rejected.
1499 : */
1500 1555044 : for (i = 0; i < key->partnatts; i++)
1501 : {
1502 788772 : if (isnull[i])
1503 : {
1504 54 : range_partkey_has_null = true;
1505 54 : break;
1506 : }
1507 : }
1508 :
1509 : /* NULLs belong in the DEFAULT partition */
1510 766326 : if (range_partkey_has_null)
1511 54 : break;
1512 :
1513 766272 : if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
1514 : {
1515 249654 : int last_datum_offset = partdesc->last_found_datum_index;
1516 249654 : Datum *lastDatums = boundinfo->datums[last_datum_offset];
1517 249654 : PartitionRangeDatumKind *kind = boundinfo->kind[last_datum_offset];
1518 : int32 cmpval;
1519 :
1520 : /* check if the value is >= the lower bound */
1521 249654 : cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1522 : key->partcollation,
1523 : lastDatums,
1524 : kind,
1525 : values,
1526 249654 : key->partnatts);
1527 :
1528 : /*
1529 : * If it's equal to the lower bound then no need to check
1530 : * the upper bound.
1531 : */
1532 249654 : if (cmpval == 0)
1533 249344 : return boundinfo->indexes[last_datum_offset + 1];
1534 :
1535 243756 : if (cmpval < 0 && last_datum_offset + 1 < boundinfo->ndatums)
1536 : {
1537 : /* check if the value is below the upper bound */
1538 243696 : lastDatums = boundinfo->datums[last_datum_offset + 1];
1539 243696 : kind = boundinfo->kind[last_datum_offset + 1];
1540 243696 : cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1541 : key->partcollation,
1542 : lastDatums,
1543 : kind,
1544 : values,
1545 243696 : key->partnatts);
1546 :
1547 243696 : if (cmpval > 0)
1548 243446 : return boundinfo->indexes[last_datum_offset + 1];
1549 : }
1550 : /* fall-through and do a manual lookup */
1551 : }
1552 :
1553 516928 : bound_offset = partition_range_datum_bsearch(key->partsupfunc,
1554 : key->partcollation,
1555 : boundinfo,
1556 516928 : key->partnatts,
1557 : values,
1558 : &equal);
1559 :
1560 : /*
1561 : * The bound at bound_offset is less than or equal to the
1562 : * tuple value, so the bound at offset+1 is the upper bound of
1563 : * the partition we're looking for, if one actually exists.
1565 : */
1566 516928 : part_index = boundinfo->indexes[bound_offset + 1];
1567 : }
1568 516928 : break;
1569 :
1570 0 : default:
1571 0 : elog(ERROR, "unexpected partition strategy: %d",
1572 : (int) key->strategy);
1573 : }
1574 :
1575 : /*
1576 : * part_index < 0 means we failed to find a partition of this parent. Use
1577 : * the default partition, if there is one.
1578 : */
1579 664372 : if (part_index < 0)
1580 : {
1581 : /*
1582 : * No need to reset the cache fields here. The next set of values
1583 : * might end up belonging to the cached partition, so leaving the
1584 : * cache alone improves the chances of a cache hit on the next lookup.
1585 : */
1586 708 : return boundinfo->default_index;
1587 : }
1588 :
1589 : /* we should only make it here when the code above set bound_offset */
1590 : Assert(bound_offset >= 0);
1591 :
1592 : /*
1593 : * Attend to the cache fields. If the bound_offset matches the last
1594 : * cached bound offset then we've found the same partition as last time,
1595 : * so bump the count by one. If all goes well, we'll eventually reach
1596 : * PARTITION_CACHED_FIND_THRESHOLD and try the cache path next time
1597 : * around. Otherwise, we'll reset the cache count back to 1 to mark that
1598 : * we've found this partition for the first time.
1599 : */
1600 663664 : if (bound_offset == partdesc->last_found_datum_index)
1601 461214 : partdesc->last_found_count++;
1602 : else
1603 : {
1604 202450 : partdesc->last_found_count = 1;
1605 202450 : partdesc->last_found_part_index = part_index;
1606 202450 : partdesc->last_found_datum_index = bound_offset;
1607 : }
1608 :
1609 663664 : return part_index;
1610 : }
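 :
 : /*
 : * Editor's note: a minimal standalone sketch of the caching pattern used
 : * above, with a plain int standing in for the partition bound datum. The
 : * names (cached_value, cached_index, hit_count, THRESHOLD, full_search)
 : * are hypothetical, not PostgreSQL symbols; the point is only the shape:
 : * try the cache after THRESHOLD consecutive hits, otherwise do the full
 : * search and update the cache counters.
 : *
 : *    #define THRESHOLD 16            // cf. PARTITION_CACHED_FIND_THRESHOLD
 : *
 : *    static int cached_value;
 : *    static int cached_index = -1;
 : *    static int hit_count;           // consecutive same-slot finds
 : *
 : *    static int find_slot(int value)
 : *    {
 : *        if (hit_count >= THRESHOLD && cached_index >= 0 &&
 : *            value == cached_value)
 : *            return cached_index;    // cheap path: a single comparison
 : *
 : *        int index = full_search(value);  // hypothetical bsearch helper
 : *
 : *        if (index == cached_index)
 : *            hit_count++;            // found the same slot as last time
 : *        else
 : *        {
 : *            hit_count = 1;          // first find of this slot
 : *            cached_index = index;
 : *            cached_value = value;
 : *        }
 : *        return index;
 : *    }
 : */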
1611 :
1612 : /*
1613 : * ExecBuildSlotPartitionKeyDescription
1614 : *
1615 : * This works very much like BuildIndexValueDescription() and is currently
1616 : * used for building error messages when ExecFindPartition() fails to find
1617 : * a partition for a row.
1618 : */
1619 : static char *
1620 154 : ExecBuildSlotPartitionKeyDescription(Relation rel,
1621 : const Datum *values,
1622 : const bool *isnull,
1623 : int maxfieldlen)
1624 : {
1625 : StringInfoData buf;
1626 154 : PartitionKey key = RelationGetPartitionKey(rel);
1627 154 : int partnatts = get_partition_natts(key);
1628 : int i;
1629 154 : Oid relid = RelationGetRelid(rel);
1630 : AclResult aclresult;
1631 :
1632 154 : if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
1633 0 : return NULL;
1634 :
1635 : /* If the user has table-level access, just go build the description. */
1636 154 : aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
1637 154 : if (aclresult != ACLCHECK_OK)
1638 : {
1639 : /*
1640 : * Step through the columns of the partition key and make sure the
1641 : * user has SELECT rights on all of them.
1642 : */
1643 24 : for (i = 0; i < partnatts; i++)
1644 : {
1645 18 : AttrNumber attnum = get_partition_col_attnum(key, i);
1646 :
1647 : /*
1648 : * If this partition key column is an expression, we return no
1649 : * detail rather than try to figure out what column(s) the
1650 : * expression includes and if the user has SELECT rights on them.
1651 : */
1652 30 : if (attnum == InvalidAttrNumber ||
1653 12 : pg_attribute_aclcheck(relid, attnum, GetUserId(),
1654 : ACL_SELECT) != ACLCHECK_OK)
1655 12 : return NULL;
1656 : }
1657 : }
1658 :
1659 142 : initStringInfo(&buf);
1660 142 : appendStringInfo(&buf, "(%s) = (",
1661 : pg_get_partkeydef_columns(relid, true));
1662 :
1663 338 : for (i = 0; i < partnatts; i++)
1664 : {
1665 : char *val;
1666 : int vallen;
1667 :
1668 196 : if (isnull[i])
1669 30 : val = "null";
1670 : else
1671 : {
1672 : Oid foutoid;
1673 : bool typisvarlena;
1674 :
1675 166 : getTypeOutputInfo(get_partition_col_typid(key, i),
1676 : &foutoid, &typisvarlena);
1677 166 : val = OidOutputFunctionCall(foutoid, values[i]);
1678 : }
1679 :
1680 196 : if (i > 0)
1681 54 : appendStringInfoString(&buf, ", ");
1682 :
1683 : /* truncate if needed */
1684 196 : vallen = strlen(val);
1685 196 : if (vallen <= maxfieldlen)
1686 196 : appendBinaryStringInfo(&buf, val, vallen);
1687 : else
1688 : {
1689 0 : vallen = pg_mbcliplen(val, vallen, maxfieldlen);
1690 0 : appendBinaryStringInfo(&buf, val, vallen);
1691 0 : appendStringInfoString(&buf, "...");
1692 : }
1693 : }
1694 :
1695 142 : appendStringInfoChar(&buf, ')');
1696 :
1697 142 : return buf.data;
1698 : }
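 :
 : /*
 : * Editor's note: for illustration, with a two-column partition key
 : * (a, b) and an unroutable row (42, NULL), the function above builds
 : *
 : *    (a, b) = (42, null)
 : *
 : * which becomes the DETAIL of the "no partition of relation ... found
 : * for row" error raised by ExecFindPartition(). Values longer than
 : * maxfieldlen are clipped at a multibyte character boundary
 : * (pg_mbcliplen) and suffixed with "...".
 : */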
1699 :
1700 : /*
1701 : * adjust_partition_colnos
1702 : * Adjust the list of UPDATE target column numbers to account for
1703 : * attribute differences between the parent and the partition.
1704 : *
1705 : * Note: mustn't be called if no adjustment is required.
1706 : */
1707 : static List *
1708 76 : adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri)
1709 : {
1710 76 : TupleConversionMap *map = ExecGetChildToRootMap(leaf_part_rri);
1711 :
1712 : Assert(map != NULL);
1713 :
1714 76 : return adjust_partition_colnos_using_map(colnos, map->attrMap);
1715 : }
1716 :
1717 : /*
1718 : * adjust_partition_colnos_using_map
1719 : * Like adjust_partition_colnos, but uses a caller-supplied map instead
1720 : * of assuming that the mapping is from the "root" result relation.
1721 : *
1722 : * Note: mustn't be called if no adjustment is required.
1723 : */
1724 : static List *
1725 94 : adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
1726 : {
1727 94 : List *new_colnos = NIL;
1728 : ListCell *lc;
1729 :
1730 : Assert(attrMap != NULL); /* else we shouldn't be here */
1731 :
1732 232 : foreach(lc, colnos)
1733 : {
1734 138 : AttrNumber parentattrno = lfirst_int(lc);
1735 :
1736 138 : if (parentattrno <= 0 ||
1737 138 : parentattrno > attrMap->maplen ||
1738 138 : attrMap->attnums[parentattrno - 1] == 0)
1739 0 : elog(ERROR, "unexpected attno %d in target column list",
1740 : parentattrno);
1741 138 : new_colnos = lappend_int(new_colnos,
1742 138 : attrMap->attnums[parentattrno - 1]);
1743 : }
1744 :
1745 94 : return new_colnos;
1746 : }
1747 :
1748 : /*-------------------------------------------------------------------------
1749 : * Run-Time Partition Pruning Support.
1750 : *
1751 : * The following series of functions exists to support the removal of unneeded
1752 : * subplans for queries against partitioned tables. The supporting functions
1753 : * here are designed to work with any plan type that supports an arbitrary
1754 : * number of subplans, e.g. Append, MergeAppend.
1755 : *
1756 : * When pruning involves comparison of a partition key to a constant, it's
1757 : * done by the planner. However, if we have a comparison to a non-constant
1758 : * but not volatile expression, that presents an opportunity for run-time
1759 : * pruning by the executor, allowing irrelevant partitions to be skipped
1760 : * dynamically.
1761 : *
1762 : * We must distinguish expressions containing PARAM_EXEC Params from
1763 : * expressions that don't contain those. Even though a PARAM_EXEC Param is
1764 : * considered to be a stable expression, it can change value from one plan
1765 : * node scan to the next during query execution. Stable comparison
1766 : * expressions that don't involve such Params allow partition pruning to be
1767 : * done once during executor startup. Expressions that do involve such Params
1768 : * require us to prune separately for each scan of the parent plan node.
1769 : *
1770 : * Note that pruning away unneeded subplans during executor startup has the
1771 : * added benefit of not having to initialize the unneeded subplans at all.
1772 : *
1773 : *
1774 : * Functions:
1775 : *
1776 : * ExecDoInitialPruning:
1777 : * Perform runtime "initial" pruning, if necessary, to determine the set
1778 : * of child subnodes that need to be initialized during ExecInitNode() for
1779 : * all plan nodes that contain a PartitionPruneInfo.
1780 : *
1781 : * ExecInitPartitionExecPruning:
1782 : * Updates the PartitionPruneState found at the given part_prune_index in
1783 : * EState.es_part_prune_states for use during "exec" pruning if required.
1784 : * Also returns the set of subplans to initialize that would be stored at
1785 : * part_prune_index in EState.es_part_prune_results by
1786 : * ExecDoInitialPruning(). Maps in PartitionPruneState are updated to
1787 : * account for initial pruning possibly having eliminated some of the
1788 : * subplans.
1789 : *
1790 : * ExecFindMatchingSubPlans:
1791 : * Returns indexes of matching subplans after evaluating the expressions
1792 : * that are safe to evaluate at a given point. This function is first
1793 : * called during ExecDoInitialPruning() to find the initially matching
1794 : * subplans based on performing the initial pruning steps and then must be
1795 : * called again each time the value of a Param listed in
1796 : * PartitionPruneState's 'execparamids' changes.
1797 : *-------------------------------------------------------------------------
1798 : */
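 :
 : /*
 : * Editor's note: an illustrative (hypothetical) pair of cases. For a
 : * table partitioned by "key",
 : *
 : *    SELECT * FROM parted WHERE key = $1;
 : *
 : * compares the partition key with a stable expression containing no
 : * PARAM_EXEC Params, so pruning can be done once at executor startup and
 : * the pruned subplans are never initialized at all. If instead the
 : * comparison value is a PARAM_EXEC Param -- say, the inner side of a
 : * nested loop, where the value changes for every outer row -- pruning
 : * must be repeated at each rescan of the parent plan node.
 : */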
1799 :
1800 :
1801 : /*
1802 : * ExecDoInitialPruning
1803 : * Perform runtime "initial" pruning, if necessary, to determine the set
1804 : * of child subnodes that need to be initialized during ExecInitNode() for
1805 : * plan nodes that support partition pruning.
1806 : *
1807 : * This function iterates over each PartitionPruneInfo entry in
1808 : * estate->es_part_prune_infos. For each entry, it creates a PartitionPruneState
1809 : * and adds it to es_part_prune_states. ExecInitPartitionExecPruning() accesses
1810 : * these states through their corresponding indexes in es_part_prune_states and
1811 : * assigns each state to the parent node's PlanState, where it will be used
1812 : * for "exec" pruning.
1813 : *
1814 : * If initial pruning steps exist for a PartitionPruneInfo entry, this function
1815 : * executes those pruning steps and stores the result as a bitmapset of valid
1816 : * child subplans, identifying which subplans should be initialized for
1817 : * execution. The results are saved in estate->es_part_prune_results.
1818 : *
1819 : * If no initial pruning is performed for a given PartitionPruneInfo, a NULL
1820 : * entry is still added to es_part_prune_results to maintain alignment with
1821 : * es_part_prune_infos. This ensures that ExecInitPartitionExecPruning() can
1822 : * use the same index to retrieve the pruning results.
1823 : */
1824 : void
1825 578758 : ExecDoInitialPruning(EState *estate)
1826 : {
1827 : ListCell *lc;
1828 :
1829 579560 : foreach(lc, estate->es_part_prune_infos)
1830 : {
1831 802 : PartitionPruneInfo *pruneinfo = lfirst_node(PartitionPruneInfo, lc);
1832 : PartitionPruneState *prunestate;
1833 802 : Bitmapset *validsubplans = NULL;
1834 802 : Bitmapset *all_leafpart_rtis = NULL;
1835 802 : Bitmapset *validsubplan_rtis = NULL;
1836 :
1837 : /* Create and save the PartitionPruneState. */
1838 802 : prunestate = CreatePartitionPruneState(estate, pruneinfo,
1839 : &all_leafpart_rtis);
1840 802 : estate->es_part_prune_states = lappend(estate->es_part_prune_states,
1841 : prunestate);
1842 :
1843 : /*
1844 : * Perform initial pruning steps, if any, and save the result
1845 : * bitmapset or NULL as described in the header comment.
1846 : */
1847 802 : if (prunestate->do_initial_prune)
1848 448 : validsubplans = ExecFindMatchingSubPlans(prunestate, true,
1849 : &validsubplan_rtis);
1850 : else
1851 354 : validsubplan_rtis = all_leafpart_rtis;
1852 :
1853 802 : estate->es_unpruned_relids = bms_add_members(estate->es_unpruned_relids,
1854 : validsubplan_rtis);
1855 802 : estate->es_part_prune_results = lappend(estate->es_part_prune_results,
1856 : validsubplans);
1857 : }
1858 578758 : }
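 :
 : /*
 : * Editor's note: the EState lists touched above are kept index-aligned,
 : * so for every index i the entries correspond:
 : *
 : *    es_part_prune_infos[i]    plan-time PartitionPruneInfo
 : *    es_part_prune_states[i]   PartitionPruneState created above
 : *    es_part_prune_results[i]  Bitmapset of surviving subplans, or NULL
 : *                              when no initial pruning was performed
 : *
 : * which is why a NULL result is appended even when there is nothing to
 : * store: ExecInitPartitionExecPruning() indexes all three lists with the
 : * same part_prune_index.
 : */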
1859 :
1860 : /*
1861 : * ExecInitPartitionExecPruning
1862 : * Initialize the data structures needed for runtime "exec" partition
1863 : * pruning and return the result of initial pruning, if available.
1864 : *
1865 : * 'relids' identifies the relation to which both the parent plan and the
1866 : * PartitionPruneInfo given by 'part_prune_index' belong.
1867 : *
1868 : * On return, *initially_valid_subplans is assigned the set of indexes of
1869 : * child subplans that must be initialized along with the parent plan node.
1870 : * Initial pruning would have been performed by ExecDoInitialPruning(), if
1871 : * necessary, and the bitmapset of surviving subplans' indexes would have
1872 : * been stored as the part_prune_index'th element of
1873 : * EState.es_part_prune_results.
1874 : *
1875 : * If subplans were indeed pruned during initial pruning, the subplan_map
1876 : * arrays in the returned PartitionPruneState are re-sequenced to exclude those
1877 : * subplans, but only if the maps will be needed for subsequent execution
1878 : * pruning passes.
1879 : */
1880 : PartitionPruneState *
1881 806 : ExecInitPartitionExecPruning(PlanState *planstate,
1882 : int n_total_subplans,
1883 : int part_prune_index,
1884 : Bitmapset *relids,
1885 : Bitmapset **initially_valid_subplans)
1886 : {
1887 : PartitionPruneState *prunestate;
1888 806 : EState *estate = planstate->state;
1889 : PartitionPruneInfo *pruneinfo;
1890 :
1891 : /* Obtain the pruneinfo we need. */
1892 806 : pruneinfo = list_nth_node(PartitionPruneInfo, estate->es_part_prune_infos,
1893 : part_prune_index);
1894 :
1895 : /* Its relids had better match the plan node's, or the planner messed up. */
1896 806 : if (!bms_equal(relids, pruneinfo->relids))
1897 0 : elog(ERROR, "wrong pruneinfo with relids=%s found at part_prune_index=%d contained in plan node with relids=%s",
1898 : bmsToString(pruneinfo->relids), part_prune_index,
1899 : bmsToString(relids));
1900 :
1901 : /*
1902 : * The PartitionPruneState would have been created by
1903 : * ExecDoInitialPruning() and stored as the part_prune_index'th element of
1904 : * EState.es_part_prune_states.
1905 : */
1906 806 : prunestate = list_nth(estate->es_part_prune_states, part_prune_index);
1907 : Assert(prunestate != NULL);
1908 :
1909 : /* Use the result of initial pruning done by ExecDoInitialPruning(). */
1910 806 : if (prunestate->do_initial_prune)
1911 450 : *initially_valid_subplans = list_nth_node(Bitmapset,
1912 : estate->es_part_prune_results,
1913 : part_prune_index);
1914 : else
1915 : {
1916 : /* No pruning, so we'll need to initialize all subplans */
1917 : Assert(n_total_subplans > 0);
1918 356 : *initially_valid_subplans = bms_add_range(NULL, 0,
1919 : n_total_subplans - 1);
1920 : }
1921 :
1922 : /*
1923 : * The exec pruning state must also be initialized, if needed, before it
1924 : * can be used for pruning during execution.
1925 : *
1926 : * This also re-sequences subplan indexes contained in prunestate to
1927 : * account for any that were removed due to initial pruning; refer to the
1928 : * condition in InitExecPartitionPruneContexts() that is used to determine
1929 : * whether to do this. If no exec pruning needs to be done, the maps are
1930 : * simply left in an invalid state, but that's OK since that data
1931 : * won't be consulted again (cf initial Assert in
1932 : * ExecFindMatchingSubPlans).
1933 : */
1934 806 : if (prunestate->do_exec_prune)
1935 398 : InitExecPartitionPruneContexts(prunestate, planstate,
1936 : *initially_valid_subplans,
1937 : n_total_subplans);
1938 :
1939 806 : return prunestate;
1940 : }
1941 :
1942 : /*
1943 : * CreatePartitionPruneState
1944 : * Build the data structure required for calling ExecFindMatchingSubPlans
1945 : *
1946 : * This includes PartitionPruneContexts (stored in each
1947 : * PartitionedRelPruningData corresponding to a PartitionedRelPruneInfo),
1948 : * which hold the ExprStates needed to evaluate pruning expressions, and
1949 : * mapping arrays to convert partition indexes from the pruning logic
1950 : * into subplan indexes in the parent plan node's list of child subplans.
1951 : *
1952 : * 'pruneinfo' is a PartitionPruneInfo as generated by
1953 : * make_partition_pruneinfo. Here we build a PartitionPruneState containing a
1954 : * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
1955 : * pruneinfo->prune_infos), each of which contains a PartitionedRelPruningData
1956 : * for each PartitionedRelPruneInfo appearing in that sublist. This two-level
1957 : * system is needed to keep from confusing the different hierarchies when a
1958 : * UNION ALL contains multiple partitioned tables as children. The data
1959 : * stored in each PartitionedRelPruningData can be re-used each time we
1960 : * re-evaluate which partitions match the pruning steps provided in each
1961 : * PartitionedRelPruneInfo.
1962 : *
1963 : * Note that only the PartitionPruneContexts for initial pruning are
1964 : * initialized here. Those required for exec pruning are initialized later in
1965 : * ExecInitPartitionExecPruning(), as they depend on the availability of the
1966 : * parent plan node's PlanState.
1967 : *
1968 : * If initial pruning steps are to be skipped (e.g., during EXPLAIN
1969 : * (GENERIC_PLAN)), *all_leafpart_rtis will be populated with the RT indexes of
1970 : * all leaf partitions whose scanning subnode is included in the parent plan
1971 : * node's list of child plans. The caller must add these RT indexes to
1972 : * estate->es_unpruned_relids.
1973 : */
1974 : static PartitionPruneState *
1975 802 : CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo,
1976 : Bitmapset **all_leafpart_rtis)
1977 : {
1978 : PartitionPruneState *prunestate;
1979 : int n_part_hierarchies;
1980 : ListCell *lc;
1981 : int i;
1982 :
1983 : /*
1984 : * Expression context that will be used by partkey_datum_from_expr() to
1985 : * evaluate expressions for comparison against partition bounds.
1986 : */
1987 802 : ExprContext *econtext = CreateExprContext(estate);
1988 :
1989 : /* For data reading, executor always includes detached partitions */
1990 802 : if (estate->es_partition_directory == NULL)
1991 754 : estate->es_partition_directory =
1992 754 : CreatePartitionDirectory(estate->es_query_cxt, false);
1993 :
1994 802 : n_part_hierarchies = list_length(pruneinfo->prune_infos);
1995 : Assert(n_part_hierarchies > 0);
1996 :
1997 : /*
1998 : * Allocate the data structure
1999 : */
2000 : prunestate = (PartitionPruneState *)
2001 802 : palloc(offsetof(PartitionPruneState, partprunedata) +
2002 : sizeof(PartitionPruningData *) * n_part_hierarchies);
2003 :
2004 : /* Save ExprContext for use during InitExecPartitionPruneContexts(). */
2005 802 : prunestate->econtext = econtext;
2006 802 : prunestate->execparamids = NULL;
2007 : /* other_subplans can change at runtime, so we need our own copy */
2008 802 : prunestate->other_subplans = bms_copy(pruneinfo->other_subplans);
2009 802 : prunestate->do_initial_prune = false; /* may be set below */
2010 802 : prunestate->do_exec_prune = false; /* may be set below */
2011 802 : prunestate->num_partprunedata = n_part_hierarchies;
2012 :
2013 : /*
2014 : * Create a short-term memory context which we'll use when making calls to
2015 : * the partition pruning functions. This avoids possible memory leaks,
2016 : * since the pruning functions call comparison functions that aren't under
2017 : * our control.
2018 : */
2019 802 : prunestate->prune_context =
2020 802 : AllocSetContextCreate(CurrentMemoryContext,
2021 : "Partition Prune",
2022 : ALLOCSET_DEFAULT_SIZES);
2023 :
2024 802 : i = 0;
2025 1628 : foreach(lc, pruneinfo->prune_infos)
2026 : {
2027 826 : List *partrelpruneinfos = lfirst_node(List, lc);
2028 826 : int npartrelpruneinfos = list_length(partrelpruneinfos);
2029 : PartitionPruningData *prunedata;
2030 : ListCell *lc2;
2031 : int j;
2032 :
2033 : prunedata = (PartitionPruningData *)
2034 826 : palloc(offsetof(PartitionPruningData, partrelprunedata) +
2035 826 : npartrelpruneinfos * sizeof(PartitionedRelPruningData));
2036 826 : prunestate->partprunedata[i] = prunedata;
2037 826 : prunedata->num_partrelprunedata = npartrelpruneinfos;
2038 :
2039 826 : j = 0;
2040 2462 : foreach(lc2, partrelpruneinfos)
2041 : {
2042 1636 : PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2);
2043 1636 : PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2044 : Relation partrel;
2045 : PartitionDesc partdesc;
2046 : PartitionKey partkey;
2047 :
2048 : /*
2049 : * We can rely on the copies of the partitioned table's partition
2050 : * key and partition descriptor appearing in its relcache entry,
2051 : * because that entry will be held open and locked for the
2052 : * duration of this executor run.
2053 : */
2054 1636 : partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex, false);
2055 :
2056 : /* Remember for InitExecPartitionPruneContexts(). */
2057 1636 : pprune->partrel = partrel;
2058 :
2059 1636 : partkey = RelationGetPartitionKey(partrel);
2060 1636 : partdesc = PartitionDirectoryLookup(estate->es_partition_directory,
2061 : partrel);
2062 :
2063 : /*
2064 : * Initialize the subplan_map and subpart_map.
2065 : *
2066 : * The set of partitions that exist now might not be the same set
2067 : * that existed when the plan was made. The normal case is that it
2068 : * is; optimize for that case with a quick comparison, and just copy
2069 : * the subplan_map and make subpart_map and leafpart_rti_map point
2070 : * to the ones in the PruneInfo.
2071 : *
2072 : * For the case where they aren't identical, we could have more
2073 : * partitions on either side; or even exactly the same number of
2074 : * them on both but the set of OIDs doesn't match fully. Handle
2075 : * this by creating new subplan_map and subpart_map arrays that
2076 : * correspond to the ones in the PruneInfo where the new
2077 : * partition descriptor's OIDs match. Any that don't match can be
2078 : * set to -1, as if they were pruned. By construction, both
2079 : * arrays are in partition bounds order.
2080 : */
2081 1636 : pprune->nparts = partdesc->nparts;
2082 1636 : pprune->subplan_map = palloc(sizeof(int) * partdesc->nparts);
2083 :
2084 1636 : if (partdesc->nparts == pinfo->nparts &&
2085 1634 : memcmp(partdesc->oids, pinfo->relid_map,
2086 1634 : sizeof(int) * partdesc->nparts) == 0)
2087 : {
2088 1512 : pprune->subpart_map = pinfo->subpart_map;
2089 1512 : pprune->leafpart_rti_map = pinfo->leafpart_rti_map;
2090 1512 : memcpy(pprune->subplan_map, pinfo->subplan_map,
2091 1512 : sizeof(int) * pinfo->nparts);
2092 : }
2093 : else
2094 : {
2095 124 : int pd_idx = 0;
2096 : int pp_idx;
2097 :
2098 : /*
2099 : * When the partition arrays are not identical, there could be
2100 : * some new ones but it's also possible that one was removed;
2101 : * we cope with both situations by walking the arrays and
2102 : * discarding those that don't match.
2103 : *
2104 : * If the number of partitions on both sides matches, it's still
2105 : * possible that one partition has been detached and another
2106 : * attached. Cope with that by creating a map that skips any
2107 : * mismatches.
2108 : */
2109 124 : pprune->subpart_map = palloc(sizeof(int) * partdesc->nparts);
2110 124 : pprune->leafpart_rti_map = palloc(sizeof(int) * partdesc->nparts);
2111 :
2112 528 : for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++)
2113 : {
2114 : /* Skip any InvalidOid relid_map entries */
2115 624 : while (pd_idx < pinfo->nparts &&
2116 504 : !OidIsValid(pinfo->relid_map[pd_idx]))
2117 220 : pd_idx++;
2118 :
2119 404 : recheck:
2120 404 : if (pd_idx < pinfo->nparts &&
2121 284 : pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
2122 : {
2123 : /* match... */
2124 182 : pprune->subplan_map[pp_idx] =
2125 182 : pinfo->subplan_map[pd_idx];
2126 182 : pprune->subpart_map[pp_idx] =
2127 182 : pinfo->subpart_map[pd_idx];
2128 182 : pprune->leafpart_rti_map[pp_idx] =
2129 182 : pinfo->leafpart_rti_map[pd_idx];
2130 182 : pd_idx++;
2131 182 : continue;
2132 : }
2133 :
2134 : /*
2135 : * There isn't an exact match in the corresponding
2136 : * positions of both arrays. Peek ahead in
2137 : * pinfo->relid_map to see if we have a match for the
2138 : * current partition in partdesc. Normally if a match
2139 : * exists it's just one element ahead, and it means the
2140 : * planner saw one extra partition that we no longer see
2141 : * now (its concurrent detach finished just in between);
2142 : * so we skip that one by updating pd_idx to the new
2143 : * location and jumping back to the recheck label. We can then continue to
2144 : * match the rest of the elements after skipping the OID
2145 : * with no match; no future matches are tried for the
2146 : * element that was skipped, because we know the arrays to
2147 : * be in the same order.
2148 : *
2149 : * If we don't see a match anywhere in the rest of the
2150 : * pinfo->relid_map array, that means we see an element
2151 : * now that the planner didn't see, so mark that one as
2152 : * pruned and move on.
2153 : */
2154 288 : for (int pd_idx2 = pd_idx + 1; pd_idx2 < pinfo->nparts; pd_idx2++)
2155 : {
2156 66 : if (pd_idx2 >= pinfo->nparts)
2157 0 : break;
2158 66 : if (pinfo->relid_map[pd_idx2] == partdesc->oids[pp_idx])
2159 : {
2160 0 : pd_idx = pd_idx2;
2161 0 : goto recheck;
2162 : }
2163 : }
2164 :
2165 222 : pprune->subpart_map[pp_idx] = -1;
2166 222 : pprune->subplan_map[pp_idx] = -1;
2167 222 : pprune->leafpart_rti_map[pp_idx] = 0;
2168 : }
2169 : }
2170 :
2171 : /* present_parts is also subject to later modification */
2172 1636 : pprune->present_parts = bms_copy(pinfo->present_parts);
2173 :
2174 : /*
2175 : * Only initial_context is initialized here. exec_context is
2176 : * initialized during ExecInitPartitionExecPruning() when the
2177 : * parent plan's PlanState is available.
2178 : *
2179 : * Note that we must skip execution-time (both "init" and "exec")
2180 : * partition pruning in EXPLAIN (GENERIC_PLAN), since parameter
2181 : * values may be missing.
2182 : */
2183 1636 : pprune->initial_pruning_steps = pinfo->initial_pruning_steps;
2184 1636 : if (pinfo->initial_pruning_steps &&
2185 556 : !(econtext->ecxt_estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
2186 : {
2187 550 : InitPartitionPruneContext(&pprune->initial_context,
2188 : pprune->initial_pruning_steps,
2189 : partdesc, partkey, NULL,
2190 : econtext);
2191 : /* Record whether initial pruning is needed at any level */
2192 550 : prunestate->do_initial_prune = true;
2193 : }
2194 1636 : pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
2195 1636 : if (pinfo->exec_pruning_steps &&
2196 510 : !(econtext->ecxt_estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
2197 : {
2198 : /* Record whether exec pruning is needed at any level */
2199 510 : prunestate->do_exec_prune = true;
2200 : }
2201 :
2202 : /*
2203 : * Accumulate the IDs of all PARAM_EXEC Params affecting the
2204 : * partitioning decisions at this plan node.
2205 : */
2206 3272 : prunestate->execparamids = bms_add_members(prunestate->execparamids,
2207 1636 : pinfo->execparamids);
2208 :
2209 : /*
2210 : * Return all leaf partition indexes if we're skipping pruning in
2211 : * the EXPLAIN (GENERIC_PLAN) case.
2212 : */
2213 1636 : if (pinfo->initial_pruning_steps && !prunestate->do_initial_prune)
2214 : {
2215 6 : int part_index = -1;
2216 :
2217 18 : while ((part_index = bms_next_member(pprune->present_parts,
2218 18 : part_index)) >= 0)
2219 : {
2220 12 : Index rtindex = pprune->leafpart_rti_map[part_index];
2221 :
2222 12 : if (rtindex)
2223 12 : *all_leafpart_rtis = bms_add_member(*all_leafpart_rtis,
2224 : rtindex);
2225 : }
2226 : }
2227 :
2228 1636 : j++;
2229 : }
2230 826 : i++;
2231 : }
2232 :
2233 802 : return prunestate;
2234 : }
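 :
 : /*
 : * Editor's note: a worked example of the OID-matching loop above.
 : * Suppose the planner saw three partitions,
 : *
 : *    pinfo->relid_map:  [100, 200, 300]    (scanned by pd_idx)
 : *
 : * but partition 200 has since been concurrently detached and a new
 : * partition 400 attached, so the executor now sees
 : *
 : *    partdesc->oids:    [100, 300, 400]    (scanned by pp_idx)
 : *
 : * Then: pp_idx = 0 matches 100 directly and copies the map entries;
 : * pp_idx = 1 (300) fails against relid_map[1] (200), but the peek-ahead
 : * loop finds 300 at the next position, advances pd_idx, and the recheck
 : * matches; pp_idx = 2 (400) matches nothing ahead, so its map entries
 : * are set to -1 / 0, i.e. it is treated as pruned.
 : */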
2235 :
2236 : /*
2237 : * Initialize a PartitionPruneContext for the given list of pruning steps.
2238 : */
2239 : static void
2240 1062 : InitPartitionPruneContext(PartitionPruneContext *context,
2241 : List *pruning_steps,
2242 : PartitionDesc partdesc,
2243 : PartitionKey partkey,
2244 : PlanState *planstate,
2245 : ExprContext *econtext)
2246 : {
2247 : int n_steps;
2248 : int partnatts;
2249 : ListCell *lc;
2250 :
2251 1062 : n_steps = list_length(pruning_steps);
2252 :
2253 1062 : context->strategy = partkey->strategy;
2254 1062 : context->partnatts = partnatts = partkey->partnatts;
2255 1062 : context->nparts = partdesc->nparts;
2256 1062 : context->boundinfo = partdesc->boundinfo;
2257 1062 : context->partcollation = partkey->partcollation;
2258 1062 : context->partsupfunc = partkey->partsupfunc;
2259 :
2260 : /* We'll look up type-specific support functions as needed */
2261 1062 : context->stepcmpfuncs = (FmgrInfo *)
2262 1062 : palloc0(sizeof(FmgrInfo) * n_steps * partnatts);
2263 :
2264 1062 : context->ppccontext = CurrentMemoryContext;
2265 1062 : context->planstate = planstate;
2266 1062 : context->exprcontext = econtext;
2267 :
2268 : /* Initialize expression state for each expression we need */
2269 1062 : context->exprstates = (ExprState **)
2270 1062 : palloc0(sizeof(ExprState *) * n_steps * partnatts);
2271 2786 : foreach(lc, pruning_steps)
2272 : {
2273 1724 : PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc);
2274 : ListCell *lc2;
2275 : int keyno;
2276 :
2277 : /* not needed for other step kinds */
2278 1724 : if (!IsA(step, PartitionPruneStepOp))
2279 286 : continue;
2280 1438 : lc2 = list_head(step->exprs);
2281 : Assert(list_length(step->exprs) <= partnatts);
2282 :
2283 3026 : for (keyno = 0; keyno < partnatts; keyno++)
2284 : {
2285 1588 : if (bms_is_member(keyno, step->nullkeys))
2286 6 : continue;
2287 :
2288 1582 : if (lc2 != NULL)
2289 : {
2290 1486 : Expr *expr = lfirst(lc2);
2291 :
2292 : /* not needed for Consts */
2293 1486 : if (!IsA(expr, Const))
2294 : {
2295 1392 : int stateidx = PruneCxtStateIdx(partnatts,
2296 : step->step.step_id,
2297 : keyno);
2298 :
2299 : /*
2300 : * When planstate is NULL, pruning_steps is known not to
2301 : * contain any expressions that depend on the parent plan.
2302 : * Information about any available EXTERN parameters must be
2303 : * passed explicitly in that case; the caller must have made
2304 : * it available via econtext.
2305 : */
2306 1392 : if (planstate == NULL)
2307 814 : context->exprstates[stateidx] =
2308 814 : ExecInitExprWithParams(expr,
2309 : econtext->ecxt_param_list_info);
2310 : else
2311 578 : context->exprstates[stateidx] =
2312 578 : ExecInitExpr(expr, context->planstate);
2313 : }
2314 1486 : lc2 = lnext(step->exprs, lc2);
2315 : }
2316 : }
2317 : }
2318 1062 : }
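 :
 : /*
 : * Editor's note: context->exprstates is one flat array with a slot per
 : * (pruning step, partition key column) pair. PruneCxtStateIdx (from
 : * executor/execPartition.h) flattens the two indexes:
 : *
 : *    stateidx = partnatts * step_id + keyno;
 : *
 : * For example, with partnatts = 2, the expression for step 3, key
 : * column 1 lands in slot 2 * 3 + 1 = 7. Slots belonging to Consts and
 : * nullkeys are simply left NULL by the loop above.
 : */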
2319 :
2320 : /*
2321 : * InitExecPartitionPruneContexts
2322 : * Initialize exec pruning contexts deferred by CreatePartitionPruneState()
2323 : *
2324 : * This function finalizes exec pruning setup for a PartitionPruneState by
2325 : * initializing contexts for pruning steps that require the parent plan's
2326 : * PlanState. It iterates over PartitionPruningData entries and sets up the
2327 : * necessary execution contexts for pruning during query execution.
2328 : *
2329 : * It also fixes the mapping of partition indexes to subplan indexes contained
2330 : * in prunestate to account for the new list of subplans that survived initial
2331 : * pruning.
2332 : *
2333 : * The index values currently stored in PartitionPruneState assume the full
2334 : * set of subplans that existed before initial pruning was done. If initial
2335 : * pruning got rid of some of those subplans, any subsequent pruning passes
2336 : * will be choosing from a different set of target subplans than the
2337 : * pre-initial-pruning set, so the maps in PartitionPruneState containing
2338 : * those indexes must be updated to reflect the new indexes of subplans in
2339 : * the post-initial-pruning set.
2340 : */
2341 : static void
2342 398 : InitExecPartitionPruneContexts(PartitionPruneState *prunestate,
2343 : PlanState *parent_plan,
2344 : Bitmapset *initially_valid_subplans,
2345 : int n_total_subplans)
2346 : {
2347 : EState *estate;
2348 398 : int *new_subplan_indexes = NULL;
2349 : Bitmapset *new_other_subplans;
2350 : int i;
2351 : int newidx;
2352 398 : bool fix_subplan_map = false;
2353 :
2354 : Assert(prunestate->do_exec_prune);
2355 : Assert(parent_plan != NULL);
2356 398 : estate = parent_plan->state;
2357 :
2358 : /*
2359 : * No need to fix subplans maps if initial pruning didn't eliminate any
2360 : * subplans.
2361 : */
2362 398 : if (bms_num_members(initially_valid_subplans) < n_total_subplans)
2363 : {
2364 48 : fix_subplan_map = true;
2365 :
2366 : /*
2367 : * First we must build a temporary array which maps old subplan
2368 : * indexes to new ones. For convenience of initialization, we use
2369 : * 1-based indexes in this array and leave pruned items as 0.
2370 : */
2371 48 : new_subplan_indexes = (int *) palloc0(sizeof(int) * n_total_subplans);
2372 48 : newidx = 1;
2373 48 : i = -1;
2374 186 : while ((i = bms_next_member(initially_valid_subplans, i)) >= 0)
2375 : {
2376 : Assert(i < n_total_subplans);
2377 138 : new_subplan_indexes[i] = newidx++;
2378 : }
2379 : }
2380 :
2381 : /*
2382 : * Now we can update each PartitionedRelPruningData's subplan_map with new
2383 : * subplan indexes. We must also recompute its present_parts bitmap.
2384 : */
2385 820 : for (i = 0; i < prunestate->num_partprunedata; i++)
2386 : {
2387 422 : PartitionPruningData *prunedata = prunestate->partprunedata[i];
2388 : int j;
2389 :
2390 : /*
2391 : * Within each hierarchy, we perform this loop in back-to-front order
2392 : * so that we determine present_parts for the lowest-level partitioned
2393 : * tables first. This way we can tell whether a sub-partitioned
2394 : * table's partitions were entirely pruned so we can exclude it from
2395 : * the current level's present_parts.
2396 : */
2397 1300 : for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
2398 : {
2399 878 : PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2400 878 : int nparts = pprune->nparts;
2401 : int k;
2402 :
2403 : /* Initialize PartitionPruneContext for exec pruning, if needed. */
2404 878 : if (pprune->exec_pruning_steps != NIL)
2405 : {
2406 : PartitionKey partkey;
2407 : PartitionDesc partdesc;
2408 :
2409 : /*
2410 : * See the comment in CreatePartitionPruneState() regarding
2411 : * the usage of partdesc and partkey.
2412 : */
2413 512 : partkey = RelationGetPartitionKey(pprune->partrel);
2414 512 : partdesc = PartitionDirectoryLookup(estate->es_partition_directory,
2415 : pprune->partrel);
2416 :
2417 512 : InitPartitionPruneContext(&pprune->exec_context,
2418 : pprune->exec_pruning_steps,
2419 : partdesc, partkey, parent_plan,
2420 : prunestate->econtext);
2421 : }
2422 :
2423 878 : if (!fix_subplan_map)
2424 686 : continue;
2425 :
2426 : /* We just rebuild present_parts from scratch */
2427 192 : bms_free(pprune->present_parts);
2428 192 : pprune->present_parts = NULL;
2429 :
2430 708 : for (k = 0; k < nparts; k++)
2431 : {
2432 516 : int oldidx = pprune->subplan_map[k];
2433 : int subidx;
2434 :
2435 : /*
2436 : * If this partition existed as a subplan then change the old
2437 : * subplan index to the new subplan index. The new index may
2438 : * become -1 if the partition was pruned above, or it may just
2439 : * come earlier in the subplan list due to some subplans being
2440 : * removed earlier in the list. If it's a subpartition, add
2441 : * it to present_parts unless it's entirely pruned.
2442 : */
2443 516 : if (oldidx >= 0)
2444 : {
2445 : Assert(oldidx < n_total_subplans);
2446 396 : pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;
2447 :
2448 396 : if (new_subplan_indexes[oldidx] > 0)
2449 114 : pprune->present_parts =
2450 114 : bms_add_member(pprune->present_parts, k);
2451 : }
2452 120 : else if ((subidx = pprune->subpart_map[k]) >= 0)
2453 : {
2454 : PartitionedRelPruningData *subprune;
2455 :
2456 120 : subprune = &prunedata->partrelprunedata[subidx];
2457 :
2458 120 : if (!bms_is_empty(subprune->present_parts))
2459 48 : pprune->present_parts =
2460 48 : bms_add_member(pprune->present_parts, k);
2461 : }
2462 : }
2463 : }
2464 : }
2465 :
2466 : /*
2467 : * If we fixed subplan maps, we must also recompute the other_subplans
2468 : * set, since indexes in it may change.
2469 : */
2470 398 : if (fix_subplan_map)
2471 : {
2472 48 : new_other_subplans = NULL;
2473 48 : i = -1;
2474 72 : while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
2475 24 : new_other_subplans = bms_add_member(new_other_subplans,
2476 24 : new_subplan_indexes[i] - 1);
2477 :
2478 48 : bms_free(prunestate->other_subplans);
2479 48 : prunestate->other_subplans = new_other_subplans;
2480 :
2481 48 : pfree(new_subplan_indexes);
2482 : }
2483 398 : }
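 :
 : /*
 : * Editor's note: a worked example of the re-sequencing above. With
 : * n_total_subplans = 5 and initially_valid_subplans = {1, 3}, the
 : * temporary 1-based array becomes
 : *
 : *    new_subplan_indexes: [0, 1, 0, 2, 0]
 : *
 : * so subplan_map entries holding old index 1 become 1 - 1 = 0, entries
 : * holding old index 3 become 2 - 1 = 1, and entries for pruned subplans
 : * become 0 - 1 = -1. other_subplans is rewritten with the same rule.
 : */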
2484 :
2485 : /*
2486 : * ExecFindMatchingSubPlans
2487 : * Determine which subplans match the pruning steps detailed in
2488 : * 'prunestate' for the current comparison expression values.
2489 : *
2490 : * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This
2491 : * differentiates the initial executor-time pruning step from later
2492 : * runtime pruning.
2493 : *
2494 : * The caller must pass a non-NULL validsubplan_rtis during initial pruning
2495 : * to collect the RT indexes of leaf partitions whose subnodes will be
2496 : * executed. These RT indexes are later added to EState.es_unpruned_relids.
2497 : */
2498 : Bitmapset *
2499 3898 : ExecFindMatchingSubPlans(PartitionPruneState *prunestate,
2500 : bool initial_prune,
2501 : Bitmapset **validsubplan_rtis)
2502 : {
2503 3898 : Bitmapset *result = NULL;
2504 : MemoryContext oldcontext;
2505 : int i;
2506 :
2507 : /*
2508 : * Either we're here on the initial prune done during pruning
2509 : * initialization, or we're at a point where PARAM_EXEC Params can be
2510 : * evaluated *and* there are steps in which to do so.
2511 : */
2512 : Assert(initial_prune || prunestate->do_exec_prune);
2513 : Assert(validsubplan_rtis != NULL || !initial_prune);
2514 :
2515 : /*
2516 : * Switch to a temp context to avoid leaking memory in the executor's
2517 : * query-lifespan memory context.
2518 : */
2519 3898 : oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
2520 :
2521 : /*
2522 : * For each hierarchy, do the pruning tests, and add nondeletable
2523 : * subplans' indexes to "result".
2524 : */
2525 7838 : for (i = 0; i < prunestate->num_partprunedata; i++)
2526 : {
2527 3940 : PartitionPruningData *prunedata = prunestate->partprunedata[i];
2528 : PartitionedRelPruningData *pprune;
2529 :
2530 : /*
2531 : * We pass the zeroth item, belonging to the root table of the
2532 : * hierarchy, and find_matching_subplans_recurse() takes care of
2533 : * recursing to other (lower-level) parents as needed.
2534 : */
2535 3940 : pprune = &prunedata->partrelprunedata[0];
2536 3940 : find_matching_subplans_recurse(prunedata, pprune, initial_prune,
2537 : &result, validsubplan_rtis);
2538 :
2539 : /*
2540 : * Expression eval may have used space in ExprContext too. Avoid
2541 : * accessing exec_context during initial pruning, as it is not valid
2542 : * at that stage.
2543 : */
2544 3940 : if (!initial_prune && pprune->exec_pruning_steps)
2545 3396 : ResetExprContext(pprune->exec_context.exprcontext);
2546 : }
2547 :
2548 : /* Add in any subplans that partition pruning didn't account for */
2549 3898 : result = bms_add_members(result, prunestate->other_subplans);
2550 :
2551 3898 : MemoryContextSwitchTo(oldcontext);
2552 :
2553 : /* Copy result out of the temp context before we reset it */
2554 3898 : result = bms_copy(result);
2555 3898 : if (validsubplan_rtis)
2556 448 : *validsubplan_rtis = bms_copy(*validsubplan_rtis);
2557 :
2558 3898 : MemoryContextReset(prunestate->prune_context);
2559 :
2560 3898 : return result;
2561 : }
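 :
 : /*
 : * Editor's note: the memory management above follows a common backend
 : * pattern for leak-prone computation; a generic sketch of the shape:
 : *
 : *    old = MemoryContextSwitchTo(tmpcxt);
 : *    result = compute();                  // may allocate freely in tmpcxt
 : *    MemoryContextSwitchTo(old);
 : *    result = bms_copy(result);           // copy survivors out first
 : *    MemoryContextReset(tmpcxt);          // then discard the rest
 : *
 : * Resetting only after copying is what makes the returned bitmapsets
 : * safe to use in the caller's (query-lifespan) context.
 : */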
2562 :
2563 : /*
2564 : * find_matching_subplans_recurse
2565 : * Recursive worker function for ExecFindMatchingSubPlans
2566 : *
2567 : * Adds valid (non-prunable) subplan IDs to *validsubplans. If
2568 : * *validsubplan_rtis is non-NULL, it also adds the RT indexes of their
2569 : * corresponding partitions, but only if they are leaf partitions.
2570 : */
2571 : static void
2572 4354 : find_matching_subplans_recurse(PartitionPruningData *prunedata,
2573 : PartitionedRelPruningData *pprune,
2574 : bool initial_prune,
2575 : Bitmapset **validsubplans,
2576 : Bitmapset **validsubplan_rtis)
2577 : {
2578 : Bitmapset *partset;
2579 : int i;
2580 :
2581 : /* Guard against stack overflow due to overly deep partition hierarchy. */
2582 4354 : check_stack_depth();
2583 :
2584 : /*
2585 : * Prune as appropriate, if we have pruning steps matching the current
2586 : * execution context. Otherwise just include all partitions at this
2587 : * level.
2588 : */
2589 4354 : if (initial_prune && pprune->initial_pruning_steps)
2590 532 : partset = get_matching_partitions(&pprune->initial_context,
2591 : pprune->initial_pruning_steps);
2592 3822 : else if (!initial_prune && pprune->exec_pruning_steps)
2593 3480 : partset = get_matching_partitions(&pprune->exec_context,
2594 : pprune->exec_pruning_steps);
2595 : else
2596 342 : partset = pprune->present_parts;
2597 :
2598 : /* Translate partset into subplan indexes */
2599 4354 : i = -1;
2600 6164 : while ((i = bms_next_member(partset, i)) >= 0)
2601 : {
2602 1810 : if (pprune->subplan_map[i] >= 0)
2603 : {
2604 2788 : *validsubplans = bms_add_member(*validsubplans,
2605 1394 : pprune->subplan_map[i]);
2606 :
2607 : /*
2608 : * Only report leaf partitions. Non-leaf partitions may appear
2609 : * here when they use an unflattened Append or MergeAppend.
2610 : */
2611 1394 : if (validsubplan_rtis && pprune->leafpart_rti_map[i])
2612 674 : *validsubplan_rtis = bms_add_member(*validsubplan_rtis,
2613 674 : pprune->leafpart_rti_map[i]);
2614 : }
2615 : else
2616 : {
2617 416 : int partidx = pprune->subpart_map[i];
2618 :
2619 416 : if (partidx >= 0)
2620 414 : find_matching_subplans_recurse(prunedata,
2621 : &prunedata->partrelprunedata[partidx],
2622 : initial_prune, validsubplans,
2623 : validsubplan_rtis);
2624 : else
2625 : {
2626 : /*
2627 : * We get here if the planner already pruned all the sub-
2628 : * partitions for this partition. Silently ignore this
2629 : * partition in this case. The end result is the same: we
2630 : * would have pruned all partitions just the same, but we
2631 : * don't have any pruning steps to execute to verify this.
2632 : */
2633 : }
2634 : }
2635 : }
2636 4354 : }
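 :
 : /*
 : * Editor's note: for each partition index i surviving the pruning tests,
 : * exactly one of the following applies in the loop above:
 : *
 : *    subplan_map[i] >= 0   a subplan exists: record it, and also its RT
 : *                          index if it is a leaf partition
 : *                          (leafpart_rti_map[i] != 0)
 : *    subpart_map[i] >= 0   sub-partitioned table: recurse into its
 : *                          PartitionedRelPruningData
 : *    both negative         the planner already pruned that whole subtree
 : *
 : * so only the unpruned fraction of the partition hierarchy is visited.
 : */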