/*-------------------------------------------------------------------------
 *
 * execPartition.c
 *      Support routines for partitioning.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *      src/backend/executor/execPartition.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/table.h"
#include "access/tableam.h"
#include "catalog/partition.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
#include "foreign/fdwapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "partitioning/partbounds.h"
#include "partitioning/partdesc.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"

/*-----------------------
 * PartitionTupleRouting - Encapsulates all information required to
 * route a tuple inserted into a partitioned table to one of its leaf
 * partitions.
 *
 * partition_root
 *      The partitioned table that's the target of the command.
 *
 * partition_dispatch_info
 *      Array of 'max_dispatch' elements containing a pointer to a
 *      PartitionDispatch object for every partitioned table touched by tuple
 *      routing.  The entry for the target partitioned table is *always*
 *      present in the 0th element of this array.  See comment for
 *      PartitionDispatchData->indexes for details on how this array is
 *      indexed.
 *
 * nonleaf_partitions
 *      Array of 'max_dispatch' elements containing pointers to fake
 *      ResultRelInfo objects for nonleaf partitions, useful for checking
 *      the partition constraint.
 *
 * num_dispatch
 *      The current number of items stored in the 'partition_dispatch_info'
 *      array.  Also serves as the index of the next free array element for
 *      new PartitionDispatch objects that need to be stored.
 *
 * max_dispatch
 *      The current allocated size of the 'partition_dispatch_info' array.
 *
 * partitions
 *      Array of 'max_partitions' elements containing a pointer to a
 *      ResultRelInfo for every leaf partition touched by tuple routing.
 *      Some of these are pointers to ResultRelInfos which are borrowed out
 *      of the owning ModifyTableState node.  The remainder have been built
 *      especially for tuple routing.  See comment for
 *      PartitionDispatchData->indexes for details on how this array is
 *      indexed.
 *
 * is_borrowed_rel
 *      Array of 'max_partitions' booleans recording whether a given entry
 *      in 'partitions' is a ResultRelInfo pointer borrowed from the owning
 *      ModifyTableState node, rather than being built here.
 *
 * num_partitions
 *      The current number of items stored in the 'partitions' array.  Also
 *      serves as the index of the next free array element for new
 *      ResultRelInfo objects that need to be stored.
 *
 * max_partitions
 *      The current allocated size of the 'partitions' array.
 *
 * memcxt
 *      Memory context used to allocate subsidiary structs.
 *-----------------------
 */
struct PartitionTupleRouting
{
    Relation    partition_root;
    PartitionDispatch *partition_dispatch_info;
    ResultRelInfo **nonleaf_partitions;
    int         num_dispatch;
    int         max_dispatch;
    ResultRelInfo **partitions;
    bool       *is_borrowed_rel;
    int         num_partitions;
    int         max_partitions;
    MemoryContext memcxt;
};

/*-----------------------
 * PartitionDispatch - information about one partitioned table in a partition
 * hierarchy required to route a tuple to any of its partitions.  A
 * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
 * struct and stored inside its 'partition_dispatch_info' array.
 *
 * reldesc
 *      Relation descriptor of the table
 *
 * key
 *      Partition key information of the table
 *
 * keystate
 *      Execution state required for expressions in the partition key
 *
 * partdesc
 *      Partition descriptor of the table
 *
 * tupslot
 *      A standalone TupleTableSlot initialized with this table's tuple
 *      descriptor, or NULL if no tuple conversion from the parent is
 *      required.
 *
 * tupmap
 *      TupleConversionMap to convert from the parent's rowtype to this
 *      table's rowtype (when extracting the partition key of a tuple just
 *      before routing it through this table).  A NULL value is stored if no
 *      tuple conversion is required.
 *
 * indexes
 *      Array of partdesc->nparts elements.  For leaf partitions the index
 *      corresponds to the partition's ResultRelInfo in the encapsulating
 *      PartitionTupleRouting's partitions array.  For sub-partitioned
 *      partitions, the index corresponds to the PartitionDispatch for it in
 *      its partition_dispatch_info array.  -1 indicates we've not yet
 *      allocated anything in PartitionTupleRouting for the partition.
 *-----------------------
 */
typedef struct PartitionDispatchData
{
    Relation    reldesc;
    PartitionKey key;
    List       *keystate;       /* list of ExprState */
    PartitionDesc partdesc;
    TupleTableSlot *tupslot;
    AttrMap    *tupmap;
    int         indexes[FLEXIBLE_ARRAY_MEMBER];
} PartitionDispatchData;


static ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
                                            EState *estate, PartitionTupleRouting *proute,
                                            PartitionDispatch dispatch,
                                            ResultRelInfo *rootResultRelInfo,
                                            int partidx);
static void ExecInitRoutingInfo(ModifyTableState *mtstate,
                                EState *estate,
                                PartitionTupleRouting *proute,
                                PartitionDispatch dispatch,
                                ResultRelInfo *partRelInfo,
                                int partidx,
                                bool is_borrowed_rel);
static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate,
                                                       PartitionTupleRouting *proute,
                                                       Oid partoid, PartitionDispatch parent_pd,
                                                       int partidx, ResultRelInfo *rootResultRelInfo);
static void FormPartitionKeyDatum(PartitionDispatch pd,
                                  TupleTableSlot *slot,
                                  EState *estate,
                                  Datum *values,
                                  bool *isnull);
static int  get_partition_for_tuple(PartitionDispatch pd, const Datum *values,
                                    const bool *isnull);
static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
                                                  const Datum *values,
                                                  const bool *isnull,
                                                  int maxfieldlen);
static List *adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri);
static List *adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap);
static PartitionPruneState *CreatePartitionPruneState(EState *estate,
                                                      PartitionPruneInfo *pruneinfo,
                                                      Bitmapset **all_leafpart_rtis);
static void InitPartitionPruneContext(PartitionPruneContext *context,
                                      List *pruning_steps,
                                      PartitionDesc partdesc,
                                      PartitionKey partkey,
                                      PlanState *planstate,
                                      ExprContext *econtext);
static void InitExecPartitionPruneContexts(PartitionPruneState *prunestate,
                                           PlanState *parent_plan,
                                           Bitmapset *initially_valid_subplans,
                                           int n_total_subplans);
static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
                                           PartitionedRelPruningData *pprune,
                                           bool initial_prune,
                                           Bitmapset **validsubplans,
                                           Bitmapset **validsubplan_rtis);

/*
 * ExecSetupPartitionTupleRouting - sets up information needed during
 * tuple routing for partitioned tables, encapsulates it in
 * PartitionTupleRouting, and returns it.
 *
 * Callers must use the returned PartitionTupleRouting during calls to
 * ExecFindPartition().  The actual ResultRelInfo for a partition is only
 * allocated when the partition is found for the first time.
 *
 * The current memory context is used to allocate this struct and all
 * subsidiary structs that will be allocated from it later on.  Typically
 * it should be estate->es_query_cxt.
 */
PartitionTupleRouting *
ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
{
    PartitionTupleRouting *proute;

    /*
     * Here we attempt to expend as little effort as possible in setting up
     * the PartitionTupleRouting.  Each partition's ResultRelInfo is built
     * on demand, only when we actually need to route a tuple to that
     * partition.  The reason for this is that a common case is for INSERT
     * to insert a single tuple into a partitioned table and this must be
     * fast.
     */
    proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
    proute->partition_root = rel;
    proute->memcxt = CurrentMemoryContext;
    /* Rest of members initialized by zeroing */

    /*
     * Initialize this table's PartitionDispatch object.  Here we pass in
     * the parent as NULL as we don't need to care about any parent of the
     * target partitioned table.
     */
    ExecInitPartitionDispatchInfo(estate, proute, RelationGetRelid(rel),
                                  NULL, 0, NULL);

    return proute;
}
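
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * callers in nodeModifyTable.c and copyfrom.c drive tuple routing roughly
 * as below; the local variable names here are hypothetical.
 *
 *      PartitionTupleRouting *proute;
 *
 *      proute = ExecSetupPartitionTupleRouting(estate, rootRel);
 *      for each tuple to be inserted:
 *          partRelInfo = ExecFindPartition(mtstate, rootResultRelInfo,
 *                                          proute, slot, estate);
 *          ... insert the (possibly converted) tuple into partRelInfo ...
 *      ExecCleanupTupleRouting(mtstate, proute);
 */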

/*
 * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
 * the tuple contained in *slot should belong to.
 *
 * If the partition's ResultRelInfo does not yet exist in 'proute' then we
 * set one up or reuse one from mtstate's resultRelInfo array.  When reusing
 * a ResultRelInfo from the mtstate we verify that the relation is a valid
 * target for INSERTs and initialize tuple routing information.
 *
 * rootResultRelInfo is the relation named in the query.
 *
 * estate must be non-NULL; we'll need it to compute any expressions in the
 * partition keys.  Also, its per-tuple contexts are used as evaluation
 * scratch space.
 *
 * If no leaf partition is found, this routine errors out with the
 * appropriate error message.  An error may also be raised if the found
 * target partition is not a valid target for an INSERT.
 */
ResultRelInfo *
ExecFindPartition(ModifyTableState *mtstate,
                  ResultRelInfo *rootResultRelInfo,
                  PartitionTupleRouting *proute,
                  TupleTableSlot *slot, EState *estate)
{
    PartitionDispatch *pd = proute->partition_dispatch_info;
    Datum       values[PARTITION_MAX_KEYS];
    bool        isnull[PARTITION_MAX_KEYS];
    Relation    rel;
    PartitionDispatch dispatch;
    PartitionDesc partdesc;
    ExprContext *ecxt = GetPerTupleExprContext(estate);
    TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
    TupleTableSlot *rootslot = slot;
    TupleTableSlot *myslot = NULL;
    MemoryContext oldcxt;
    ResultRelInfo *rri = NULL;

    /* use per-tuple context here to avoid leaking memory */
    oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

    /*
     * First check the root table's partition constraint, if any.  No point
     * in routing the tuple if it doesn't belong in the root table itself.
     */
    if (rootResultRelInfo->ri_RelationDesc->rd_rel->relispartition)
        ExecPartitionCheck(rootResultRelInfo, slot, estate, true);

    /* start with the root partitioned table */
    dispatch = pd[0];
    while (dispatch != NULL)
    {
        int         partidx = -1;
        bool        is_leaf;

        CHECK_FOR_INTERRUPTS();

        rel = dispatch->reldesc;
        partdesc = dispatch->partdesc;

        /*
         * Extract partition key from tuple.  Expression evaluation
         * machinery that FormPartitionKeyDatum() invokes expects
         * ecxt_scantuple to point to the correct tuple slot.  The slot
         * might have changed from what was used for the parent table if
         * the table of the current partitioning level has a different
         * tuple descriptor from the parent.  So update ecxt_scantuple
         * accordingly.
         */
        ecxt->ecxt_scantuple = slot;
        FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);

        /*
         * If this partitioned table has no partitions or no partition for
         * these values, error out.
         */
        if (partdesc->nparts == 0 ||
            (partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
        {
            char       *val_desc;

            val_desc = ExecBuildSlotPartitionKeyDescription(rel,
                                                            values, isnull, 64);
            ereport(ERROR,
                    (errcode(ERRCODE_CHECK_VIOLATION),
                     errmsg("no partition of relation \"%s\" found for row",
                            RelationGetRelationName(rel)),
                     val_desc ?
                     errdetail("Partition key of the failing row contains %s.",
                               val_desc) : 0,
                     errtable(rel)));
        }

        is_leaf = partdesc->is_leaf[partidx];
        if (is_leaf)
        {
            /*
             * We've reached the leaf -- hurray, we're done.  Look to see if
             * we've already got a ResultRelInfo for this partition.
             */
            if (likely(dispatch->indexes[partidx] >= 0))
            {
                /* ResultRelInfo already built */
                Assert(dispatch->indexes[partidx] < proute->num_partitions);
                rri = proute->partitions[dispatch->indexes[partidx]];
            }
            else
            {
                /*
                 * If the partition is known in the owning ModifyTableState
                 * node, we can re-use that ResultRelInfo instead of creating
                 * a new one with ExecInitPartitionInfo().
                 */
                rri = ExecLookupResultRelByOid(mtstate,
                                               partdesc->oids[partidx],
                                               true, false);
                if (rri)
                {
                    ModifyTable *node = (ModifyTable *) mtstate->ps.plan;

                    /* Verify this ResultRelInfo allows INSERTs */
                    CheckValidResultRel(rri, CMD_INSERT,
                                        node ? node->onConflictAction : ONCONFLICT_NONE,
                                        NIL);

                    /*
                     * Initialize information needed to insert this and
                     * subsequent tuples routed to this partition.
                     */
                    ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
                                        rri, partidx, true);
                }
                else
                {
                    /* We need to create a new one. */
                    rri = ExecInitPartitionInfo(mtstate, estate, proute,
                                                dispatch,
                                                rootResultRelInfo, partidx);
                }
            }
            Assert(rri != NULL);

            /* Signal to terminate the loop */
            dispatch = NULL;
        }
        else
        {
            /*
             * Partition is a sub-partitioned table; get the
             * PartitionDispatch
             */
            if (likely(dispatch->indexes[partidx] >= 0))
            {
                /* Already built. */
                Assert(dispatch->indexes[partidx] < proute->num_dispatch);

                rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];

                /*
                 * Move down to the next partition level and search again
                 * until we find a leaf partition that matches this tuple
                 */
                dispatch = pd[dispatch->indexes[partidx]];
            }
            else
            {
                /* Not yet built.  Do that now. */
                PartitionDispatch subdispatch;

                /*
                 * Create the new PartitionDispatch.  We pass the current one
                 * in as the parent PartitionDispatch
                 */
                subdispatch = ExecInitPartitionDispatchInfo(estate,
                                                            proute,
                                                            partdesc->oids[partidx],
                                                            dispatch, partidx,
                                                            mtstate->rootResultRelInfo);
                Assert(dispatch->indexes[partidx] >= 0 &&
                       dispatch->indexes[partidx] < proute->num_dispatch);

                rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
                dispatch = subdispatch;
            }

            /*
             * Convert the tuple to the new parent's layout, if different
             * from the previous parent.
             */
            if (dispatch->tupslot)
            {
                AttrMap    *map = dispatch->tupmap;
                TupleTableSlot *tempslot = myslot;

                myslot = dispatch->tupslot;
                slot = execute_attr_map_slot(map, slot, myslot);

                if (tempslot != NULL)
                    ExecClearTuple(tempslot);
            }
        }

        /*
         * If this partition is the default one, we must check its partition
         * constraint now, which may have changed concurrently due to
         * partitions being added to the parent.
         *
         * (We do this here, and do not rely on ExecInsert doing it, because
         * we don't want to miss doing it for non-leaf partitions.)
         */
        if (partidx == partdesc->boundinfo->default_index)
        {
            /*
             * The tuple must match the partition's layout for the
             * constraint expression to be evaluated successfully.  If the
             * partition is sub-partitioned, that would already be the case
             * due to the code above, but for a leaf partition the tuple
             * still matches the parent's layout.
             *
             * Note that we have a map to convert from root to current
             * partition, but not from immediate parent to current
             * partition.  So if we have to convert, do it from the root
             * slot; if not, use the root slot as-is.
             */
            if (is_leaf)
            {
                TupleConversionMap *map = ExecGetRootToChildMap(rri, estate);

                if (map)
                    slot = execute_attr_map_slot(map->attrMap, rootslot,
                                                 rri->ri_PartitionTupleSlot);
                else
                    slot = rootslot;
            }

            ExecPartitionCheck(rri, slot, estate, true);
        }
    }

    /* Release the tuple in the lowest parent's dedicated slot. */
    if (myslot != NULL)
        ExecClearTuple(myslot);
    /* and restore ecxt's scantuple */
    ecxt->ecxt_scantuple = ecxt_scantuple_saved;
    MemoryContextSwitchTo(oldcxt);

    return rri;
}

/*
 * ExecInitPartitionInfo
 *      Lock the partition and initialize ResultRelInfo.  Also set up other
 *      information for the partition and store it in the next empty slot in
 *      the proute->partitions array.
 *
 * Returns the ResultRelInfo
 */
static ResultRelInfo *
ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
                      PartitionTupleRouting *proute,
                      PartitionDispatch dispatch,
                      ResultRelInfo *rootResultRelInfo,
                      int partidx)
{
    ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
    Oid         partOid = dispatch->partdesc->oids[partidx];
    Relation    partrel;
    int         firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
    Relation    firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
    ResultRelInfo *leaf_part_rri;
    MemoryContext oldcxt;
    AttrMap    *part_attmap = NULL;
    bool        found_whole_row;

    oldcxt = MemoryContextSwitchTo(proute->memcxt);

    partrel = table_open(partOid, RowExclusiveLock);

    leaf_part_rri = makeNode(ResultRelInfo);
    InitResultRelInfo(leaf_part_rri,
                      partrel,
                      0,
                      rootResultRelInfo,
                      estate->es_instrument);

    /*
     * Verify result relation is a valid target for an INSERT.  An UPDATE
     * of a partition-key becomes a DELETE+INSERT operation, so this check
     * is still required when the operation is CMD_UPDATE.
     */
    CheckValidResultRel(leaf_part_rri, CMD_INSERT,
                        node ? node->onConflictAction : ONCONFLICT_NONE, NIL);

    /*
     * Open partition indices.  The user may have asked to check for
     * conflicts within this leaf partition and do "nothing" instead of
     * throwing an error.  Be prepared in that case by initializing the
     * index information needed by ExecInsert() to perform speculative
     * insertions.
     */
    if (partrel->rd_rel->relhasindex &&
        leaf_part_rri->ri_IndexRelationDescs == NULL)
        ExecOpenIndices(leaf_part_rri,
                        (node != NULL &&
                         node->onConflictAction != ONCONFLICT_NONE));

    /*
     * Build WITH CHECK OPTION constraints for the partition.  Note that we
     * didn't build the withCheckOptionList for partitions within the
     * planner, but simple translation of varattnos will suffice.  This only
     * occurs for the INSERT case or in the case of UPDATE/MERGE tuple
     * routing where we didn't find a result rel to reuse.
     */
    if (node && node->withCheckOptionLists != NIL)
    {
        List       *wcoList;
        List       *wcoExprs = NIL;
        ListCell   *ll;

        /*
         * In the case of INSERT on a partitioned table, there is only one
         * plan.  Likewise, there is only one WCO list, not one per
         * partition.  For UPDATE/MERGE, there are as many WCO lists as
         * there are plans.
         */
        Assert((node->operation == CMD_INSERT &&
                list_length(node->withCheckOptionLists) == 1 &&
                list_length(node->resultRelations) == 1) ||
               (node->operation == CMD_UPDATE &&
                list_length(node->withCheckOptionLists) ==
                list_length(node->resultRelations)) ||
               (node->operation == CMD_MERGE &&
                list_length(node->withCheckOptionLists) ==
                list_length(node->resultRelations)));

        /*
         * Use the WCO list of the first plan as a reference to calculate
         * attno's for the WCO list of this partition.  In the INSERT case,
         * that refers to the root partitioned table, whereas in the UPDATE
         * tuple routing case, that refers to the first partition in the
         * mtstate->resultRelInfo array.  In any case, both that relation
         * and this partition should have the same columns, so we should be
         * able to map attributes successfully.
         */
        wcoList = linitial(node->withCheckOptionLists);

        /*
         * Convert Vars in it to contain this partition's attribute numbers.
         */
        part_attmap =
            build_attrmap_by_name(RelationGetDescr(partrel),
                                  RelationGetDescr(firstResultRel),
                                  false);
        wcoList = (List *)
            map_variable_attnos((Node *) wcoList,
                                firstVarno, 0,
                                part_attmap,
                                RelationGetForm(partrel)->reltype,
                                &found_whole_row);
        /* We ignore the value of found_whole_row. */

        foreach(ll, wcoList)
        {
            WithCheckOption *wco = lfirst_node(WithCheckOption, ll);
            ExprState  *wcoExpr = ExecInitQual(castNode(List, wco->qual),
                                               &mtstate->ps);

            wcoExprs = lappend(wcoExprs, wcoExpr);
        }

        leaf_part_rri->ri_WithCheckOptions = wcoList;
        leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
    }

    /*
     * Build the RETURNING projection for the partition.  Note that we
     * didn't build the returningList for partitions within the planner, but
     * simple translation of varattnos will suffice.  This only occurs for
     * the INSERT case or in the case of UPDATE/MERGE tuple routing where we
     * didn't find a result rel to reuse.
     */
    if (node && node->returningLists != NIL)
    {
        TupleTableSlot *slot;
        ExprContext *econtext;
        List       *returningList;

        /* See the comment above for WCO lists. */
        Assert((node->operation == CMD_INSERT &&
                list_length(node->returningLists) == 1 &&
                list_length(node->resultRelations) == 1) ||
               (node->operation == CMD_UPDATE &&
                list_length(node->returningLists) ==
                list_length(node->resultRelations)) ||
               (node->operation == CMD_MERGE &&
                list_length(node->returningLists) ==
                list_length(node->resultRelations)));

        /*
         * Use the RETURNING list of the first plan as a reference to
         * calculate attno's for the RETURNING list of this partition.  See
         * the comment above for WCO lists for more details on why this is
         * okay.
         */
        returningList = linitial(node->returningLists);

        /*
         * Convert Vars in it to contain this partition's attribute numbers.
         */
        if (part_attmap == NULL)
            part_attmap =
                build_attrmap_by_name(RelationGetDescr(partrel),
                                      RelationGetDescr(firstResultRel),
                                      false);
        returningList = (List *)
            map_variable_attnos((Node *) returningList,
                                firstVarno, 0,
                                part_attmap,
                                RelationGetForm(partrel)->reltype,
                                &found_whole_row);
        /* We ignore the value of found_whole_row. */

        leaf_part_rri->ri_returningList = returningList;

        /*
         * Initialize the projection itself.
         *
         * Use the slot and the expression context that would have been set
         * up in ExecInitModifyTable() for projection's output.
         */
        Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
        slot = mtstate->ps.ps_ResultTupleSlot;
        Assert(mtstate->ps.ps_ExprContext != NULL);
        econtext = mtstate->ps.ps_ExprContext;
        leaf_part_rri->ri_projectReturning =
            ExecBuildProjectionInfo(returningList, econtext, slot,
                                    &mtstate->ps, RelationGetDescr(partrel));
    }

    /* Set up information needed for routing tuples to the partition. */
    ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
                        leaf_part_rri, partidx, false);

    /*
     * If there is an ON CONFLICT clause, initialize state for it.
     */
    if (node && node->onConflictAction != ONCONFLICT_NONE)
    {
        TupleDesc   partrelDesc = RelationGetDescr(partrel);
        ExprContext *econtext = mtstate->ps.ps_ExprContext;
        ListCell   *lc;
        List       *arbiterIndexes = NIL;

        /*
         * If there is a list of arbiter indexes, map it to a list of
         * indexes in the partition.  We do that by scanning the partition's
         * index list and searching for ancestry relationships to each index
         * in the ancestor table.
         */
        if (rootResultRelInfo->ri_onConflictArbiterIndexes != NIL)
        {
            List       *childIdxs;

            childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);

            foreach(lc, childIdxs)
            {
                Oid         childIdx = lfirst_oid(lc);
                List       *ancestors;
                ListCell   *lc2;

                ancestors = get_partition_ancestors(childIdx);
                foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
                {
                    if (list_member_oid(ancestors, lfirst_oid(lc2)))
                        arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
                }
                list_free(ancestors);
            }
        }

        /*
         * If the resulting lists are of unequal length, something is wrong.
         * XXX This may happen because we don't match the lists correctly
         * when a partitioned index is being processed by REINDEX
         * CONCURRENTLY.  FIXME later.
         */
        if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
            list_length(arbiterIndexes))
            elog(ERROR, "invalid arbiter index list");
        leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;

        /*
         * In the DO UPDATE case, we have some more state to initialize.
         */
        if (node->onConflictAction == ONCONFLICT_UPDATE)
        {
            OnConflictSetState *onconfl = makeNode(OnConflictSetState);
            TupleConversionMap *map;

            map = ExecGetRootToChildMap(leaf_part_rri, estate);

            Assert(node->onConflictSet != NIL);
            Assert(rootResultRelInfo->ri_onConflict != NULL);

            leaf_part_rri->ri_onConflict = onconfl;

            /*
             * Need a separate existing slot for each partition, as the
             * partition could be of a different AM, even if the tuple
             * descriptors match.
             */
            onconfl->oc_Existing =
                table_slot_create(leaf_part_rri->ri_RelationDesc,
                                  &mtstate->ps.state->es_tupleTable);

            /*
             * If the partition's tuple descriptor matches exactly the root
             * parent (the common case), we can re-use most of the parent's
             * ON CONFLICT SET state, skipping a bunch of work.  Otherwise,
             * we need to create state specific to this partition.
             */
            if (map == NULL)
            {
                /*
                 * It's safe to reuse these from the partition root, as we
                 * only process one tuple at a time (therefore we won't
                 * overwrite needed data in slots), and the results of
                 * projections are independent of the underlying storage.
                 * Projections and where clauses themselves don't store
                 * state / are independent of the underlying storage.
                 */
                onconfl->oc_ProjSlot =
                    rootResultRelInfo->ri_onConflict->oc_ProjSlot;
                onconfl->oc_ProjInfo =
                    rootResultRelInfo->ri_onConflict->oc_ProjInfo;
                onconfl->oc_WhereClause =
                    rootResultRelInfo->ri_onConflict->oc_WhereClause;
            }
            else
            {
                List       *onconflset;
                List       *onconflcols;

                /*
                 * Translate expressions in onConflictSet to account for
                 * different attribute numbers.  For that, map partition
                 * varattnos twice: first to catch the EXCLUDED
                 * pseudo-relation (INNER_VAR), and second to handle the
                 * main target relation (firstVarno).
                 */
                onconflset = copyObject(node->onConflictSet);
                if (part_attmap == NULL)
                    part_attmap =
                        build_attrmap_by_name(RelationGetDescr(partrel),
                                              RelationGetDescr(firstResultRel),
                                              false);
                onconflset = (List *)
                    map_variable_attnos((Node *) onconflset,
                                        INNER_VAR, 0,
                                        part_attmap,
                                        RelationGetForm(partrel)->reltype,
                                        &found_whole_row);
                /* We ignore the value of found_whole_row. */
                onconflset = (List *)
                    map_variable_attnos((Node *) onconflset,
                                        firstVarno, 0,
                                        part_attmap,
                                        RelationGetForm(partrel)->reltype,
                                        &found_whole_row);
                /* We ignore the value of found_whole_row. */

                /* Finally, adjust the target colnos to match the partition. */
                onconflcols = adjust_partition_colnos(node->onConflictCols,
                                                      leaf_part_rri);

                /* create the tuple slot for the UPDATE SET projection */
                onconfl->oc_ProjSlot =
                    table_slot_create(partrel,
                                      &mtstate->ps.state->es_tupleTable);

                /* build UPDATE SET projection state */
                onconfl->oc_ProjInfo =
                    ExecBuildUpdateProjection(onconflset,
                                              true,
                                              onconflcols,
                                              partrelDesc,
                                              econtext,
                                              onconfl->oc_ProjSlot,
                                              &mtstate->ps);

                /*
                 * If there is a WHERE clause, initialize state where it
                 * will be evaluated, mapping the attribute numbers
                 * appropriately.  As with onConflictSet, we need to map
                 * partition varattnos to the partition's tupdesc.
                 */
                if (node->onConflictWhere)
                {
                    List       *clause;

                    clause = copyObject((List *) node->onConflictWhere);
                    clause = (List *)
                        map_variable_attnos((Node *) clause,
                                            INNER_VAR, 0,
                                            part_attmap,
                                            RelationGetForm(partrel)->reltype,
                                            &found_whole_row);
                    /* We ignore the value of found_whole_row. */
                    clause = (List *)
                        map_variable_attnos((Node *) clause,
                                            firstVarno, 0,
                                            part_attmap,
                                            RelationGetForm(partrel)->reltype,
                                            &found_whole_row);
                    /* We ignore the value of found_whole_row. */
                    onconfl->oc_WhereClause =
                        ExecInitQual((List *) clause, &mtstate->ps);
                }
            }
        }
    }

    /*
     * Since we've just initialized this ResultRelInfo, it's not in any list
     * attached to the estate as yet.  Add it, so that it can be found
     * later.
     *
     * Note that the entries in this list appear in no predetermined order,
     * because partition result rels are initialized as and when they're
     * needed.
     */
    estate->es_tuple_routing_result_relations =
        lappend(estate->es_tuple_routing_result_relations,
                leaf_part_rri);

    /*
     * Initialize information about this partition that's needed to handle
     * MERGE.  We take the "first" result relation's mergeActionList as
     * reference and make a copy for this relation, converting stuff that
     * references attribute numbers to match this relation's.
     *
     * This duplicates much of the logic in ExecInitMerge(), so if something
     * changes there, look here too.
     */
    if (node && node->operation == CMD_MERGE)
    {
        List       *firstMergeActionList = linitial(node->mergeActionLists);
        ListCell   *lc;
        ExprContext *econtext = mtstate->ps.ps_ExprContext;
        Node       *joinCondition;

        if (part_attmap == NULL)
            part_attmap =
                build_attrmap_by_name(RelationGetDescr(partrel),
                                      RelationGetDescr(firstResultRel),
                                      false);

        if (unlikely(!leaf_part_rri->ri_projectNewInfoValid))
            ExecInitMergeTupleSlots(mtstate, leaf_part_rri);

        /* Initialize state for join condition checking. */
        joinCondition =
            map_variable_attnos(linitial(node->mergeJoinConditions),
                                firstVarno, 0,
                                part_attmap,
                                RelationGetForm(partrel)->reltype,
                                &found_whole_row);
        /* We ignore the value of found_whole_row. */
        leaf_part_rri->ri_MergeJoinCondition =
            ExecInitQual((List *) joinCondition, &mtstate->ps);

        foreach(lc, firstMergeActionList)
        {
            /* Make a copy for this relation to be safe.  */
            MergeAction *action = copyObject(lfirst(lc));
            MergeActionState *action_state;

            /* Generate the action's state for this relation */
            action_state = makeNode(MergeActionState);
            action_state->mas_action = action;

            /* And put the action in the appropriate list */
            leaf_part_rri->ri_MergeActions[action->matchKind] =
                lappend(leaf_part_rri->ri_MergeActions[action->matchKind],
                        action_state);

            switch (action->commandType)
            {
                case CMD_INSERT:

                    /*
                     * ExecCheckPlanOutput() was already done on the
                     * targetlist when the "first" result relation was
                     * initialized, and it is the same for all result
                     * relations.
                     */
                    action_state->mas_proj =
                        ExecBuildProjectionInfo(action->targetList, econtext,
                                                leaf_part_rri->ri_newTupleSlot,
                                                &mtstate->ps,
                                                RelationGetDescr(partrel));
                    break;
                case CMD_UPDATE:

                    /*
                     * Convert updateColnos from "first" result relation
                     * attribute numbers to this result rel's.
                     */
                    if (part_attmap)
                        action->updateColnos =
                            adjust_partition_colnos_using_map(action->updateColnos,
                                                              part_attmap);
                    action_state->mas_proj =
                        ExecBuildUpdateProjection(action->targetList,
                                                  true,
                                                  action->updateColnos,
                                                  RelationGetDescr(leaf_part_rri->ri_RelationDesc),
                                                  econtext,
                                                  leaf_part_rri->ri_newTupleSlot,
                                                  NULL);
                    break;
                case CMD_DELETE:
                case CMD_NOTHING:
                    /* Nothing to do */
                    break;

                default:
                    elog(ERROR, "unknown action in MERGE WHEN clause");
            }

            /* found_whole_row intentionally ignored. */
            action->qual =
                map_variable_attnos(action->qual,
                                    firstVarno, 0,
                                    part_attmap,
                                    RelationGetForm(partrel)->reltype,
                                    &found_whole_row);
            action_state->mas_whenqual =
                ExecInitQual((List *) action->qual, &mtstate->ps);
        }
    }
    MemoryContextSwitchTo(oldcxt);

    return leaf_part_rri;
}

/*
 * ExecInitRoutingInfo
 *      Set up information needed for translating tuples between root
 *      partitioned table format and partition format, and keep track of it
 *      in PartitionTupleRouting.
 */
static void
ExecInitRoutingInfo(ModifyTableState *mtstate,
                    EState *estate,
                    PartitionTupleRouting *proute,
                    PartitionDispatch dispatch,
                    ResultRelInfo *partRelInfo,
                    int partidx,
                    bool is_borrowed_rel)
{
    MemoryContext oldcxt;
    int         rri_index;

    oldcxt = MemoryContextSwitchTo(proute->memcxt);

    /*
     * Set up tuple conversion between root parent and the partition if the
     * two have different rowtypes.  If conversion is indeed required, also
     * initialize a slot dedicated to storing this partition's converted
     * tuples.  Various operations that are applied to tuples after routing,
     * such as checking constraints, will refer to this slot.
     */
    if (ExecGetRootToChildMap(partRelInfo, estate) != NULL)
    {
        Relation    partrel = partRelInfo->ri_RelationDesc;

        /*
         * This pins the partition's TupleDesc, which will be released at
         * the end of the command.
         */
        partRelInfo->ri_PartitionTupleSlot =
            table_slot_create(partrel, &estate->es_tupleTable);
    }
    else
        partRelInfo->ri_PartitionTupleSlot = NULL;

    /*
     * If the partition is a foreign table, let the FDW init itself for
     * routing tuples to the partition.
     */
    if (partRelInfo->ri_FdwRoutine != NULL &&
        partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
        partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);

    /*
     * Determine if the FDW supports batch insert and determine the batch
     * size (a FDW may support batching, but it may be disabled for the
     * server/table or for this particular query).
     *
     * If the FDW does not support batching, we set the batch size to 1.
     */
    if (partRelInfo->ri_FdwRoutine != NULL &&
        partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize &&
        partRelInfo->ri_FdwRoutine->ExecForeignBatchInsert)
        partRelInfo->ri_BatchSize =
            partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo);
    else
        partRelInfo->ri_BatchSize = 1;

    Assert(partRelInfo->ri_BatchSize >= 1);

    partRelInfo->ri_CopyMultiInsertBuffer = NULL;

    /*
     * Keep track of it in the PartitionTupleRouting->partitions array.
     */
    Assert(dispatch->indexes[partidx] == -1);

    rri_index = proute->num_partitions++;

    /* Allocate or enlarge the array, as needed */
    if (proute->num_partitions >= proute->max_partitions)
    {
        if (proute->max_partitions == 0)
        {
            proute->max_partitions = 8;
            proute->partitions = (ResultRelInfo **)
                palloc(sizeof(ResultRelInfo *) * proute->max_partitions);
            proute->is_borrowed_rel = (bool *)
                palloc(sizeof(bool) * proute->max_partitions);
        }
        else
        {
            proute->max_partitions *= 2;
            proute->partitions = (ResultRelInfo **)
                repalloc(proute->partitions, sizeof(ResultRelInfo *) *
                         proute->max_partitions);
            proute->is_borrowed_rel = (bool *)
                repalloc(proute->is_borrowed_rel, sizeof(bool) *
                         proute->max_partitions);
        }
    }

    proute->partitions[rri_index] = partRelInfo;
    proute->is_borrowed_rel[rri_index] = is_borrowed_rel;
    dispatch->indexes[partidx] = rri_index;

    MemoryContextSwitchTo(oldcxt);
}
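
/*
 * Design note (added commentary, not part of the original file): the
 * 'partitions' and 'is_borrowed_rel' arrays above grow geometrically
 * (8, 16, 32, ...), so routing to N distinct partitions costs only
 * O(log N) repallocs; the slack of at most 2x lives in proute->memcxt and
 * is freed along with it.
 */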

/*
 * ExecInitPartitionDispatchInfo
 *      Lock the partitioned table (if not locked already) and initialize
 *      PartitionDispatch for a partitioned table and store it in the next
 *      available slot in the proute->partition_dispatch_info array.  Also,
 *      record the index into this array in the parent_pd->indexes[] array
 *      in the partidx element so that we can properly retrieve the newly
 *      created PartitionDispatch later.
 */
static PartitionDispatch
ExecInitPartitionDispatchInfo(EState *estate,
                              PartitionTupleRouting *proute, Oid partoid,
                              PartitionDispatch parent_pd, int partidx,
                              ResultRelInfo *rootResultRelInfo)
{
    Relation    rel;
    PartitionDesc partdesc;
    PartitionDispatch pd;
    int         dispatchidx;
    MemoryContext oldcxt;

    /*
     * For data modification, it is better that the executor does not
     * include partitions being detached, except when running in
     * snapshot-isolation mode.  This means that a read-committed
     * transaction immediately gets a "no partition for tuple" error when a
     * tuple is inserted into a partition that's being detached
     * concurrently, but a transaction in repeatable-read mode can still use
     * such a partition.
     */
    if (estate->es_partition_directory == NULL)
        estate->es_partition_directory =
            CreatePartitionDirectory(estate->es_query_cxt,
                                     !IsolationUsesXactSnapshot());

    oldcxt = MemoryContextSwitchTo(proute->memcxt);

    /*
     * Only sub-partitioned tables need to be locked here.  The root
     * partitioned table will already have been locked as it's referenced in
     * the query's rtable.
     */
    if (partoid != RelationGetRelid(proute->partition_root))
        rel = table_open(partoid, RowExclusiveLock);
    else
        rel = proute->partition_root;
    partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);

    pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
                                    partdesc->nparts * sizeof(int));
    pd->reldesc = rel;
    pd->key = RelationGetPartitionKey(rel);
    pd->keystate = NIL;
    pd->partdesc = partdesc;
    if (parent_pd != NULL)
    {
        TupleDesc   tupdesc = RelationGetDescr(rel);

        /*
         * For sub-partitioned tables where the column order differs from
         * its direct parent partitioned table, we must store a tuple table
         * slot initialized with its tuple descriptor and a tuple conversion
         * map to convert a tuple from its parent's rowtype to its own.
         * This is to make sure that we are looking at the correct row using
         * the correct tuple descriptor when computing its partition key for
         * tuple routing.
         */
        pd->tupmap = build_attrmap_by_name_if_req(RelationGetDescr(parent_pd->reldesc),
                                                  tupdesc,
                                                  false);
        pd->tupslot = pd->tupmap ?
            MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual) : NULL;
    }
    else
    {
        /* Not required for the root partitioned table */
        pd->tupmap = NULL;
        pd->tupslot = NULL;
    }

    /*
     * Initialize with -1 to signify that the corresponding partition's
     * ResultRelInfo or PartitionDispatch has not been created yet.
     */
    memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);

    /* Track in PartitionTupleRouting for later use */
    dispatchidx = proute->num_dispatch++;

    /* Allocate or enlarge the array, as needed */
    if (proute->num_dispatch >= proute->max_dispatch)
    {
        if (proute->max_dispatch == 0)
        {
            proute->max_dispatch = 4;
            proute->partition_dispatch_info = (PartitionDispatch *)
                palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
            proute->nonleaf_partitions = (ResultRelInfo **)
                palloc(sizeof(ResultRelInfo *) * proute->max_dispatch);
        }
        else
        {
            proute->max_dispatch *= 2;
            proute->partition_dispatch_info = (PartitionDispatch *)
                repalloc(proute->partition_dispatch_info,
                         sizeof(PartitionDispatch) * proute->max_dispatch);
            proute->nonleaf_partitions = (ResultRelInfo **)
                repalloc(proute->nonleaf_partitions,
                         sizeof(ResultRelInfo *) * proute->max_dispatch);
        }
    }
    proute->partition_dispatch_info[dispatchidx] = pd;

    /*
     * If setting up a PartitionDispatch for a sub-partitioned table, we may
     * also need a minimally valid ResultRelInfo for checking the partition
     * constraint later; set that up now.
     */
    if (parent_pd)
    {
        ResultRelInfo *rri = makeNode(ResultRelInfo);

        InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
        proute->nonleaf_partitions[dispatchidx] = rri;
    }
    else
        proute->nonleaf_partitions[dispatchidx] = NULL;

    /*
     * Finally, if setting up a PartitionDispatch for a sub-partitioned
     * table, install a downlink in the parent to allow quick descent.
     */
    if (parent_pd)
    {
        Assert(parent_pd->indexes[partidx] == -1);
        parent_pd->indexes[partidx] = dispatchidx;
    }

    MemoryContextSwitchTo(oldcxt);

    return pd;
}

/*
 * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
 * routing.
 *
 * Close all the partitioned tables, leaf partitions, and their indices.
 */
void
ExecCleanupTupleRouting(ModifyTableState *mtstate,
                        PartitionTupleRouting *proute)
{
    int         i;

    /*
     * Remember, proute->partition_dispatch_info[0] corresponds to the root
     * partitioned table, which we must not try to close, because it is the
     * main target table of the query that will be closed by callers such as
     * ExecEndPlan() or DoCopy().  Also, tupslot is NULL for the root
     * partitioned table.
     */
    for (i = 1; i < proute->num_dispatch; i++)
    {
        PartitionDispatch pd = proute->partition_dispatch_info[i];

        table_close(pd->reldesc, NoLock);

        if (pd->tupslot)
            ExecDropSingleTupleTableSlot(pd->tupslot);
    }

    for (i = 0; i < proute->num_partitions; i++)
    {
        ResultRelInfo *resultRelInfo = proute->partitions[i];

        /* Allow any FDWs to shut down */
        if (resultRelInfo->ri_FdwRoutine != NULL &&
            resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
            resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
                                                           resultRelInfo);

        /*
         * Close it if it's not one of the result relations borrowed from
         * the owning ModifyTableState; those will be closed by
         * ExecEndPlan().
         */
        if (proute->is_borrowed_rel[i])
            continue;

        ExecCloseIndices(resultRelInfo);
        table_close(resultRelInfo->ri_RelationDesc, NoLock);
    }
}

/* ----------------
 *      FormPartitionKeyDatum
 *          Construct values[] and isnull[] arrays for the partition key
 *          of a tuple.
 *
 *  pd      Partition dispatch object of the partitioned table
 *  slot    Heap tuple from which to extract partition key
 *  estate  executor state for evaluating any partition key
 *          expressions (must be non-NULL)
 *  values  Array of partition key Datums (output area)
 *  isnull  Array of is-null indicators (output area)
 *
 * The ecxt_scantuple slot of estate's per-tuple expr context must point to
 * the heap tuple passed in.
 * ----------------
 */
static void
FormPartitionKeyDatum(PartitionDispatch pd,
                      TupleTableSlot *slot,
                      EState *estate,
                      Datum *values,
                      bool *isnull)
{
    ListCell   *partexpr_item;
    int         i;

    if (pd->key->partexprs != NIL && pd->keystate == NIL)
    {
        /* Check caller has set up context correctly */
        Assert(estate != NULL &&
               GetPerTupleExprContext(estate)->ecxt_scantuple == slot);

        /* First time through, set up expression evaluation state */
        pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
    }

    partexpr_item = list_head(pd->keystate);
    for (i = 0; i < pd->key->partnatts; i++)
    {
        AttrNumber  keycol = pd->key->partattrs[i];
        Datum       datum;
        bool        isNull;

        if (keycol != 0)
        {
            /* Plain column; get the value directly from the heap tuple */
            datum = slot_getattr(slot, keycol, &isNull);
        }
        else
        {
            /* Expression; need to evaluate it */
            if (partexpr_item == NULL)
                elog(ERROR, "wrong number of partition key expressions");
            datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
                                              GetPerTupleExprContext(estate),
                                              &isNull);
            partexpr_item = lnext(pd->keystate, partexpr_item);
        }
        values[i] = datum;
        isnull[i] = isNull;
    }

    if (partexpr_item != NULL)
        elog(ERROR, "wrong number of partition key expressions");
}
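
/*
 * Example (added commentary, illustrative): for a table partitioned
 * BY RANGE (a, (a + b)), the loop above fetches column "a" directly with
 * slot_getattr(), while the expression (a + b) is evaluated via the
 * ExprState list that was set up on first use.
 */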

/*
 * The number of times the same partition must be found in a row before we
 * switch from a binary search for the given values to just checking if the
 * values belong to the last found partition.  This must be above 0.
 */
#define PARTITION_CACHED_FIND_THRESHOLD 16
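
/*
 * Example (added commentary, illustrative): a bulk load into a
 * RANGE-partitioned table whose rows arrive in partition key order keeps
 * landing in the same partition; once that has happened
 * PARTITION_CACHED_FIND_THRESHOLD (16) times in a row,
 * get_partition_for_tuple() below replaces the binary search with a couple
 * of bound comparisons against the previously found partition.
 */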

/*
 * get_partition_for_tuple
 *      Finds partition of relation which accepts the partition key
 *      specified in values and isnull.
 *
 * Calling this function can be quite expensive when LIST and RANGE
 * partitioned tables have many partitions.  This is due to the binary
 * search that's done to find the correct partition.  Many of the use cases
 * for LIST and RANGE partitioned tables make it likely that the same
 * partition is found in subsequent ExecFindPartition() calls.  This is
 * especially true for cases such as RANGE partitioned tables on a
 * TIMESTAMP column where the partition key is the current time.  When asked
 * to find a partition for a RANGE or LIST partitioned table, we record the
 * partition index and datum offset we've found for the given 'values' in
 * the PartitionDesc (which is stored in relcache), and if we keep finding
 * the same partition PARTITION_CACHED_FIND_THRESHOLD times in a row, then
 * we'll enable caching logic and instead of performing a binary search to
 * find the correct partition, we'll just double-check that 'values' still
 * belong to the last found partition, and if so, we'll return that
 * partition index, thus skipping the need for the binary search.  If we
 * fail to match the last partition when double checking, then we fall back
 * on doing a binary search.  In this case, unless we find 'values' belong
 * to the DEFAULT partition, we'll reset the number of times we've hit the
 * same partition so that we don't attempt to use the cache again until
 * we've found that partition at least PARTITION_CACHED_FIND_THRESHOLD times
 * in a row.
 *
 * For cases where the partition changes on each lookup, the amount of
 * additional work required just amounts to recording the last found
 * partition and bound offset then resetting the found counter.  This is
 * cheap and does not appear to cause any meaningful slowdowns for such
 * cases.
 *
 * No caching of partitions is done when the last found partition is the
 * DEFAULT or NULL partition.  For the case of the DEFAULT partition, there
 * is no bound offset storing the matching datum, so we cannot confirm the
 * indexes match.  For the NULL partition, this is just so cheap, there's no
 * sense in caching.
 *
 * Return value is index of the partition (>= 0 and < partdesc->nparts) if
 * one found or -1 if none found.
 */
static int
get_partition_for_tuple(PartitionDispatch pd, const Datum *values,
                        const bool *isnull)
{
    int         bound_offset = -1;
    int         part_index = -1;
    PartitionKey key = pd->key;
    PartitionDesc partdesc = pd->partdesc;
    PartitionBoundInfo boundinfo = partdesc->boundinfo;

    /*
     * In the switch statement below, when we perform a cached lookup for
     * RANGE and LIST partitioned tables, if we find that the last found
     * partition matches the 'values', we return the partition index right
     * away.  We do this instead of breaking out of the switch as we don't
     * want to execute the code about the DEFAULT partition or do any
     * updates for any of the cache-related fields.  That would be a waste
     * of effort as we already know it's not the DEFAULT partition and have
     * no need to increment the number of times we found the same partition
     * any higher than PARTITION_CACHED_FIND_THRESHOLD.
     */

    /* Route as appropriate based on partitioning strategy. */
    switch (key->strategy)
    {
        case PARTITION_STRATEGY_HASH:
            {
                uint64      rowHash;

                /* hash partitioning is too cheap to bother caching */
                rowHash = compute_partition_hash_value(key->partnatts,
                                                       key->partsupfunc,
                                                       key->partcollation,
                                                       values, isnull);

                /*
                 * HASH partitions can't have a DEFAULT partition and we
                 * don't do any caching work for them, so just return the
                 * part index
                 */
                return boundinfo->indexes[rowHash % boundinfo->nindexes];
            }

        case PARTITION_STRATEGY_LIST:
            if (isnull[0])
            {
                /* this is far too cheap to bother doing any caching */
                if (partition_bound_accepts_nulls(boundinfo))
                {
                    /*
                     * When there is a NULL partition we just return that
                     * directly.  We don't have a bound_offset so it's not
                     * valid to drop into the code after the switch which
                     * checks and updates the cache fields.  We perhaps
                     * should be invalidating the details of the last cached
                     * partition but there's no real need to.  Keeping those
                     * fields set gives a chance at matching to the cached
                     * partition on the next lookup.
                     */
                    return boundinfo->null_index;
                }
            }
            else
            {
                bool        equal;

                if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
                {
                    int         last_datum_offset = partdesc->last_found_datum_index;
                    Datum       lastDatum = boundinfo->datums[last_datum_offset][0];
                    int32       cmpval;

                    /* does the last found datum index match this datum? */
                    cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
                                                             key->partcollation[0],
                                                             lastDatum,
                                                             values[0]));

                    if (cmpval == 0)
                        return boundinfo->indexes[last_datum_offset];

                    /* fall-through and do a manual lookup */
                }

                bound_offset = partition_list_bsearch(key->partsupfunc,
                                                      key->partcollation,
                                                      boundinfo,
                                                      values[0], &equal);
                if (bound_offset >= 0 && equal)
                    part_index = boundinfo->indexes[bound_offset];
            }
            break;

        case PARTITION_STRATEGY_RANGE:
            {
                bool        equal = false,
                            range_partkey_has_null = false;
                int         i;

                /*
                 * No range includes NULL, so this will be accepted by the
                 * default partition if there is one, and otherwise
                 * rejected.
                 */
                for (i = 0; i < key->partnatts; i++)
                {
                    if (isnull[i])
                    {
                        range_partkey_has_null = true;
                        break;
                    }
                }

                /* NULLs belong in the DEFAULT partition */
                if (range_partkey_has_null)
                    break;

                if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
                {
                    int         last_datum_offset = partdesc->last_found_datum_index;
                    Datum      *lastDatums = boundinfo->datums[last_datum_offset];
                    PartitionRangeDatumKind *kind = boundinfo->kind[last_datum_offset];
                    int32       cmpval;

                    /* check if the value is >= to the lower bound */
                    cmpval = partition_rbound_datum_cmp(key->partsupfunc,
                                                        key->partcollation,
                                                        lastDatums,
                                                        kind,
                                                        values,
                                                        key->partnatts);

                    /*
                     * If it's equal to the lower bound then no need to
                     * check the upper bound.
                     */
                    if (cmpval == 0)
                        return boundinfo->indexes[last_datum_offset + 1];

                    if (cmpval < 0 && last_datum_offset + 1 < boundinfo->ndatums)
                    {
                        /* check if the value is below the upper bound */
                        lastDatums = boundinfo->datums[last_datum_offset + 1];
                        kind = boundinfo->kind[last_datum_offset + 1];
                        cmpval = partition_rbound_datum_cmp(key->partsupfunc,
                                                            key->partcollation,
                                                            lastDatums,
                                                            kind,
                                                            values,
                                                            key->partnatts);

                        if (cmpval > 0)
                            return boundinfo->indexes[last_datum_offset + 1];
                    }
                    /* fall-through and do a manual lookup */
                }

                bound_offset = partition_range_datum_bsearch(key->partsupfunc,
                                                             key->partcollation,
                                                             boundinfo,
                                                             key->partnatts,
                                                             values,
                                                             &equal);

                /*
                 * The bound at bound_offset is less than or equal to the
                 * tuple value, so the bound at offset+1 is the upper bound
                 * of the partition we're looking for, if there actually
                 * exists one.
                 */
                part_index = boundinfo->indexes[bound_offset + 1];
            }
            break;

        default:
            elog(ERROR, "unexpected partition strategy: %d",
                 (int) key->strategy);
    }

    /*
     * part_index < 0 means we failed to find a partition of this parent.
     * Use the default partition, if there is one.
     */
    if (part_index < 0)
    {
        /*
         * No need to reset the cache fields here.  The next set of values
         * might end up belonging to the cached partition, so leaving the
         * cache alone improves the chances of a cache hit on the next
         * lookup.
         */
        return boundinfo->default_index;
    }

    /* we should only make it here when the code above set bound_offset */
    Assert(bound_offset >= 0);

    /*
     * Attend to the cache fields.  If the bound_offset matches the last
     * cached bound offset then we've found the same partition as last time,
     * so bump the count by one.  If all goes well, we'll eventually reach
     * PARTITION_CACHED_FIND_THRESHOLD and try the cache path next time
     * around.  Otherwise, we'll reset the cache count back to 1 to mark
     * that we've found this partition for the first time.
     */
    if (bound_offset == partdesc->last_found_datum_index)
        partdesc->last_found_count++;
    else
    {
        partdesc->last_found_count = 1;
        partdesc->last_found_part_index = part_index;
        partdesc->last_found_datum_index = bound_offset;
    }

    return part_index;
}

/*
 * ExecBuildSlotPartitionKeyDescription
 *
 * This works very much like BuildIndexValueDescription() and is currently
 * used for building error messages when ExecFindPartition() fails to find
 * a partition for a row.
 */
static char *
ExecBuildSlotPartitionKeyDescription(Relation rel,
                                     const Datum *values,
                                     const bool *isnull,
                                     int maxfieldlen)
{
    StringInfoData buf;
    PartitionKey key = RelationGetPartitionKey(rel);
    int         partnatts = get_partition_natts(key);
    int         i;
    Oid         relid = RelationGetRelid(rel);
    AclResult   aclresult;

    if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
        return NULL;

    /* If the user has table-level access, just go build the description. */
    aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
    if (aclresult != ACLCHECK_OK)
    {
        /*
         * Step through the columns of the partition key and make sure the
         * user has SELECT rights on all of them.
         */
        for (i = 0; i < partnatts; i++)
        {
            AttrNumber  attnum = get_partition_col_attnum(key, i);

            /*
             * If this partition key column is an expression, we return no
             * detail rather than try to figure out what column(s) the
             * expression includes and if the user has SELECT rights on
             * them.
             */
            if (attnum == InvalidAttrNumber ||
                pg_attribute_aclcheck(relid, attnum, GetUserId(),
                                      ACL_SELECT) != ACLCHECK_OK)
                return NULL;
        }
    }

    initStringInfo(&buf);
    appendStringInfo(&buf, "(%s) = (",
                     pg_get_partkeydef_columns(relid, true));

    for (i = 0; i < partnatts; i++)
    {
        char       *val;
        int         vallen;

        if (isnull[i])
            val = "null";
        else
        {
            Oid         foutoid;
            bool        typisvarlena;

            getTypeOutputInfo(get_partition_col_typid(key, i),
                              &foutoid, &typisvarlena);
            val = OidOutputFunctionCall(foutoid, values[i]);
        }

        if (i > 0)
            appendStringInfoString(&buf, ", ");

        /* truncate if needed */
        vallen = strlen(val);
        if (vallen <= maxfieldlen)
            appendBinaryStringInfo(&buf, val, vallen);
        else
        {
            vallen = pg_mbcliplen(val, vallen, maxfieldlen);
            appendBinaryStringInfo(&buf, val, vallen);
            appendStringInfoString(&buf, "...");
        }
    }

    appendStringInfoChar(&buf, ')');

    return buf.data;
}
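
/*
 * Example (added commentary, illustrative): for a table partitioned
 * BY RANGE (logdate), a row with no matching partition would be reported
 * with detail text built here that looks like:
 *
 *      (logdate) = (2025-01-01)
 */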

/*
 * adjust_partition_colnos
 *      Adjust the list of UPDATE target column numbers to account for
 *      attribute differences between the parent and the partition.
 *
 * Note: mustn't be called if no adjustment is required.
 */
static List *
adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri)
{
    TupleConversionMap *map = ExecGetChildToRootMap(leaf_part_rri);

    Assert(map != NULL);

    return adjust_partition_colnos_using_map(colnos, map->attrMap);
}

/*
 * adjust_partition_colnos_using_map
 *      Like adjust_partition_colnos, but uses a caller-supplied map instead
 *      of assuming the map is from the "root" result relation.
 *
 * Note: mustn't be called if no adjustment is required.
 */
static List *
adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
{
    List       *new_colnos = NIL;
    ListCell   *lc;

    Assert(attrMap != NULL);    /* else we shouldn't be here */

    foreach(lc, colnos)
    {
        AttrNumber  parentattrno = lfirst_int(lc);

        if (parentattrno <= 0 ||
            parentattrno > attrMap->maplen ||
            attrMap->attnums[parentattrno - 1] == 0)
            elog(ERROR, "unexpected attno %d in target column list",
                 parentattrno);
        new_colnos = lappend_int(new_colnos,
                                 attrMap->attnums[parentattrno - 1]);
    }

    return new_colnos;
}
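
/*
 * Worked example (added commentary, illustrative): suppose colnos is
 * (2, 4) and attrMap->attnums is {1, 3, 0, 5}, i.e. parent attno N maps to
 * partition attno attnums[N - 1].  Then 2 -> 3 and 4 -> 5, so the function
 * returns (3, 5).  A parent attno mapping to 0 (a dropped or missing
 * column) would raise the error above.
 */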

/*-------------------------------------------------------------------------
 * Run-Time Partition Pruning Support.
 *
 * The following series of functions exist to support the removal of
 * unneeded subplans for queries against partitioned tables.  The
 * supporting functions here are designed to work with any plan type which
 * supports an arbitrary number of subplans, e.g. Append, MergeAppend.
 *
 * When pruning involves comparison of a partition key to a constant, it's
 * done by the planner.  However, if we have a comparison to a non-constant
 * but not volatile expression, that presents an opportunity for run-time
 * pruning by the executor, allowing irrelevant partitions to be skipped
 * dynamically.
 *
 * We must distinguish expressions containing PARAM_EXEC Params from
 * expressions that don't contain those.  Even though a PARAM_EXEC Param is
 * considered to be a stable expression, it can change value from one plan
 * node scan to the next during query execution.  Stable comparison
 * expressions that don't involve such Params allow partition pruning to be
 * done once during executor startup.  Expressions that do involve such
 * Params require us to prune separately for each scan of the parent plan
 * node.
 *
 * Note that pruning away unneeded subplans during executor startup has the
 * added benefit of not having to initialize the unneeded subplans at all.
 *
 *
 * Functions:
 *
 * ExecDoInitialPruning:
 *      Perform runtime "initial" pruning, if necessary, to determine the
 *      set of child subnodes that need to be initialized during
 *      ExecInitNode() for all plan nodes that contain a PartitionPruneInfo.
 *
 * ExecInitPartitionExecPruning:
 *      Updates the PartitionPruneState found at given part_prune_index in
 *      EState.es_part_prune_states for use during "exec" pruning if
 *      required.  Also returns the set of subplans to initialize that would
 *      be stored at part_prune_index in EState.es_part_prune_results by
 *      ExecDoInitialPruning().  Maps in PartitionPruneState are updated to
 *      account for initial pruning possibly having eliminated some of the
 *      subplans.
 *
 * ExecFindMatchingSubPlans:
 *      Returns indexes of matching subplans after evaluating the
 *      expressions that are safe to evaluate at a given point.  This
 *      function is first called during ExecDoInitialPruning() to find the
 *      initially matching subplans based on performing the initial pruning
 *      steps and then must be called again each time the value of a Param
 *      listed in PartitionPruneState's 'execparamids' changes.
 *-------------------------------------------------------------------------
 */

/*
 * ExecDoInitialPruning
 *      Perform runtime "initial" pruning, if necessary, to determine the
 *      set of child subnodes that need to be initialized during
 *      ExecInitNode() for plan nodes that support partition pruning.
 *
 * This function iterates over each PartitionPruneInfo entry in
 * estate->es_part_prune_infos.  For each entry, it creates a
 * PartitionPruneState and adds it to es_part_prune_states.
 * ExecInitPartitionExecPruning() accesses these states through their
 * corresponding indexes in es_part_prune_states and assigns each state to
 * the parent node's PlanState, from where it will be used for "exec"
 * pruning.
 *
 * If initial pruning steps exist for a PartitionPruneInfo entry, this
 * function executes those pruning steps and stores the result as a
 * bitmapset of valid child subplans, identifying which subplans should be
 * initialized for execution.  The results are saved in
 * estate->es_part_prune_results.
 *
 * If no initial pruning is performed for a given PartitionPruneInfo, a NULL
 * entry is still added to es_part_prune_results to maintain alignment with
 * es_part_prune_infos.  This ensures that ExecInitPartitionExecPruning()
 * can use the same index to retrieve the pruning results.
 */
1824void
1826{
1827 ListCell *lc;
1828
1829 foreach(lc, estate->es_part_prune_infos)
1830 {
1832 PartitionPruneState *prunestate;
1833 Bitmapset *validsubplans = NULL;
1834 Bitmapset *all_leafpart_rtis = NULL;
1835 Bitmapset *validsubplan_rtis = NULL;
1836
1837 /* Create and save the PartitionPruneState. */
1838 prunestate = CreatePartitionPruneState(estate, pruneinfo,
1839 &all_leafpart_rtis);
1841 prunestate);
1842
1843 /*
1844 * Perform initial pruning steps, if any, and save the result
1845 * bitmapset or NULL as described in the header comment.
1846 */
1847 if (prunestate->do_initial_prune)
1848 validsubplans = ExecFindMatchingSubPlans(prunestate, true,
1849 &validsubplan_rtis);
1850 else
1851 validsubplan_rtis = all_leafpart_rtis;
1852
1854 validsubplan_rtis);
1856 validsubplans);
1857 }
1858}
1859
1860/*
1861 * ExecInitPartitionExecPruning
1862 * Initialize the data structures needed for runtime "exec" partition
1863 * pruning and return the result of initial pruning, if available.
1864 *
1865 * 'relids' identifies the relation to which both the parent plan and the
1866 * PartitionPruneInfo given by 'part_prune_index' belong.
1867 *
1868 * On return, *initially_valid_subplans is assigned the set of indexes of
1869 * child subplans that must be initialized along with the parent plan node.
1870 * Initial pruning would have been performed by ExecDoInitialPruning(), if
1871 * necessary, and the bitmapset of surviving subplans' indexes would have
1872 * been stored as the part_prune_index'th element of
1873 * EState.es_part_prune_results.
1874 *
1875 * If subplans were indeed pruned during initial pruning, the subplan_map
1876 * arrays in the returned PartitionPruneState are re-sequenced to exclude those
1877 * subplans, but only if the maps will be needed for subsequent execution
1878 * pruning passes.
1879 */
1882 int n_total_subplans,
1883 int part_prune_index,
1884 Bitmapset *relids,
1885 Bitmapset **initially_valid_subplans)
1886{
1887 PartitionPruneState *prunestate;
1888 EState *estate = planstate->state;
1889 PartitionPruneInfo *pruneinfo;
1890
1891 /* Obtain the pruneinfo we need. */
1893 part_prune_index);
1894
1895 /* Its relids better match the plan node's or the planner messed up. */
1896 if (!bms_equal(relids, pruneinfo->relids))
1897 elog(ERROR, "wrong pruneinfo with relids=%s found at part_prune_index=%d contained in plan node with relids=%s",
1898 bmsToString(pruneinfo->relids), part_prune_index,
1899 bmsToString(relids));
1900
1901 /*
1902 * The PartitionPruneState would have been created by
1903 * ExecDoInitialPruning() and stored as the part_prune_index'th element of
1904 * EState.es_part_prune_states.
1905 */
1906 prunestate = list_nth(estate->es_part_prune_states, part_prune_index);
1907 Assert(prunestate != NULL);
1908
1909 /* Use the result of initial pruning done by ExecDoInitialPruning(). */
1910 if (prunestate->do_initial_prune)
1911 *initially_valid_subplans = list_nth_node(Bitmapset,
1912 estate->es_part_prune_results,
1913 part_prune_index);
1914 else
1915 {
1916 /* No pruning, so we'll need to initialize all subplans */
1917 Assert(n_total_subplans > 0);
1918 *initially_valid_subplans = bms_add_range(NULL, 0,
1919 n_total_subplans - 1);
1920 }
1921
1922 /*
1923 * The exec pruning state must also be initialized, if needed, before it
1924 * can be used for pruning during execution.
1925 *
1926 * This also re-sequences subplan indexes contained in prunestate to
1927 * account for any that were removed due to initial pruning; refer to the
1928 * condition in InitExecPartitionPruneContexts() that is used to determine
1929 * whether to do this. If no exec pruning needs to be done, we would thus
1930 * leave the maps to be in an invalid state, but that's ok since that data
1931 * won't be consulted again (cf initial Assert in
1932 * ExecFindMatchingSubPlans).
1933 */
1934 if (prunestate->do_exec_prune)
1935 InitExecPartitionPruneContexts(prunestate, planstate,
1936 *initially_valid_subplans,
1937 n_total_subplans);
1938
1939 return prunestate;
1940}
1941
1942/*
1943 * CreatePartitionPruneState
1944 * Build the data structure required for calling ExecFindMatchingSubPlans
1945 *
1946 * This includes PartitionPruneContexts (stored in each
1947 * PartitionedRelPruningData corresponding to a PartitionedRelPruneInfo),
1948 * which hold the ExprStates needed to evaluate pruning expressions, and
1949 * mapping arrays to convert partition indexes from the pruning logic
1950 * into subplan indexes in the parent plan node's list of child subplans.
1951 *
1952 * 'pruneinfo' is a PartitionPruneInfo as generated by
1953 * make_partition_pruneinfo. Here we build a PartitionPruneState containing a
1954 * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
1955 * pruneinfo->prune_infos), each of which contains a PartitionedRelPruningData
1956 * for each PartitionedRelPruneInfo appearing in that sublist. This two-level
1957 * system is needed to keep from confusing the different hierarchies when a
1958 * UNION ALL contains multiple partitioned tables as children. The data
1959 * stored in each PartitionedRelPruningData can be re-used each time we
1960 * re-evaluate which partitions match the pruning steps provided in each
1961 * PartitionedRelPruneInfo.
1962 *
1963 * Note that only the PartitionPruneContexts for initial pruning are
1964 * initialized here. Those required for exec pruning are initialized later in
1965 * ExecInitPartitionExecPruning(), as they depend on the availability of the
1966 * parent plan node's PlanState.
1967 *
1968 * If initial pruning steps are to be skipped (e.g., during EXPLAIN
1969 * (GENERIC_PLAN)), *all_leafpart_rtis will be populated with the RT indexes of
1970 * all leaf partitions whose scanning subnode is included in the parent plan
1971 * node's list of child plans. The caller must add these RT indexes to
1972 * estate->es_unpruned_relids.
1973 */
1974static PartitionPruneState *
1976 Bitmapset **all_leafpart_rtis)
1977{
1978 PartitionPruneState *prunestate;
1979 int n_part_hierarchies;
1980 ListCell *lc;
1981 int i;
1982
1983 /*
1984 * Expression context that will be used by partkey_datum_from_expr() to
1985 * evaluate expressions for comparison against partition bounds.
1986 */
1987 ExprContext *econtext = CreateExprContext(estate);
1988
1989 /* For data reading, executor always includes detached partitions */
1990 if (estate->es_partition_directory == NULL)
1991 estate->es_partition_directory =
1992 CreatePartitionDirectory(estate->es_query_cxt, false);
1993
1994 n_part_hierarchies = list_length(pruneinfo->prune_infos);
1995 Assert(n_part_hierarchies > 0);
1996
1997 /*
1998 * Allocate the data structure
1999 */
2000 prunestate = (PartitionPruneState *)
2001 palloc(offsetof(PartitionPruneState, partprunedata) +
2002 sizeof(PartitionPruningData *) * n_part_hierarchies);
2003
2004 /* Save ExprContext for use during InitExecPartitionPruneContexts(). */
2005 prunestate->econtext = econtext;
2006 prunestate->execparamids = NULL;
2007 /* other_subplans can change at runtime, so we need our own copy */
2008 prunestate->other_subplans = bms_copy(pruneinfo->other_subplans);
2009 prunestate->do_initial_prune = false; /* may be set below */
2010 prunestate->do_exec_prune = false; /* may be set below */
2011 prunestate->num_partprunedata = n_part_hierarchies;
2012
2013 /*
2014 * Create a short-term memory context which we'll use when making calls to
2015 * the partition pruning functions. This avoids possible memory leaks,
2016 * since the pruning functions call comparison functions that aren't under
2017 * our control.
2018 */
2019 prunestate->prune_context =
2021 "Partition Prune",
2023
2024 i = 0;
2025 foreach(lc, pruneinfo->prune_infos)
2026 {
2027 List *partrelpruneinfos = lfirst_node(List, lc);
2028 int npartrelpruneinfos = list_length(partrelpruneinfos);
2029 PartitionPruningData *prunedata;
2030 ListCell *lc2;
2031 int j;
2032
2033 prunedata = (PartitionPruningData *)
2034 palloc(offsetof(PartitionPruningData, partrelprunedata) +
2035 npartrelpruneinfos * sizeof(PartitionedRelPruningData));
2036 prunestate->partprunedata[i] = prunedata;
2037 prunedata->num_partrelprunedata = npartrelpruneinfos;
2038
2039 j = 0;
2040 foreach(lc2, partrelpruneinfos)
2041 {
2043 PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2044 Relation partrel;
2045 PartitionDesc partdesc;
2046 PartitionKey partkey;
2047
2048 /*
2049 * We can rely on the copies of the partitioned table's partition
2050 * key and partition descriptor appearing in its relcache entry,
2051 * because that entry will be held open and locked for the
2052 * duration of this executor run.
2053 */
2054 partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex, false);
2055
2056 /* Remember for InitExecPartitionPruneContexts(). */
2057 pprune->partrel = partrel;
2058
2059 partkey = RelationGetPartitionKey(partrel);
2061 partrel);
2062
2063 /*
2064 * Initialize the subplan_map and subpart_map.
2065 *
2066 * The set of partitions that exist now might not be the same that
2067 * existed when the plan was made. The normal case is that it is;
2068 * optimize for that case with a quick comparison, and just copy
2069 * the subplan_map and make subpart_map, leafpart_rti_map point to
2070 * the ones in PruneInfo.
2071 *
2072 * For the case where they aren't identical, we could have more
2073 * partitions on either side; or even exactly the same number of
2074 * them on both but the set of OIDs doesn't match fully. Handle
2075 * this by creating new subplan_map and subpart_map arrays that
2076 * corresponds to the ones in the PruneInfo where the new
2077 * partition descriptor's OIDs match. Any that don't match can be
2078 * set to -1, as if they were pruned. By construction, both
2079 * arrays are in partition bounds order.
2080 */
2081 pprune->nparts = partdesc->nparts;
2082 pprune->subplan_map = palloc(sizeof(int) * partdesc->nparts);
2083
2084 if (partdesc->nparts == pinfo->nparts &&
2085 memcmp(partdesc->oids, pinfo->relid_map,
2086 sizeof(int) * partdesc->nparts) == 0)
2087 {
2088 pprune->subpart_map = pinfo->subpart_map;
2089 pprune->leafpart_rti_map = pinfo->leafpart_rti_map;
2090 memcpy(pprune->subplan_map, pinfo->subplan_map,
2091 sizeof(int) * pinfo->nparts);
2092 }
2093 else
2094 {
2095 int pd_idx = 0;
2096 int pp_idx;
2097
2098 /*
2099 * When the partition arrays are not identical, there could be
2100 * some new ones but it's also possible that one was removed;
2101 * we cope with both situations by walking the arrays and
2102 * discarding those that don't match.
2103 *
2104 * If the number of partitions on both sides match, it's still
2105 * possible that one partition has been detached and another
2106 * attached. Cope with that by creating a map that skips any
2107 * mismatches.
2108 */
2109 pprune->subpart_map = palloc(sizeof(int) * partdesc->nparts);
2110 pprune->leafpart_rti_map = palloc(sizeof(int) * partdesc->nparts);
2111
2112 for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++)
2113 {
2114 /* Skip any InvalidOid relid_map entries */
2115 while (pd_idx < pinfo->nparts &&
2116 !OidIsValid(pinfo->relid_map[pd_idx]))
2117 pd_idx++;
2118
2119 recheck:
2120 if (pd_idx < pinfo->nparts &&
2121 pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
2122 {
2123 /* match... */
2124 pprune->subplan_map[pp_idx] =
2125 pinfo->subplan_map[pd_idx];
2126 pprune->subpart_map[pp_idx] =
2127 pinfo->subpart_map[pd_idx];
2128 pprune->leafpart_rti_map[pp_idx] =
2129 pinfo->leafpart_rti_map[pd_idx];
2130 pd_idx++;
2131 continue;
2132 }
2133
2134 /*
2135 * There isn't an exact match in the corresponding
2136 * positions of both arrays. Peek ahead in
2137 * pinfo->relid_map to see if we have a match for the
2138 * current partition in partdesc. Normally if a match
2139 * exists it's just one element ahead, and it means the
2140 * planner saw one extra partition that we no longer see
2141 * now (its concurrent detach finished just in between);
2142 * so we skip that one by updating pd_idx to the new
2143 * location and jumping above. We can then continue to
2144 * match the rest of the elements after skipping the OID
2145 * with no match; no future matches are tried for the
2146 * element that was skipped, because we know the arrays to
2147 * be in the same order.
2148 *
2149 * If we don't see a match anywhere in the rest of the
2150 * pinfo->relid_map array, that means we see an element
2151 * now that the planner didn't see, so mark that one as
2152 * pruned and move on.
2153 */
2154 for (int pd_idx2 = pd_idx + 1; pd_idx2 < pinfo->nparts; pd_idx2++)
2155 {
2156 if (pd_idx2 >= pinfo->nparts)
2157 break;
2158 if (pinfo->relid_map[pd_idx2] == partdesc->oids[pp_idx])
2159 {
2160 pd_idx = pd_idx2;
2161 goto recheck;
2162 }
2163 }
2164
2165 pprune->subpart_map[pp_idx] = -1;
2166 pprune->subplan_map[pp_idx] = -1;
2167 pprune->leafpart_rti_map[pp_idx] = 0;
2168 }
2169 }
2170
2171 /* present_parts is also subject to later modification */
2172 pprune->present_parts = bms_copy(pinfo->present_parts);
2173
2174 /*
2175 * Only initial_context is initialized here. exec_context is
2176 * initialized during ExecInitPartitionExecPruning() when the
2177 * parent plan's PlanState is available.
2178 *
2179 * Note that we must skip execution-time (both "init" and "exec")
2180 * partition pruning in EXPLAIN (GENERIC_PLAN), since parameter
2181 * values may be missing.
2182 */
2184 if (pinfo->initial_pruning_steps &&
2186 {
2188 pprune->initial_pruning_steps,
2189 partdesc, partkey, NULL,
2190 econtext);
2191 /* Record whether initial pruning is needed at any level */
2192 prunestate->do_initial_prune = true;
2193 }
2194 pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
2195 if (pinfo->exec_pruning_steps &&
2197 {
2198 /* Record whether exec pruning is needed at any level */
2199 prunestate->do_exec_prune = true;
2200 }
2201
2202 /*
2203 * Accumulate the IDs of all PARAM_EXEC Params affecting the
2204 * partitioning decisions at this plan node.
2205 */
2206 prunestate->execparamids = bms_add_members(prunestate->execparamids,
2207 pinfo->execparamids);
2208
2209 /*
2210 * Return all leaf partition indexes if we're skipping pruning in
2211 * the EXPLAIN (GENERIC_PLAN) case.
2212 */
2213 if (pinfo->initial_pruning_steps && !prunestate->do_initial_prune)
2214 {
2215 int part_index = -1;
2216
2217 while ((part_index = bms_next_member(pprune->present_parts,
2218 part_index)) >= 0)
2219 {
2220 Index rtindex = pprune->leafpart_rti_map[part_index];
2221
2222 if (rtindex)
2223 *all_leafpart_rtis = bms_add_member(*all_leafpart_rtis,
2224 rtindex);
2225 }
2226 }
2227
2228 j++;
2229 }
2230 i++;
2231 }
2232
2233 return prunestate;
2234}
2235
2236/*
2237 * Initialize a PartitionPruneContext for the given list of pruning steps.
2238 */
2239static void
2241 List *pruning_steps,
2242 PartitionDesc partdesc,
2243 PartitionKey partkey,
2244 PlanState *planstate,
2245 ExprContext *econtext)
2246{
2247 int n_steps;
2248 int partnatts;
2249 ListCell *lc;
2250
2251 n_steps = list_length(pruning_steps);
2252
2253 context->strategy = partkey->strategy;
2254 context->partnatts = partnatts = partkey->partnatts;
2255 context->nparts = partdesc->nparts;
2256 context->boundinfo = partdesc->boundinfo;
2257 context->partcollation = partkey->partcollation;
2258 context->partsupfunc = partkey->partsupfunc;
2259
2260 /* We'll look up type-specific support functions as needed */
2261 context->stepcmpfuncs = (FmgrInfo *)
2262 palloc0(sizeof(FmgrInfo) * n_steps * partnatts);
2263
2265 context->planstate = planstate;
2266 context->exprcontext = econtext;
2267
2268 /* Initialize expression state for each expression we need */
2269 context->exprstates = (ExprState **)
2270 palloc0(sizeof(ExprState *) * n_steps * partnatts);
2271 foreach(lc, pruning_steps)
2272 {
2274 ListCell *lc2 = list_head(step->exprs);
2275 int keyno;
2276
2277 /* not needed for other step kinds */
2278 if (!IsA(step, PartitionPruneStepOp))
2279 continue;
2280
2281 Assert(list_length(step->exprs) <= partnatts);
2282
2283 for (keyno = 0; keyno < partnatts; keyno++)
2284 {
2285 if (bms_is_member(keyno, step->nullkeys))
2286 continue;
2287
2288 if (lc2 != NULL)
2289 {
2290 Expr *expr = lfirst(lc2);
2291
2292 /* not needed for Consts */
2293 if (!IsA(expr, Const))
2294 {
2295 int stateidx = PruneCxtStateIdx(partnatts,
2296 step->step.step_id,
2297 keyno);
2298
2299 /*
2300 * When planstate is NULL, pruning_steps is known not to
2301 * contain any expressions that depend on the parent plan.
2302 * Information of any available EXTERN parameters must be
2303 * passed explicitly in that case, which the caller must
2304 * have made available via econtext.
2305 */
2306 if (planstate == NULL)
2307 context->exprstates[stateidx] =
2309 econtext->ecxt_param_list_info);
2310 else
2311 context->exprstates[stateidx] =
2312 ExecInitExpr(expr, context->planstate);
2313 }
2314 lc2 = lnext(step->exprs, lc2);
2315 }
2316 }
2317 }
2318}
2319
2320/*
2321 * InitExecPartitionPruneContexts
2322 * Initialize exec pruning contexts deferred by CreatePartitionPruneState()
2323 *
2324 * This function finalizes exec pruning setup for a PartitionPruneState by
2325 * initializing contexts for pruning steps that require the parent plan's
2326 * PlanState. It iterates over PartitionPruningData entries and sets up the
2327 * necessary execution contexts for pruning during query execution.
2328 *
2329 * Also fix the mapping of partition indexes to subplan indexes contained in
2330 * prunestate by considering the new list of subplans that survived initial
2331 * pruning.
2332 *
2333 * Current values of the indexes present in PartitionPruneState count all the
2334 * subplans that would be present before initial pruning was done. If initial
2335 * pruning got rid of some of the subplans, any subsequent pruning passes will
2336 * be looking at a different set of target subplans to choose from than those
2337 * in the pre-initial-pruning set, so the maps in PartitionPruneState
2338 * containing those indexes must be updated to reflect the new indexes of
2339 * subplans in the post-initial-pruning set.
2340 */
2341static void
2343 PlanState *parent_plan,
2344 Bitmapset *initially_valid_subplans,
2345 int n_total_subplans)
2346{
2347 EState *estate;
2348 int *new_subplan_indexes = NULL;
2349 Bitmapset *new_other_subplans;
2350 int i;
2351 int newidx;
2352 bool fix_subplan_map = false;
2353
2354 Assert(prunestate->do_exec_prune);
2355 Assert(parent_plan != NULL);
2356 estate = parent_plan->state;
2357
2358 /*
2359 * No need to fix subplans maps if initial pruning didn't eliminate any
2360 * subplans.
2361 */
2362 if (bms_num_members(initially_valid_subplans) < n_total_subplans)
2363 {
2364 fix_subplan_map = true;
2365
2366 /*
2367 * First we must build a temporary array which maps old subplan
2368 * indexes to new ones. For convenience of initialization, we use
2369 * 1-based indexes in this array and leave pruned items as 0.
2370 */
2371 new_subplan_indexes = (int *) palloc0(sizeof(int) * n_total_subplans);
2372 newidx = 1;
2373 i = -1;
2374 while ((i = bms_next_member(initially_valid_subplans, i)) >= 0)
2375 {
2376 Assert(i < n_total_subplans);
2377 new_subplan_indexes[i] = newidx++;
2378 }
2379 }
2380
2381 /*
2382 * Now we can update each PartitionedRelPruneInfo's subplan_map with new
2383 * subplan indexes. We must also recompute its present_parts bitmap.
2384 */
2385 for (i = 0; i < prunestate->num_partprunedata; i++)
2386 {
2387 PartitionPruningData *prunedata = prunestate->partprunedata[i];
2388 int j;
2389
2390 /*
2391 * Within each hierarchy, we perform this loop in back-to-front order
2392 * so that we determine present_parts for the lowest-level partitioned
2393 * tables first. This way we can tell whether a sub-partitioned
2394 * table's partitions were entirely pruned so we can exclude it from
2395 * the current level's present_parts.
2396 */
2397 for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
2398 {
2399 PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2400 int nparts = pprune->nparts;
2401 int k;
2402
2403 /* Initialize PartitionPruneContext for exec pruning, if needed. */
2404 if (pprune->exec_pruning_steps != NIL)
2405 {
2406 PartitionKey partkey;
2407 PartitionDesc partdesc;
2408
2409 /*
2410 * See the comment in CreatePartitionPruneState() regarding
2411 * the usage of partdesc and partkey.
2412 */
2413 partkey = RelationGetPartitionKey(pprune->partrel);
2415 pprune->partrel);
2416
2418 pprune->exec_pruning_steps,
2419 partdesc, partkey, parent_plan,
2420 prunestate->econtext);
2421 }
2422
2423 if (!fix_subplan_map)
2424 continue;
2425
2426 /* We just rebuild present_parts from scratch */
2427 bms_free(pprune->present_parts);
2428 pprune->present_parts = NULL;
2429
2430 for (k = 0; k < nparts; k++)
2431 {
2432 int oldidx = pprune->subplan_map[k];
2433 int subidx;
2434
2435 /*
2436 * If this partition existed as a subplan then change the old
2437 * subplan index to the new subplan index. The new index may
2438 * become -1 if the partition was pruned above, or it may just
2439 * come earlier in the subplan list due to some subplans being
2440 * removed earlier in the list. If it's a subpartition, add
2441 * it to present_parts unless it's entirely pruned.
2442 */
2443 if (oldidx >= 0)
2444 {
2445 Assert(oldidx < n_total_subplans);
2446 pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;
2447
2448 if (new_subplan_indexes[oldidx] > 0)
2449 pprune->present_parts =
2450 bms_add_member(pprune->present_parts, k);
2451 }
2452 else if ((subidx = pprune->subpart_map[k]) >= 0)
2453 {
2454 PartitionedRelPruningData *subprune;
2455
2456 subprune = &prunedata->partrelprunedata[subidx];
2457
2458 if (!bms_is_empty(subprune->present_parts))
2459 pprune->present_parts =
2460 bms_add_member(pprune->present_parts, k);
2461 }
2462 }
2463 }
2464 }
2465
2466 /*
2467 * If we fixed subplan maps, we must also recompute the other_subplans
2468 * set, since indexes in it may change.
2469 */
2470 if (fix_subplan_map)
2471 {
2472 new_other_subplans = NULL;
2473 i = -1;
2474 while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
2475 new_other_subplans = bms_add_member(new_other_subplans,
2476 new_subplan_indexes[i] - 1);
2477
2478 bms_free(prunestate->other_subplans);
2479 prunestate->other_subplans = new_other_subplans;
2480
2481 pfree(new_subplan_indexes);
2482 }
2483}
2484
2485/*
2486 * ExecFindMatchingSubPlans
2487 * Determine which subplans match the pruning steps detailed in
2488 * 'prunestate' for the current comparison expression values.
2489 *
2490 * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This
2491 * differentiates the initial executor-time pruning step from later
2492 * runtime pruning.
2493 *
2494 * The caller must pass a non-NULL validsubplan_rtis during initial pruning
2495 * to collect the RT indexes of leaf partitions whose subnodes will be
2496 * executed. These RT indexes are later added to EState.es_unpruned_relids.
2497 */
2498Bitmapset *
2500 bool initial_prune,
2501 Bitmapset **validsubplan_rtis)
2502{
2503 Bitmapset *result = NULL;
2504 MemoryContext oldcontext;
2505 int i;
2506
2507 /*
2508 * Either we're here on the initial prune done during pruning
2509 * initialization, or we're at a point where PARAM_EXEC Params can be
2510 * evaluated *and* there are steps in which to do so.
2511 */
2512 Assert(initial_prune || prunestate->do_exec_prune);
2513 Assert(validsubplan_rtis != NULL || !initial_prune);
2514
2515 /*
2516 * Switch to a temp context to avoid leaking memory in the executor's
2517 * query-lifespan memory context.
2518 */
2519 oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
2520
2521 /*
2522 * For each hierarchy, do the pruning tests, and add nondeletable
2523 * subplans' indexes to "result".
2524 */
2525 for (i = 0; i < prunestate->num_partprunedata; i++)
2526 {
2527 PartitionPruningData *prunedata = prunestate->partprunedata[i];
2529
2530 /*
2531 * We pass the zeroth item, belonging to the root table of the
2532 * hierarchy, and find_matching_subplans_recurse() takes care of
2533 * recursing to other (lower-level) parents as needed.
2534 */
2535 pprune = &prunedata->partrelprunedata[0];
2536 find_matching_subplans_recurse(prunedata, pprune, initial_prune,
2537 &result, validsubplan_rtis);
2538
2539 /*
2540 * Expression eval may have used space in ExprContext too. Avoid
2541 * accessing exec_context during initial pruning, as it is not valid
2542 * at that stage.
2543 */
2544 if (!initial_prune && pprune->exec_pruning_steps)
2546 }
2547
2548 /* Add in any subplans that partition pruning didn't account for */
2549 result = bms_add_members(result, prunestate->other_subplans);
2550
2551 MemoryContextSwitchTo(oldcontext);
2552
2553 /* Copy result out of the temp context before we reset it */
2554 result = bms_copy(result);
2555 if (validsubplan_rtis)
2556 *validsubplan_rtis = bms_copy(*validsubplan_rtis);
2557
2558 MemoryContextReset(prunestate->prune_context);
2559
2560 return result;
2561}
2562
2563/*
2564 * find_matching_subplans_recurse
2565 * Recursive worker function for ExecFindMatchingSubPlans
2566 *
2567 * Adds valid (non-prunable) subplan IDs to *validsubplans. If
2568 * *validsubplan_rtis is non-NULL, it also adds the RT indexes of their
2569 * corresponding partitions, but only if they are leaf partitions.
2570 */
2571static void
2574 bool initial_prune,
2575 Bitmapset **validsubplans,
2576 Bitmapset **validsubplan_rtis)
2577{
2578 Bitmapset *partset;
2579 int i;
2580
2581 /* Guard against stack overflow due to overly deep partition hierarchy. */
2583
2584 /*
2585 * Prune as appropriate, if we have pruning steps matching the current
2586 * execution context. Otherwise just include all partitions at this
2587 * level.
2588 */
2589 if (initial_prune && pprune->initial_pruning_steps)
2590 partset = get_matching_partitions(&pprune->initial_context,
2591 pprune->initial_pruning_steps);
2592 else if (!initial_prune && pprune->exec_pruning_steps)
2593 partset = get_matching_partitions(&pprune->exec_context,
2594 pprune->exec_pruning_steps);
2595 else
2596 partset = pprune->present_parts;
2597
2598 /* Translate partset into subplan indexes */
2599 i = -1;
2600 while ((i = bms_next_member(partset, i)) >= 0)
2601 {
2602 if (pprune->subplan_map[i] >= 0)
2603 {
2604 *validsubplans = bms_add_member(*validsubplans,
2605 pprune->subplan_map[i]);
2606
2607 /*
2608 * Only report leaf partitions. Non-leaf partitions may appear
2609 * here when they use an unflattened Append or MergeAppend.
2610 */
2611 if (validsubplan_rtis && pprune->leafpart_rti_map[i])
2612 *validsubplan_rtis = bms_add_member(*validsubplan_rtis,
2613 pprune->leafpart_rti_map[i]);
2614 }
2615 else
2616 {
2617 int partidx = pprune->subpart_map[i];
2618
2619 if (partidx >= 0)
2621 &prunedata->partrelprunedata[partidx],
2622 initial_prune, validsubplans,
2623 validsubplan_rtis);
2624 else
2625 {
2626 /*
2627 * We get here if the planner already pruned all the sub-
2628 * partitions for this partition. Silently ignore this
2629 * partition in this case. The end result is the same: we
2630 * would have pruned all partitions just the same, but we
2631 * don't have any pruning steps to execute to verify this.
2632 */
2633 }
2634 }
2635 }
2636}
AclResult
Definition: acl.h:182
@ ACLCHECK_OK
Definition: acl.h:183
AclResult pg_attribute_aclcheck(Oid table_oid, AttrNumber attnum, Oid roleid, AclMode mode)
Definition: aclchk.c:3866
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4037
AttrMap * build_attrmap_by_name(TupleDesc indesc, TupleDesc outdesc, bool missing_ok)
Definition: attmap.c:175
AttrMap * build_attrmap_by_name_if_req(TupleDesc indesc, TupleDesc outdesc, bool missing_ok)
Definition: attmap.c:261
int16 AttrNumber
Definition: attnum.h:21
#define InvalidAttrNumber
Definition: attnum.h:23
bool bms_equal(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:142
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1305
Bitmapset * bms_add_range(Bitmapset *a, int lower, int upper)
Definition: bitmapset.c:1018
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:750
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:510
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:814
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:916
Bitmapset * bms_copy(const Bitmapset *a)
Definition: bitmapset.c:122
#define bms_is_empty(a)
Definition: bitmapset.h:118
static Datum values[MAXATTR]
Definition: bootstrap.c:153
#define likely(x)
Definition: c.h:406
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:475
int32_t int32
Definition: c.h:539
uint64_t uint64
Definition: c.h:544
#define unlikely(x)
Definition: c.h:407
unsigned int Index
Definition: c.h:624
#define OidIsValid(objectId)
Definition: c.h:779
int errdetail(const char *fmt,...)
Definition: elog.c:1216
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:143
ProjectionInfo * ExecBuildProjectionInfo(List *targetList, ExprContext *econtext, TupleTableSlot *slot, PlanState *parent, TupleDesc inputDesc)
Definition: execExpr.c:370
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:229
ExprState * ExecInitExprWithParams(Expr *node, ParamListInfo ext_params)
Definition: execExpr.c:180
ProjectionInfo * ExecBuildUpdateProjection(List *targetList, bool evalTargetList, List *targetColnos, TupleDesc relDesc, ExprContext *econtext, TupleTableSlot *slot, PlanState *parent)
Definition: execExpr.c:547
List * ExecPrepareExprList(List *nodes, EState *estate)
Definition: execExpr.c:839
void ExecCloseIndices(ResultRelInfo *resultRelInfo)
Definition: execIndexing.c:239
void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
Definition: execIndexing.c:161
void CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation, OnConflictAction onConflictAction, List *mergeActions)
Definition: execMain.c:1050
bool ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate, bool emitError)
Definition: execMain.c:1856
void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, Index resultRelationIndex, ResultRelInfo *partition_root_rri, int instrument_options)
Definition: execMain.c:1243
static void InitExecPartitionPruneContexts(PartitionPruneState *prunestate, PlanState *parent_plan, Bitmapset *initially_valid_subplans, int n_total_subplans)
static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate, PartitionTupleRouting *proute, Oid partoid, PartitionDispatch parent_pd, int partidx, ResultRelInfo *rootResultRelInfo)
void ExecDoInitialPruning(EState *estate)
static ResultRelInfo * ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, PartitionTupleRouting *proute, PartitionDispatch dispatch, ResultRelInfo *rootResultRelInfo, int partidx)
PartitionPruneState * ExecInitPartitionExecPruning(PlanState *planstate, int n_total_subplans, int part_prune_index, Bitmapset *relids, Bitmapset **initially_valid_subplans)
Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate, bool initial_prune, Bitmapset **validsubplan_rtis)
static void ExecInitRoutingInfo(ModifyTableState *mtstate, EState *estate, PartitionTupleRouting *proute, PartitionDispatch dispatch, ResultRelInfo *partRelInfo, int partidx, bool is_borrowed_rel)
static char * ExecBuildSlotPartitionKeyDescription(Relation rel, const Datum *values, const bool *isnull, int maxfieldlen)
static void FormPartitionKeyDatum(PartitionDispatch pd, TupleTableSlot *slot, EState *estate, Datum *values, bool *isnull)
static int get_partition_for_tuple(PartitionDispatch pd, const Datum *values, const bool *isnull)
#define PARTITION_CACHED_FIND_THRESHOLD
PartitionTupleRouting * ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
static List * adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri)
static List * adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
ResultRelInfo * ExecFindPartition(ModifyTableState *mtstate, ResultRelInfo *rootResultRelInfo, PartitionTupleRouting *proute, TupleTableSlot *slot, EState *estate)
static void InitPartitionPruneContext(PartitionPruneContext *context, List *pruning_steps, PartitionDesc partdesc, PartitionKey partkey, PlanState *planstate, ExprContext *econtext)
struct PartitionDispatchData PartitionDispatchData
static void find_matching_subplans_recurse(PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, Bitmapset **validsubplans, Bitmapset **validsubplan_rtis)
static PartitionPruneState * CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo, Bitmapset **all_leafpart_rtis)
void ExecCleanupTupleRouting(ModifyTableState *mtstate, PartitionTupleRouting *proute)
struct PartitionDispatchData * PartitionDispatch
Definition: execPartition.h:22
struct PartitionedRelPruningData PartitionedRelPruningData
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1427
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:84
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1443
Relation ExecGetRangeTableRelation(EState *estate, Index rti, bool isResultRel)
Definition: execUtils.c:825
TupleConversionMap * ExecGetRootToChildMap(ResultRelInfo *resultRelInfo, EState *estate)
Definition: execUtils.c:1326
ExprContext * CreateExprContext(EState *estate)
Definition: execUtils.c:307
TupleConversionMap * ExecGetChildToRootMap(ResultRelInfo *resultRelInfo)
Definition: execUtils.c:1300
#define GetPerTupleExprContext(estate)
Definition: executor.h:656
#define EXEC_FLAG_EXPLAIN_GENERIC
Definition: executor.h:67
#define ResetExprContext(econtext)
Definition: executor.h:650
#define GetPerTupleMemoryContext(estate)
Definition: executor.h:661
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:436
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:1763
Assert(PointerIsAligned(start, uint64))
long val
Definition: informix.c:689
int j
Definition: isn.c:78
int i
Definition: isn.c:77
List * lappend(List *list, void *datum)
Definition: list.c:339
List * lappend_int(List *list, int datum)
Definition: list.c:357
List * lappend_oid(List *list, Oid datum)
Definition: list.c:375
void list_free(List *list)
Definition: list.c:1546
bool list_member_oid(const List *list, Oid datum)
Definition: list.c:722
#define NoLock
Definition: lockdefs.h:34
#define RowExclusiveLock
Definition: lockdefs.h:38
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:3074
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1084
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:400
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc0(Size size)
Definition: mcxt.c:1395
void * palloc(Size size)
Definition: mcxt.c:1365
MemoryContext CurrentMemoryContext
Definition: mcxt.c:160
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
Oid GetUserId(void)
Definition: miscinit.c:469
ResultRelInfo * ExecLookupResultRelByOid(ModifyTableState *node, Oid resultoid, bool missing_ok, bool update_cache)
void ExecInitMergeTupleSlots(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo)
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
#define copyObject(obj)
Definition: nodes.h:232
@ ONCONFLICT_NONE
Definition: nodes.h:428
@ ONCONFLICT_UPDATE
Definition: nodes.h:430
@ CMD_MERGE
Definition: nodes.h:279
@ CMD_INSERT
Definition: nodes.h:277
@ CMD_DELETE
Definition: nodes.h:278
@ CMD_UPDATE
Definition: nodes.h:276
@ CMD_NOTHING
Definition: nodes.h:282
#define makeNode(_type_)
Definition: nodes.h:161
#define castNode(_type_, nodeptr)
Definition: nodes.h:182
char * bmsToString(const Bitmapset *bms)
Definition: outfuncs.c:819
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
@ PARTITION_STRATEGY_HASH
Definition: parsenodes.h:902
@ PARTITION_STRATEGY_LIST
Definition: parsenodes.h:900
@ PARTITION_STRATEGY_RANGE
Definition: parsenodes.h:901
PartitionRangeDatumKind
Definition: parsenodes.h:951
#define ACL_SELECT
Definition: parsenodes.h:77
int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation, const Datum *rb_datums, PartitionRangeDatumKind *rb_kind, const Datum *tuple_datums, int n_tuple_datums)
Definition: partbounds.c:3557
uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, const Oid *partcollation, const Datum *values, const bool *isnull)
Definition: partbounds.c:4723
int partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, int nvalues, const Datum *values, bool *is_equal)
Definition: partbounds.c:3696
int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, Datum value, bool *is_equal)
Definition: partbounds.c:3608
#define partition_bound_accepts_nulls(bi)
Definition: partbounds.h:98
PartitionKey RelationGetPartitionKey(Relation rel)
Definition: partcache.c:51
static int16 get_partition_col_attnum(PartitionKey key, int col)
Definition: partcache.h:80
static int get_partition_natts(PartitionKey key)
Definition: partcache.h:65
static Oid get_partition_col_typid(PartitionKey key, int col)
Definition: partcache.h:86
PartitionDirectory CreatePartitionDirectory(MemoryContext mcxt, bool omit_detached)
Definition: partdesc.c:423
PartitionDesc PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel)
Definition: partdesc.c:456
List * get_partition_ancestors(Oid relid)
Definition: partition.c:134
Bitmapset * get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
Definition: partprune.c:846
#define PruneCxtStateIdx(partnatts, step_id, keyno)
Definition: partprune.h:70
int16 attnum
Definition: pg_attribute.h:74
#define PARTITION_MAX_KEYS
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define lfirst_int(lc)
Definition: pg_list.h:173
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
#define linitial(l)
Definition: pg_list.h:178
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
#define list_nth_node(type, list, n)
Definition: pg_list.h:327
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define lfirst_oid(lc)
Definition: pg_list.h:174
static char * buf
Definition: pg_test_fsync.c:72
uint64_t Datum
Definition: postgres.h:70
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:212
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
#define INNER_VAR
Definition: primnodes.h:242
#define RelationGetForm(relation)
Definition: rel.h:509
#define RelationGetRelid(relation)
Definition: rel.h:515
#define RelationGetDescr(relation)
Definition: rel.h:541
#define RelationGetRelationName(relation)
Definition: rel.h:549
List * RelationGetIndexList(Relation relation)
Definition: relcache.c:4836
int errtable(Relation rel)
Definition: relcache.c:6049
Node * map_variable_attnos(Node *node, int target_varno, int sublevels_up, const AttrMap *attno_map, Oid to_rowtype, bool *found_whole_row)
int check_enable_rls(Oid relid, Oid checkAsUser, bool noError)
Definition: rls.c:52
@ RLS_ENABLED
Definition: rls.h:45
char * pg_get_partkeydef_columns(Oid relid, bool pretty)
Definition: ruleutils.c:1923
void check_stack_depth(void)
Definition: stack_depth.c:95
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
Definition: attmap.h:35
int maplen
Definition: attmap.h:37
AttrNumber * attnums
Definition: attmap.h:36
List * es_part_prune_infos
Definition: execnodes.h:670
List * es_tuple_routing_result_relations
Definition: execnodes.h:698
int es_top_eflags
Definition: execnodes.h:719
int es_instrument
Definition: execnodes.h:720
Bitmapset * es_unpruned_relids
Definition: execnodes.h:673
List * es_part_prune_states
Definition: execnodes.h:671
MemoryContext es_query_cxt
Definition: execnodes.h:710
List * es_tupleTable
Definition: execnodes.h:712
PartitionDirectory es_partition_directory
Definition: execnodes.h:692
List * es_part_prune_results
Definition: execnodes.h:672
ParamListInfo ecxt_param_list_info
Definition: execnodes.h:285
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:273
struct EState * ecxt_estate
Definition: execnodes.h:315
EndForeignInsert_function EndForeignInsert
Definition: fdwapi.h:239
BeginForeignInsert_function BeginForeignInsert
Definition: fdwapi.h:238
ExecForeignBatchInsert_function ExecForeignBatchInsert
Definition: fdwapi.h:233
GetForeignModifyBatchSize_function GetForeignModifyBatchSize
Definition: fdwapi.h:234
Definition: fmgr.h:57
Definition: pg_list.h:54
MergeAction * mas_action
Definition: execnodes.h:449
ProjectionInfo * mas_proj
Definition: execnodes.h:450
ExprState * mas_whenqual
Definition: execnodes.h:452
ResultRelInfo * resultRelInfo
Definition: execnodes.h:1408
PlanState ps
Definition: execnodes.h:1403
ResultRelInfo * rootResultRelInfo
Definition: execnodes.h:1416
List * onConflictCols
Definition: plannodes.h:368
List * mergeJoinConditions
Definition: plannodes.h:378
CmdType operation
Definition: plannodes.h:334
List * resultRelations
Definition: plannodes.h:342
List * onConflictSet
Definition: plannodes.h:366
List * mergeActionLists
Definition: plannodes.h:376
List * returningLists
Definition: plannodes.h:352
List * withCheckOptionLists
Definition: plannodes.h:346
Node * onConflictWhere
Definition: plannodes.h:370
OnConflictAction onConflictAction
Definition: plannodes.h:362
Definition: nodes.h:135
TupleTableSlot * oc_ProjSlot
Definition: execnodes.h:434
TupleTableSlot * oc_Existing
Definition: execnodes.h:433
ExprState * oc_WhereClause
Definition: execnodes.h:436
ProjectionInfo * oc_ProjInfo
Definition: execnodes.h:435
PartitionRangeDatumKind ** kind
Definition: partbounds.h:84
int last_found_datum_index
Definition: partdesc.h:46
PartitionBoundInfo boundinfo
Definition: partdesc.h:38
int last_found_count
Definition: partdesc.h:63
bool * is_leaf
Definition: partdesc.h:35
int last_found_part_index
Definition: partdesc.h:52
TupleTableSlot * tupslot
PartitionDesc partdesc
int indexes[FLEXIBLE_ARRAY_MEMBER]
Oid * partcollation
Definition: partcache.h:39
PartitionStrategy strategy
Definition: partcache.h:27
List * partexprs
Definition: partcache.h:31
FmgrInfo * partsupfunc
Definition: partcache.h:36
AttrNumber * partattrs
Definition: partcache.h:29
FmgrInfo * partsupfunc
Definition: partprune.h:56
ExprContext * exprcontext
Definition: partprune.h:60
MemoryContext ppccontext
Definition: partprune.h:58
PartitionBoundInfo boundinfo
Definition: partprune.h:54
PlanState * planstate
Definition: partprune.h:59
FmgrInfo * stepcmpfuncs
Definition: partprune.h:57
ExprState ** exprstates
Definition: partprune.h:61
Bitmapset * other_subplans
Definition: plannodes.h:1649
Bitmapset * relids
Definition: plannodes.h:1647
PartitionPruningData * partprunedata[FLEXIBLE_ARRAY_MEMBER]
Bitmapset * execparamids
ExprContext * econtext
Bitmapset * other_subplans
MemoryContext prune_context
PartitionPruneStep step
Definition: plannodes.h:1758
Bitmapset * nullkeys
Definition: plannodes.h:1763
PartitionedRelPruningData partrelprunedata[FLEXIBLE_ARRAY_MEMBER]
Definition: execPartition.h:87
PartitionDispatch * partition_dispatch_info
Definition: execPartition.c:94
ResultRelInfo ** partitions
Definition: execPartition.c:98
MemoryContext memcxt
ResultRelInfo ** nonleaf_partitions
Definition: execPartition.c:95
Bitmapset * present_parts
Definition: plannodes.h:1683
Bitmapset * execparamids
Definition: plannodes.h:1712
PartitionPruneContext exec_context
Definition: execPartition.h:74
PartitionPruneContext initial_context
Definition: execPartition.h:73
Plan * plan
Definition: execnodes.h:1165
EState * state
Definition: execnodes.h:1167
ExprContext * ps_ExprContext
Definition: execnodes.h:1204
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1203
Form_pg_class rd_rel
Definition: rel.h:111
TupleTableSlot * ri_PartitionTupleSlot
Definition: execnodes.h:619
OnConflictSetState * ri_onConflict
Definition: execnodes.h:583
List * ri_onConflictArbiterIndexes
Definition: execnodes.h:580
Relation ri_RelationDesc
Definition: execnodes.h:480
struct CopyMultiInsertBuffer * ri_CopyMultiInsertBuffer
Definition: execnodes.h:622
Index ri_RangeTableIndex
Definition: execnodes.h:477
struct FdwRoutine * ri_FdwRoutine
Definition: execnodes.h:533
int ri_BatchSize
Definition: execnodes.h:544
AttrMap * attrMap
Definition: tupconvert.h:28
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:92
TupleTableSlot * execute_attr_map_slot(AttrMap *attrMap, TupleTableSlot *in_slot, TupleTableSlot *out_slot)
Definition: tupconvert.c:193
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition: tuptable.h:398
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:457
#define IsolationUsesXactSnapshot()
Definition: xact.h:52