/*
 * Source: pgsql.git, src/backend/executor/nodeBitmapHeapscan.c
 * Commit: "Fix EXPLAIN Bitmap heap scan to count pages with no visible tuples"
 * blob cee7f45aabec12040abf0a71eeae46cd3e5991c7
 */
1 /*-------------------------------------------------------------------------
3 * nodeBitmapHeapscan.c
4 * Routines to support bitmapped scans of relations
6 * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 * special snapshots). The reason is that since index and heap scans are
9 * decoupled, there can be no assurance that the index tuple prompting a
10 * visit to a particular heap TID still exists when the visit is made.
11 * Therefore the tuple might not exist anymore either (which is OK because
12 * heap_fetch will cope) --- but worse, the tuple slot could have been
13 * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 * certain to fail the time qual and so it will not be mistakenly returned,
15 * but with anything else we might return a tuple that doesn't meet the
16 * required index qual conditions.
19 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
20 * Portions Copyright (c) 1994, Regents of the University of California
23 * IDENTIFICATION
24 * src/backend/executor/nodeBitmapHeapscan.c
26 *-------------------------------------------------------------------------
29 * INTERFACE ROUTINES
30 * ExecBitmapHeapScan scans a relation using bitmap info
31 * ExecBitmapHeapNext workhorse for above
32 * ExecInitBitmapHeapScan creates and initializes state info.
33 * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 * ExecEndBitmapHeapScan releases all storage.
36 #include "postgres.h"
38 #include <math.h>
40 #include "access/relscan.h"
41 #include "access/tableam.h"
42 #include "access/visibilitymap.h"
43 #include "executor/executor.h"
44 #include "executor/nodeBitmapHeapscan.h"
45 #include "miscadmin.h"
46 #include "pgstat.h"
47 #include "storage/bufmgr.h"
48 #include "utils/rel.h"
49 #include "utils/snapmgr.h"
50 #include "utils/spccache.h"
52 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
53 static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
54 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
55 TBMIterateResult *tbmres);
56 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
57 static inline void BitmapPrefetch(BitmapHeapScanState *node,
58 TableScanDesc scan);
59 static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
62 /* ----------------------------------------------------------------
63 * BitmapHeapNext
65 * Retrieve next tuple from the BitmapHeapScan node's currentRelation
66 * ----------------------------------------------------------------
68 static TupleTableSlot *
69 BitmapHeapNext(BitmapHeapScanState *node)
71 ExprContext *econtext;
72 TableScanDesc scan;
73 TIDBitmap *tbm;
74 TBMIterator *tbmiterator = NULL;
75 TBMSharedIterator *shared_tbmiterator = NULL;
76 TBMIterateResult *tbmres;
77 TupleTableSlot *slot;
78 ParallelBitmapHeapState *pstate = node->pstate;
79 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
82 * extract necessary information from index scan node
84 econtext = node->ss.ps.ps_ExprContext;
85 slot = node->ss.ss_ScanTupleSlot;
86 scan = node->ss.ss_currentScanDesc;
87 tbm = node->tbm;
88 if (pstate == NULL)
89 tbmiterator = node->tbmiterator;
90 else
91 shared_tbmiterator = node->shared_tbmiterator;
92 tbmres = node->tbmres;
95 * If we haven't yet performed the underlying index scan, do it, and begin
96 * the iteration over the bitmap.
98 * For prefetching, we use *two* iterators, one for the pages we are
99 * actually scanning and another that runs ahead of the first for
100 * prefetching. node->prefetch_pages tracks exactly how many pages ahead
101 * the prefetch iterator is. Also, node->prefetch_target tracks the
102 * desired prefetch distance, which starts small and increases up to the
103 * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
104 * a scan that stops after a few tuples because of a LIMIT.
106 if (!node->initialized)
108 if (!pstate)
110 tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
112 if (!tbm || !IsA(tbm, TIDBitmap))
113 elog(ERROR, "unrecognized result from subplan");
115 node->tbm = tbm;
116 node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
117 node->tbmres = tbmres = NULL;
119 #ifdef USE_PREFETCH
120 if (node->prefetch_maximum > 0)
122 node->prefetch_iterator = tbm_begin_iterate(tbm);
123 node->prefetch_pages = 0;
124 node->prefetch_target = -1;
126 #endif /* USE_PREFETCH */
128 else
131 * The leader will immediately come out of the function, but
132 * others will be blocked until leader populates the TBM and wakes
133 * them up.
135 if (BitmapShouldInitializeSharedState(pstate))
137 tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
138 if (!tbm || !IsA(tbm, TIDBitmap))
139 elog(ERROR, "unrecognized result from subplan");
141 node->tbm = tbm;
144 * Prepare to iterate over the TBM. This will return the
145 * dsa_pointer of the iterator state which will be used by
146 * multiple processes to iterate jointly.
148 pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
149 #ifdef USE_PREFETCH
150 if (node->prefetch_maximum > 0)
152 pstate->prefetch_iterator =
153 tbm_prepare_shared_iterate(tbm);
156 * We don't need the mutex here as we haven't yet woke up
157 * others.
159 pstate->prefetch_pages = 0;
160 pstate->prefetch_target = -1;
162 #endif
164 /* We have initialized the shared state so wake up others. */
165 BitmapDoneInitializingSharedState(pstate);
168 /* Allocate a private iterator and attach the shared state to it */
169 node->shared_tbmiterator = shared_tbmiterator =
170 tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
171 node->tbmres = tbmres = NULL;
173 #ifdef USE_PREFETCH
174 if (node->prefetch_maximum > 0)
176 node->shared_prefetch_iterator =
177 tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
179 #endif /* USE_PREFETCH */
181 node->initialized = true;
184 for (;;)
186 bool skip_fetch;
188 CHECK_FOR_INTERRUPTS();
191 * Get next page of results if needed
193 if (tbmres == NULL)
195 if (!pstate)
196 node->tbmres = tbmres = tbm_iterate(tbmiterator);
197 else
198 node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
199 if (tbmres == NULL)
201 /* no more entries in the bitmap */
202 break;
205 BitmapAdjustPrefetchIterator(node, tbmres);
207 if (tbmres->ntuples >= 0)
208 node->exact_pages++;
209 else
210 node->lossy_pages++;
213 * We can skip fetching the heap page if we don't need any fields
214 * from the heap, and the bitmap entries don't need rechecking,
215 * and all tuples on the page are visible to our transaction.
217 * XXX: It's a layering violation that we do these checks above
218 * tableam, they should probably moved below it at some point.
220 skip_fetch = (node->can_skip_fetch &&
221 !tbmres->recheck &&
222 VM_ALL_VISIBLE(node->ss.ss_currentRelation,
223 tbmres->blockno,
224 &node->vmbuffer));
226 if (skip_fetch)
228 /* can't be lossy in the skip_fetch case */
229 Assert(tbmres->ntuples >= 0);
232 * The number of tuples on this page is put into
233 * node->return_empty_tuples.
235 node->return_empty_tuples = tbmres->ntuples;
237 else if (!table_scan_bitmap_next_block(scan, tbmres))
239 /* AM doesn't think this block is valid, skip */
240 continue;
243 /* Adjust the prefetch target */
244 BitmapAdjustPrefetchTarget(node);
246 else
249 * Continuing in previously obtained page.
252 #ifdef USE_PREFETCH
255 * Try to prefetch at least a few pages even before we get to the
256 * second page if we don't stop reading after the first tuple.
258 if (!pstate)
260 if (node->prefetch_target < node->prefetch_maximum)
261 node->prefetch_target++;
263 else if (pstate->prefetch_target < node->prefetch_maximum)
265 /* take spinlock while updating shared state */
266 SpinLockAcquire(&pstate->mutex);
267 if (pstate->prefetch_target < node->prefetch_maximum)
268 pstate->prefetch_target++;
269 SpinLockRelease(&pstate->mutex);
271 #endif /* USE_PREFETCH */
275 * We issue prefetch requests *after* fetching the current page to try
276 * to avoid having prefetching interfere with the main I/O. Also, this
277 * should happen only when we have determined there is still something
278 * to do on the current page, else we may uselessly prefetch the same
279 * page we are just about to request for real.
281 * XXX: It's a layering violation that we do these checks above
282 * tableam, they should probably moved below it at some point.
284 BitmapPrefetch(node, scan);
286 if (node->return_empty_tuples > 0)
289 * If we don't have to fetch the tuple, just return nulls.
291 ExecStoreAllNullTuple(slot);
293 if (--node->return_empty_tuples == 0)
295 /* no more tuples to return in the next round */
296 node->tbmres = tbmres = NULL;
299 else
302 * Attempt to fetch tuple from AM.
304 if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
306 /* nothing more to look at on this page */
307 node->tbmres = tbmres = NULL;
308 continue;
312 * If we are using lossy info, we have to recheck the qual
313 * conditions at every tuple.
315 if (tbmres->recheck)
317 econtext->ecxt_scantuple = slot;
318 if (!ExecQualAndReset(node->bitmapqualorig, econtext))
320 /* Fails recheck, so drop it and loop back for another */
321 InstrCountFiltered2(node, 1);
322 ExecClearTuple(slot);
323 continue;
328 /* OK to return this tuple */
329 return slot;
333 * if we get here it means we are at the end of the scan..
335 return ExecClearTuple(slot);
339 * BitmapDoneInitializingSharedState - Shared state is initialized
341 * By this time the leader has already populated the TBM and initialized the
342 * shared state so wake up other processes.
344 static inline void
345 BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
347 SpinLockAcquire(&pstate->mutex);
348 pstate->state = BM_FINISHED;
349 SpinLockRelease(&pstate->mutex);
350 ConditionVariableBroadcast(&pstate->cv);
354 * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
356 static inline void
357 BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
358 TBMIterateResult *tbmres)
360 #ifdef USE_PREFETCH
361 ParallelBitmapHeapState *pstate = node->pstate;
363 if (pstate == NULL)
365 TBMIterator *prefetch_iterator = node->prefetch_iterator;
367 if (node->prefetch_pages > 0)
369 /* The main iterator has closed the distance by one page */
370 node->prefetch_pages--;
372 else if (prefetch_iterator)
374 /* Do not let the prefetch iterator get behind the main one */
375 TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
377 if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
378 elog(ERROR, "prefetch and main iterators are out of sync");
380 return;
383 if (node->prefetch_maximum > 0)
385 TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
387 SpinLockAcquire(&pstate->mutex);
388 if (pstate->prefetch_pages > 0)
390 pstate->prefetch_pages--;
391 SpinLockRelease(&pstate->mutex);
393 else
395 /* Release the mutex before iterating */
396 SpinLockRelease(&pstate->mutex);
399 * In case of shared mode, we can not ensure that the current
400 * blockno of the main iterator and that of the prefetch iterator
401 * are same. It's possible that whatever blockno we are
402 * prefetching will be processed by another process. Therefore,
403 * we don't validate the blockno here as we do in non-parallel
404 * case.
406 if (prefetch_iterator)
407 tbm_shared_iterate(prefetch_iterator);
410 #endif /* USE_PREFETCH */
414 * BitmapAdjustPrefetchTarget - Adjust the prefetch target
416 * Increase prefetch target if it's not yet at the max. Note that
417 * we will increase it to zero after fetching the very first
418 * page/tuple, then to one after the second tuple is fetched, then
419 * it doubles as later pages are fetched.
421 static inline void
422 BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
424 #ifdef USE_PREFETCH
425 ParallelBitmapHeapState *pstate = node->pstate;
427 if (pstate == NULL)
429 if (node->prefetch_target >= node->prefetch_maximum)
430 /* don't increase any further */ ;
431 else if (node->prefetch_target >= node->prefetch_maximum / 2)
432 node->prefetch_target = node->prefetch_maximum;
433 else if (node->prefetch_target > 0)
434 node->prefetch_target *= 2;
435 else
436 node->prefetch_target++;
437 return;
440 /* Do an unlocked check first to save spinlock acquisitions. */
441 if (pstate->prefetch_target < node->prefetch_maximum)
443 SpinLockAcquire(&pstate->mutex);
444 if (pstate->prefetch_target >= node->prefetch_maximum)
445 /* don't increase any further */ ;
446 else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
447 pstate->prefetch_target = node->prefetch_maximum;
448 else if (pstate->prefetch_target > 0)
449 pstate->prefetch_target *= 2;
450 else
451 pstate->prefetch_target++;
452 SpinLockRelease(&pstate->mutex);
454 #endif /* USE_PREFETCH */
458 * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
460 static inline void
461 BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
463 #ifdef USE_PREFETCH
464 ParallelBitmapHeapState *pstate = node->pstate;
466 if (pstate == NULL)
468 TBMIterator *prefetch_iterator = node->prefetch_iterator;
470 if (prefetch_iterator)
472 while (node->prefetch_pages < node->prefetch_target)
474 TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
475 bool skip_fetch;
477 if (tbmpre == NULL)
479 /* No more pages to prefetch */
480 tbm_end_iterate(prefetch_iterator);
481 node->prefetch_iterator = NULL;
482 break;
484 node->prefetch_pages++;
487 * If we expect not to have to actually read this heap page,
488 * skip this prefetch call, but continue to run the prefetch
489 * logic normally. (Would it be better not to increment
490 * prefetch_pages?)
492 * This depends on the assumption that the index AM will
493 * report the same recheck flag for this future heap page as
494 * it did for the current heap page; which is not a certainty
495 * but is true in many cases.
497 skip_fetch = (node->can_skip_fetch &&
498 (node->tbmres ? !node->tbmres->recheck : false) &&
499 VM_ALL_VISIBLE(node->ss.ss_currentRelation,
500 tbmpre->blockno,
501 &node->pvmbuffer));
503 if (!skip_fetch)
504 PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
508 return;
511 if (pstate->prefetch_pages < pstate->prefetch_target)
513 TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
515 if (prefetch_iterator)
517 while (1)
519 TBMIterateResult *tbmpre;
520 bool do_prefetch = false;
521 bool skip_fetch;
524 * Recheck under the mutex. If some other process has already
525 * done enough prefetching then we need not to do anything.
527 SpinLockAcquire(&pstate->mutex);
528 if (pstate->prefetch_pages < pstate->prefetch_target)
530 pstate->prefetch_pages++;
531 do_prefetch = true;
533 SpinLockRelease(&pstate->mutex);
535 if (!do_prefetch)
536 return;
538 tbmpre = tbm_shared_iterate(prefetch_iterator);
539 if (tbmpre == NULL)
541 /* No more pages to prefetch */
542 tbm_end_shared_iterate(prefetch_iterator);
543 node->shared_prefetch_iterator = NULL;
544 break;
547 /* As above, skip prefetch if we expect not to need page */
548 skip_fetch = (node->can_skip_fetch &&
549 (node->tbmres ? !node->tbmres->recheck : false) &&
550 VM_ALL_VISIBLE(node->ss.ss_currentRelation,
551 tbmpre->blockno,
552 &node->pvmbuffer));
554 if (!skip_fetch)
555 PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
559 #endif /* USE_PREFETCH */
563 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
565 static bool
566 BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
568 ExprContext *econtext;
571 * extract necessary information from index scan node
573 econtext = node->ss.ps.ps_ExprContext;
575 /* Does the tuple meet the original qual conditions? */
576 econtext->ecxt_scantuple = slot;
577 return ExecQualAndReset(node->bitmapqualorig, econtext);
580 /* ----------------------------------------------------------------
581 * ExecBitmapHeapScan(node)
582 * ----------------------------------------------------------------
584 static TupleTableSlot *
585 ExecBitmapHeapScan(PlanState *pstate)
587 BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
589 return ExecScan(&node->ss,
590 (ExecScanAccessMtd) BitmapHeapNext,
591 (ExecScanRecheckMtd) BitmapHeapRecheck);
594 /* ----------------------------------------------------------------
595 * ExecReScanBitmapHeapScan(node)
596 * ----------------------------------------------------------------
598 void
599 ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
601 PlanState *outerPlan = outerPlanState(node);
603 /* rescan to release any page pin */
604 table_rescan(node->ss.ss_currentScanDesc, NULL);
606 /* release bitmaps and buffers if any */
607 if (node->tbmiterator)
608 tbm_end_iterate(node->tbmiterator);
609 if (node->prefetch_iterator)
610 tbm_end_iterate(node->prefetch_iterator);
611 if (node->shared_tbmiterator)
612 tbm_end_shared_iterate(node->shared_tbmiterator);
613 if (node->shared_prefetch_iterator)
614 tbm_end_shared_iterate(node->shared_prefetch_iterator);
615 if (node->tbm)
616 tbm_free(node->tbm);
617 if (node->vmbuffer != InvalidBuffer)
618 ReleaseBuffer(node->vmbuffer);
619 if (node->pvmbuffer != InvalidBuffer)
620 ReleaseBuffer(node->pvmbuffer);
621 node->tbm = NULL;
622 node->tbmiterator = NULL;
623 node->tbmres = NULL;
624 node->prefetch_iterator = NULL;
625 node->initialized = false;
626 node->shared_tbmiterator = NULL;
627 node->shared_prefetch_iterator = NULL;
628 node->vmbuffer = InvalidBuffer;
629 node->pvmbuffer = InvalidBuffer;
631 ExecScanReScan(&node->ss);
634 * if chgParam of subnode is not null then plan will be re-scanned by
635 * first ExecProcNode.
637 if (outerPlan->chgParam == NULL)
638 ExecReScan(outerPlan);
641 /* ----------------------------------------------------------------
642 * ExecEndBitmapHeapScan
643 * ----------------------------------------------------------------
645 void
646 ExecEndBitmapHeapScan(BitmapHeapScanState *node)
648 TableScanDesc scanDesc;
651 * extract information from the node
653 scanDesc = node->ss.ss_currentScanDesc;
656 * close down subplans
658 ExecEndNode(outerPlanState(node));
661 * release bitmaps and buffers if any
663 if (node->tbmiterator)
664 tbm_end_iterate(node->tbmiterator);
665 if (node->prefetch_iterator)
666 tbm_end_iterate(node->prefetch_iterator);
667 if (node->tbm)
668 tbm_free(node->tbm);
669 if (node->shared_tbmiterator)
670 tbm_end_shared_iterate(node->shared_tbmiterator);
671 if (node->shared_prefetch_iterator)
672 tbm_end_shared_iterate(node->shared_prefetch_iterator);
673 if (node->vmbuffer != InvalidBuffer)
674 ReleaseBuffer(node->vmbuffer);
675 if (node->pvmbuffer != InvalidBuffer)
676 ReleaseBuffer(node->pvmbuffer);
679 * close heap scan
681 table_endscan(scanDesc);
684 /* ----------------------------------------------------------------
685 * ExecInitBitmapHeapScan
687 * Initializes the scan's state information.
688 * ----------------------------------------------------------------
690 BitmapHeapScanState *
691 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
693 BitmapHeapScanState *scanstate;
694 Relation currentRelation;
696 /* check for unsupported flags */
697 Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
700 * Assert caller didn't ask for an unsafe snapshot --- see comments at
701 * head of file.
703 Assert(IsMVCCSnapshot(estate->es_snapshot));
706 * create state structure
708 scanstate = makeNode(BitmapHeapScanState);
709 scanstate->ss.ps.plan = (Plan *) node;
710 scanstate->ss.ps.state = estate;
711 scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
713 scanstate->tbm = NULL;
714 scanstate->tbmiterator = NULL;
715 scanstate->tbmres = NULL;
716 scanstate->return_empty_tuples = 0;
717 scanstate->vmbuffer = InvalidBuffer;
718 scanstate->pvmbuffer = InvalidBuffer;
719 scanstate->exact_pages = 0;
720 scanstate->lossy_pages = 0;
721 scanstate->prefetch_iterator = NULL;
722 scanstate->prefetch_pages = 0;
723 scanstate->prefetch_target = 0;
724 scanstate->initialized = false;
725 scanstate->shared_tbmiterator = NULL;
726 scanstate->shared_prefetch_iterator = NULL;
727 scanstate->pstate = NULL;
730 * We can potentially skip fetching heap pages if we do not need any
731 * columns of the table, either for checking non-indexable quals or for
732 * returning data. This test is a bit simplistic, as it checks the
733 * stronger condition that there's no qual or return tlist at all. But in
734 * most cases it's probably not worth working harder than that.
736 scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
737 node->scan.plan.targetlist == NIL);
740 * Miscellaneous initialization
742 * create expression context for node
744 ExecAssignExprContext(estate, &scanstate->ss.ps);
747 * open the scan relation
749 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
752 * initialize child nodes
754 outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
757 * get the scan type from the relation descriptor.
759 ExecInitScanTupleSlot(estate, &scanstate->ss,
760 RelationGetDescr(currentRelation),
761 table_slot_callbacks(currentRelation));
764 * Initialize result type and projection.
766 ExecInitResultTypeTL(&scanstate->ss.ps);
767 ExecAssignScanProjectionInfo(&scanstate->ss);
770 * initialize child expressions
772 scanstate->ss.ps.qual =
773 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
774 scanstate->bitmapqualorig =
775 ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
778 * Maximum number of prefetches for the tablespace if configured,
779 * otherwise the current value of the effective_io_concurrency GUC.
781 scanstate->prefetch_maximum =
782 get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
784 scanstate->ss.ss_currentRelation = currentRelation;
786 scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
787 estate->es_snapshot,
789 NULL);
792 * all done.
794 return scanstate;
797 /*----------------
798 * BitmapShouldInitializeSharedState
800 * The first process to come here and see the state to the BM_INITIAL
801 * will become the leader for the parallel bitmap scan and will be
802 * responsible for populating the TIDBitmap. The other processes will
803 * be blocked by the condition variable until the leader wakes them up.
804 * ---------------
806 static bool
807 BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
809 SharedBitmapState state;
811 while (1)
813 SpinLockAcquire(&pstate->mutex);
814 state = pstate->state;
815 if (pstate->state == BM_INITIAL)
816 pstate->state = BM_INPROGRESS;
817 SpinLockRelease(&pstate->mutex);
819 /* Exit if bitmap is done, or if we're the leader. */
820 if (state != BM_INPROGRESS)
821 break;
823 /* Wait for the leader to wake us up. */
824 ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
827 ConditionVariableCancelSleep();
829 return (state == BM_INITIAL);
832 /* ----------------------------------------------------------------
833 * ExecBitmapHeapEstimate
835 * Compute the amount of space we'll need in the parallel
836 * query DSM, and inform pcxt->estimator about our needs.
837 * ----------------------------------------------------------------
839 void
840 ExecBitmapHeapEstimate(BitmapHeapScanState *node,
841 ParallelContext *pcxt)
843 shm_toc_estimate_chunk(&pcxt->estimator, sizeof(ParallelBitmapHeapState));
844 shm_toc_estimate_keys(&pcxt->estimator, 1);
847 /* ----------------------------------------------------------------
848 * ExecBitmapHeapInitializeDSM
850 * Set up a parallel bitmap heap scan descriptor.
851 * ----------------------------------------------------------------
853 void
854 ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
855 ParallelContext *pcxt)
857 ParallelBitmapHeapState *pstate;
858 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
860 /* If there's no DSA, there are no workers; initialize nothing. */
861 if (dsa == NULL)
862 return;
864 pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelBitmapHeapState));
866 pstate->tbmiterator = 0;
867 pstate->prefetch_iterator = 0;
869 /* Initialize the mutex */
870 SpinLockInit(&pstate->mutex);
871 pstate->prefetch_pages = 0;
872 pstate->prefetch_target = 0;
873 pstate->state = BM_INITIAL;
875 ConditionVariableInit(&pstate->cv);
877 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
878 node->pstate = pstate;
881 /* ----------------------------------------------------------------
882 * ExecBitmapHeapReInitializeDSM
884 * Reset shared state before beginning a fresh scan.
885 * ----------------------------------------------------------------
887 void
888 ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
889 ParallelContext *pcxt)
891 ParallelBitmapHeapState *pstate = node->pstate;
892 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
894 /* If there's no DSA, there are no workers; do nothing. */
895 if (dsa == NULL)
896 return;
898 pstate->state = BM_INITIAL;
900 if (DsaPointerIsValid(pstate->tbmiterator))
901 tbm_free_shared_area(dsa, pstate->tbmiterator);
903 if (DsaPointerIsValid(pstate->prefetch_iterator))
904 tbm_free_shared_area(dsa, pstate->prefetch_iterator);
906 pstate->tbmiterator = InvalidDsaPointer;
907 pstate->prefetch_iterator = InvalidDsaPointer;
910 /* ----------------------------------------------------------------
911 * ExecBitmapHeapInitializeWorker
913 * Copy relevant information from TOC into planstate.
914 * ----------------------------------------------------------------
916 void
917 ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
918 ParallelWorkerContext *pwcxt)
920 ParallelBitmapHeapState *pstate;
922 Assert(node->ss.ps.state->es_query_dsa != NULL);
924 pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
925 node->pstate = pstate;