Update copyright for 2022
[pgsql.git] / src / backend / executor / nodeSamplescan.c
bloba03ae120f8b56bf8ec536ec85f556e9396e19299
/*-------------------------------------------------------------------------
 *
 * nodeSamplescan.c
 *	  Support routines for sample scans of relations (table sampling).
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSamplescan.c
 *
 *-------------------------------------------------------------------------
 */
15 #include "postgres.h"
17 #include "access/relscan.h"
18 #include "access/tableam.h"
19 #include "access/tsmapi.h"
20 #include "common/pg_prng.h"
21 #include "executor/executor.h"
22 #include "executor/nodeSamplescan.h"
23 #include "miscadmin.h"
24 #include "pgstat.h"
25 #include "storage/bufmgr.h"
26 #include "storage/predicate.h"
27 #include "utils/builtins.h"
28 #include "utils/rel.h"
30 static TupleTableSlot *SampleNext(SampleScanState *node);
31 static void tablesample_init(SampleScanState *scanstate);
32 static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate);
/* ----------------------------------------------------------------
 *						Scan Support
 * ----------------------------------------------------------------
 */
39 /* ----------------------------------------------------------------
40 * SampleNext
42 * This is a workhorse for ExecSampleScan
43 * ----------------------------------------------------------------
45 static TupleTableSlot *
46 SampleNext(SampleScanState *node)
49 * if this is first call within a scan, initialize
51 if (!node->begun)
52 tablesample_init(node);
55 * get the next tuple, and store it in our result slot
57 return tablesample_getnext(node);
61 * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
63 static bool
64 SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
67 * No need to recheck for SampleScan, since like SeqScan we don't pass any
68 * checkable keys to heap_beginscan.
70 return true;
73 /* ----------------------------------------------------------------
74 * ExecSampleScan(node)
76 * Scans the relation using the sampling method and returns
77 * the next qualifying tuple.
78 * We call the ExecScan() routine and pass it the appropriate
79 * access method functions.
80 * ----------------------------------------------------------------
82 static TupleTableSlot *
83 ExecSampleScan(PlanState *pstate)
85 SampleScanState *node = castNode(SampleScanState, pstate);
87 return ExecScan(&node->ss,
88 (ExecScanAccessMtd) SampleNext,
89 (ExecScanRecheckMtd) SampleRecheck);
92 /* ----------------------------------------------------------------
93 * ExecInitSampleScan
94 * ----------------------------------------------------------------
96 SampleScanState *
97 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
99 SampleScanState *scanstate;
100 TableSampleClause *tsc = node->tablesample;
101 TsmRoutine *tsm;
103 Assert(outerPlan(node) == NULL);
104 Assert(innerPlan(node) == NULL);
107 * create state structure
109 scanstate = makeNode(SampleScanState);
110 scanstate->ss.ps.plan = (Plan *) node;
111 scanstate->ss.ps.state = estate;
112 scanstate->ss.ps.ExecProcNode = ExecSampleScan;
115 * Miscellaneous initialization
117 * create expression context for node
119 ExecAssignExprContext(estate, &scanstate->ss.ps);
122 * open the scan relation
124 scanstate->ss.ss_currentRelation =
125 ExecOpenScanRelation(estate,
126 node->scan.scanrelid,
127 eflags);
129 /* we won't set up the HeapScanDesc till later */
130 scanstate->ss.ss_currentScanDesc = NULL;
132 /* and create slot with appropriate rowtype */
133 ExecInitScanTupleSlot(estate, &scanstate->ss,
134 RelationGetDescr(scanstate->ss.ss_currentRelation),
135 table_slot_callbacks(scanstate->ss.ss_currentRelation));
138 * Initialize result type and projection.
140 ExecInitResultTypeTL(&scanstate->ss.ps);
141 ExecAssignScanProjectionInfo(&scanstate->ss);
144 * initialize child expressions
146 scanstate->ss.ps.qual =
147 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
149 scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
150 scanstate->repeatable =
151 ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
154 * If we don't have a REPEATABLE clause, select a random seed. We want to
155 * do this just once, since the seed shouldn't change over rescans.
157 if (tsc->repeatable == NULL)
158 scanstate->seed = pg_prng_uint32(&pg_global_prng_state);
161 * Finally, initialize the TABLESAMPLE method handler.
163 tsm = GetTsmRoutine(tsc->tsmhandler);
164 scanstate->tsmroutine = tsm;
165 scanstate->tsm_state = NULL;
167 if (tsm->InitSampleScan)
168 tsm->InitSampleScan(scanstate, eflags);
170 /* We'll do BeginSampleScan later; we can't evaluate params yet */
171 scanstate->begun = false;
173 return scanstate;
176 /* ----------------------------------------------------------------
177 * ExecEndSampleScan
179 * frees any storage allocated through C routines.
180 * ----------------------------------------------------------------
182 void
183 ExecEndSampleScan(SampleScanState *node)
186 * Tell sampling function that we finished the scan.
188 if (node->tsmroutine->EndSampleScan)
189 node->tsmroutine->EndSampleScan(node);
192 * Free the exprcontext
194 ExecFreeExprContext(&node->ss.ps);
197 * clean out the tuple table
199 if (node->ss.ps.ps_ResultTupleSlot)
200 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
201 ExecClearTuple(node->ss.ss_ScanTupleSlot);
204 * close heap scan
206 if (node->ss.ss_currentScanDesc)
207 table_endscan(node->ss.ss_currentScanDesc);
210 /* ----------------------------------------------------------------
211 * ExecReScanSampleScan
213 * Rescans the relation.
215 * ----------------------------------------------------------------
217 void
218 ExecReScanSampleScan(SampleScanState *node)
220 /* Remember we need to do BeginSampleScan again (if we did it at all) */
221 node->begun = false;
222 node->done = false;
223 node->haveblock = false;
224 node->donetuples = 0;
226 ExecScanReScan(&node->ss);
231 * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
233 static void
234 tablesample_init(SampleScanState *scanstate)
236 TsmRoutine *tsm = scanstate->tsmroutine;
237 ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
238 Datum *params;
239 Datum datum;
240 bool isnull;
241 uint32 seed;
242 bool allow_sync;
243 int i;
244 ListCell *arg;
246 scanstate->donetuples = 0;
247 params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
249 i = 0;
250 foreach(arg, scanstate->args)
252 ExprState *argstate = (ExprState *) lfirst(arg);
254 params[i] = ExecEvalExprSwitchContext(argstate,
255 econtext,
256 &isnull);
257 if (isnull)
258 ereport(ERROR,
259 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
260 errmsg("TABLESAMPLE parameter cannot be null")));
261 i++;
264 if (scanstate->repeatable)
266 datum = ExecEvalExprSwitchContext(scanstate->repeatable,
267 econtext,
268 &isnull);
269 if (isnull)
270 ereport(ERROR,
271 (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
272 errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
275 * The REPEATABLE parameter has been coerced to float8 by the parser.
276 * The reason for using float8 at the SQL level is that it will
277 * produce unsurprising results both for users used to databases that
278 * accept only integers in the REPEATABLE clause and for those who
279 * might expect that REPEATABLE works like setseed() (a float in the
280 * range from -1 to 1).
282 * We use hashfloat8() to convert the supplied value into a suitable
283 * seed. For regression-testing purposes, that has the convenient
284 * property that REPEATABLE(0) gives a machine-independent result.
286 seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
288 else
290 /* Use the seed selected by ExecInitSampleScan */
291 seed = scanstate->seed;
294 /* Set default values for params that BeginSampleScan can adjust */
295 scanstate->use_bulkread = true;
296 scanstate->use_pagemode = true;
298 /* Let tablesample method do its thing */
299 tsm->BeginSampleScan(scanstate,
300 params,
301 list_length(scanstate->args),
302 seed);
304 /* We'll use syncscan if there's no NextSampleBlock function */
305 allow_sync = (tsm->NextSampleBlock == NULL);
307 /* Now we can create or reset the HeapScanDesc */
308 if (scanstate->ss.ss_currentScanDesc == NULL)
310 scanstate->ss.ss_currentScanDesc =
311 table_beginscan_sampling(scanstate->ss.ss_currentRelation,
312 scanstate->ss.ps.state->es_snapshot,
313 0, NULL,
314 scanstate->use_bulkread,
315 allow_sync,
316 scanstate->use_pagemode);
318 else
320 table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
321 scanstate->use_bulkread,
322 allow_sync,
323 scanstate->use_pagemode);
326 pfree(params);
328 /* And we're initialized. */
329 scanstate->begun = true;
333 * Get next tuple from TABLESAMPLE method.
335 static TupleTableSlot *
336 tablesample_getnext(SampleScanState *scanstate)
338 TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
339 TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
341 ExecClearTuple(slot);
343 if (scanstate->done)
344 return NULL;
346 for (;;)
348 if (!scanstate->haveblock)
350 if (!table_scan_sample_next_block(scan, scanstate))
352 scanstate->haveblock = false;
353 scanstate->done = true;
355 /* exhausted relation */
356 return NULL;
359 scanstate->haveblock = true;
362 if (!table_scan_sample_next_tuple(scan, scanstate, slot))
365 * If we get here, it means we've exhausted the items on this page
366 * and it's time to move to the next.
368 scanstate->haveblock = false;
369 continue;
372 /* Found visible tuple, return it. */
373 break;
376 scanstate->donetuples++;
378 return slot;