  * result by interpolating between startup_cost and total_cost.  In detail:
  *		actual_cost = startup_cost +
  *			(total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
- * Note that a relation's rows count (and, by extension, a Plan's plan_rows)
- * are set without regard to any LIMIT, so that this equation works properly.
- * (Also, these routines guarantee not to set the rows count to zero, so there
- * will be no zero divide.)  The LIMIT is applied as a separate Plan node.
+ * Note that a base relation's rows count (and, by extension, plan_rows for
+ * plan nodes below the LIMIT node) are set without regard to any LIMIT, so
+ * that this equation works properly.  (Also, these routines guarantee not to
+ * set the rows count to zero, so there will be no zero divide.)  The LIMIT is
+ * applied as a top-level plan node.
  *
  *
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.72 2001/05/09 00:35:09 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.73 2001/05/09 23:13:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
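
[Note, not part of the patch: a minimal standalone sketch of the interpolation
rule quoted in the header comment above. All numbers and variable names are
invented for illustration; nothing here is planner API.]

#include <stdio.h>

int
main(void)
{
	double		startup_cost = 10.0;	/* cost before the first tuple */
	double		total_cost = 110.0;		/* cost to fetch all tuples */
	double		plan_rows = 1000.0;		/* rows estimate, ignoring LIMIT */
	double		tuples_to_fetch = 50.0;	/* what the LIMIT will actually pull */

	/* actual_cost = startup + (total - startup) * fraction fetched */
	double		actual_cost = startup_cost +
		(total_cost - startup_cost) * tuples_to_fetch / plan_rows;

	printf("estimated cost under LIMIT: %g\n", actual_cost);	/* prints 15 */
	return 0;
}
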
@@ -205,12 +206,18 @@ cost_index(Path *path, Query *root,
 {
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
-	Cost		cpu_per_tuple;
 	Cost		indexStartupCost;
 	Cost		indexTotalCost;
 	Selectivity indexSelectivity;
+	double		indexCorrelation,
+				csquared;
+	Cost		min_IO_cost,
+				max_IO_cost;
+	Cost		cpu_per_tuple;
 	double		tuples_fetched;
 	double		pages_fetched;
+	double		T,
+				b;
 
 	/* Should only be applied to base relations */
 	Assert(IsA(baserel, RelOptInfo) && IsA(index, IndexOptInfo));
@@ -224,63 +231,109 @@ cost_index(Path *path, Query *root,
 	 * Call index-access-method-specific code to estimate the processing
 	 * cost for scanning the index, as well as the selectivity of the
 	 * index (ie, the fraction of main-table tuples we will have to
-	 * retrieve).
+	 * retrieve) and its correlation to the main-table tuple order.
 	 */
-	OidFunctionCall7(index->amcostestimate,
+	OidFunctionCall8(index->amcostestimate,
 					 PointerGetDatum(root),
 					 PointerGetDatum(baserel),
 					 PointerGetDatum(index),
 					 PointerGetDatum(indexQuals),
 					 PointerGetDatum(&indexStartupCost),
 					 PointerGetDatum(&indexTotalCost),
-					 PointerGetDatum(&indexSelectivity));
+					 PointerGetDatum(&indexSelectivity),
+					 PointerGetDatum(&indexCorrelation));
 
 	/* all costs for touching index itself included here */
 	startup_cost += indexStartupCost;
 	run_cost += indexTotalCost - indexStartupCost;
 
-	/*
+	/*----------
 	 * Estimate number of main-table tuples and pages fetched.
 	 *
-	 * If the number of tuples is much smaller than the number of pages in
-	 * the relation, each tuple will cost a separate nonsequential fetch.
-	 * If it is comparable or larger, then probably we will be able to
-	 * avoid some fetches.  We use a growth rate of log(#tuples/#pages +
-	 * 1) --- probably totally bogus, but intuitively it gives the right
-	 * shape of curve at least.
+	 * When the index ordering is uncorrelated with the table ordering,
+	 * we use an approximation proposed by Mackert and Lohman, "Index Scans
+	 * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
+	 * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
+	 * The Mackert and Lohman approximation is that the number of pages
+	 * fetched is
+	 *	PF =
+	 *		min(2TNs/(2T+Ns), T)		when T <= b
+	 *		2TNs/(2T+Ns)				when T > b and Ns <= 2Tb/(2T-b)
+	 *		b + (Ns - 2Tb/(2T-b))*(T-b)/T	when T > b and Ns > 2Tb/(2T-b)
+	 * where
+	 *		T = # pages in table
+	 *		N = # tuples in table
+	 *		s = selectivity = fraction of table to be scanned
+	 *		b = # buffer pages available (we include kernel space here)
 	 *
-	 * XXX if the relation has recently been "clustered" using this index,
-	 * then in fact the target tuples will be highly nonuniformly
-	 * distributed, and we will be seriously overestimating the scan cost!
-	 * Currently we have no way to know whether the relation has been
-	 * clustered, nor how much it's been modified since the last
-	 * clustering, so we ignore this effect.  Would be nice to do better
-	 * someday.
+	 * When the index ordering is exactly correlated with the table ordering
+	 * (just after a CLUSTER, for example), the number of pages fetched should
+	 * be just sT.  What's more, these will be sequential fetches, not the
+	 * random fetches that occur in the uncorrelated case.  So, depending on
+	 * the extent of correlation, we should estimate the actual I/O cost
+	 * somewhere between s * T * 1.0 and PF * random_cost.  We currently
+	 * interpolate linearly between these two endpoints based on the
+	 * correlation squared (XXX is that appropriate?).
+	 *
+	 * In any case the number of tuples fetched is Ns.
+	 *----------
 	 */
 
 	tuples_fetched = indexSelectivity * baserel->tuples;
 	/* Don't believe estimates less than 1... */
 	if (tuples_fetched < 1.0)
 		tuples_fetched = 1.0;
 
-	if (baserel->pages > 0)
-		pages_fetched = ceil(baserel->pages *
-							 log(tuples_fetched / baserel->pages + 1.0));
+	/* This part is the Mackert and Lohman formula */
+
+	T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
+	b = (effective_cache_size > 1) ? effective_cache_size : 1.0;
+
+	if (T <= b)
+	{
+		pages_fetched =
+			(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+		if (pages_fetched > T)
+			pages_fetched = T;
+	}
 	else
-		pages_fetched = tuples_fetched;
+	{
+		double		lim;
+
+		lim = (2.0 * T * b) / (2.0 * T - b);
+		if (tuples_fetched <= lim)
+		{
+			pages_fetched =
+				(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+		}
+		else
+		{
+			pages_fetched =
+				b + (tuples_fetched - lim) * (T - b) / T;
+		}
+	}
 
 	/*
-	 * Now estimate one nonsequential access per page fetched, plus
-	 * appropriate CPU costs per tuple.
+	 * min_IO_cost corresponds to the perfectly correlated case (csquared=1),
+	 * max_IO_cost to the perfectly uncorrelated case (csquared=0).  Note
+	 * that we just charge random_page_cost per page in the uncorrelated
+	 * case, rather than using cost_nonsequential_access, since we've already
+	 * accounted for caching effects by using the Mackert model.
 	 */
+	min_IO_cost = ceil(indexSelectivity * T);
+	max_IO_cost = pages_fetched * random_page_cost;
 
-	/* disk costs for main table */
-	run_cost += pages_fetched * cost_nonsequential_access(baserel->pages);
+	/*
+	 * Now interpolate based on estimated index order correlation
+	 * to get total disk I/O cost for main table accesses.
+	 */
+	csquared = indexCorrelation * indexCorrelation;
 
-	/* CPU costs */
-	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
+	run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
 
 	/*
+	 * Estimate CPU costs per tuple.
+	 *
 	 * Normally the indexquals will be removed from the list of
 	 * restriction clauses that we have to evaluate as qpquals, so we
 	 * should subtract their costs from baserestrictcost.  For a lossy
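
[Note, not part of the patch: a standalone sketch that reproduces the
piecewise Mackert-Lohman estimate and the csquared interpolation added above,
with made-up inputs. All names here are local to the example; only the
formulas mirror the patched code.]

#include <math.h>
#include <stdio.h>

static double
pages_fetched_ml(double tuples_fetched, double T, double b)
{
	if (T <= b)
	{
		double		pf = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);

		return (pf > T) ? T : pf;
	}
	else
	{
		double		lim = (2.0 * T * b) / (2.0 * T - b);

		if (tuples_fetched <= lim)
			return (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
		return b + (tuples_fetched - lim) * (T - b) / T;
	}
}

int
main(void)
{
	double		T = 10000.0;		/* pages in table */
	double		N = 1000000.0;		/* tuples in table */
	double		s = 0.01;			/* index selectivity */
	double		b = 1000.0;			/* effective cache size, in pages */
	double		random_page_cost = 4.0;
	double		correlation = 0.5;	/* as amcostestimate might return */

	double		pf = pages_fetched_ml(s * N, T, b);
	double		max_io = pf * random_page_cost;	/* uncorrelated endpoint */
	double		min_io = ceil(s * T);			/* perfectly correlated endpoint */
	double		csq = correlation * correlation;

	printf("pages fetched: %g, interpolated IO cost: %g\n",
		   pf, max_io + csq * (min_io - max_io));
	return 0;
}

With these inputs T > b and Ns exceeds 2Tb/(2T-b), so the third branch of the
formula applies; a correlation of 0.5 then discounts a quarter of the gap
between the random-fetch and sequential-fetch endpoints.
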
@@ -290,6 +343,8 @@ cost_index(Path *path, Query *root,
 	 * Rather than work out exactly how much to subtract, we don't
 	 * subtract anything in that case either.
 	 */
+	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
+
 	if (!index->lossy && !is_injoin)
 		cpu_per_tuple -= cost_qual_eval(indexQuals);
 
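
[Note, not part of the patch: the per-tuple CPU accounting in the last hunk,
shown in isolation with invented numbers. Indexqual evaluation cost is
subtracted only when the index is not lossy and the scan is not an inner
indexscan, matching the comment above.]

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
	double		cpu_tuple_cost = 0.01;
	double		baserestrictcost = 0.0075;	/* includes indexqual cost */
	double		indexqual_cost = 0.0025;	/* stand-in for cost_qual_eval(indexQuals) */
	bool		lossy = false;
	bool		is_injoin = false;

	double		cpu_per_tuple = cpu_tuple_cost + baserestrictcost;

	if (!lossy && !is_injoin)
		cpu_per_tuple -= indexqual_cost;	/* quals already checked by the index */

	printf("cpu_per_tuple = %g\n", cpu_per_tuple);	/* prints 0.015 */
	return 0;
}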