Use estimateRowCount() instead of actually counting everything

On large wikis with lots of lint errors, counting the entire table can
be problematic from a performance perspective, sometimes taking minutes.

Instead, use Database::estimateRowCount(), which uses EXPLAIN SELECT
COUNT(*) to get an approximate value for the number of rows. We do make
sure that if the category actually has no rows, it will return 0.

This should be considered a temporary solution, and we should look into
doing something like the SiteStats incremental updates table in the long
run.

Bug: T184280
Change-Id: I2d4dcc615477fd60e41dfed4a3d1a3ad52a9f4af
This commit is contained in:
Kunal Mehta 2018-02-01 12:59:08 -08:00
parent 7ba5f25eeb
commit db5e5e9003

View file

@ -230,14 +230,53 @@ class Database {
* @return int[]
*/
public function getTotalsForPage() {
// Restrict the totals to rows whose linter_page equals this object's page ID.
// NOTE(review): the two consecutive return statements below look like the
// removed/added pair of a diff whose +/- markers were lost; as written only
// the first one can ever execute — confirm which one is intended.
return $this->getTotals( [ 'linter_page' => $this->pageId ] );
return $this->getTotalsAccurate( [ 'linter_page' => $this->pageId ] );
}
/**
* @param array $conds Query conditions
* Get an estimate of how many rows there are for the
* specified category with EXPLAIN SELECT COUNT(*).
* If the category actually has no rows, then 0 will
* be returned.
*
* @param int $catId
*
* @return int
*/
private function getTotalsEstimate( $catId ) {
	$db = wfGetDB( DB_REPLICA );
	$where = [ 'linter_cat' => $catId ];

	// estimateRowCount() seems to never report 0, and the difference
	// between "no rows" and "at least one row" matters, so probe for
	// a single matching row before estimating.
	$probe = $db->selectRowCount( 'linter', '*', $where, __METHOD__, [ 'LIMIT' => 1 ] );
	if ( $probe !== 0 ) {
		// The category is not empty; the estimate is good enough from here.
		return $db->estimateRowCount( 'linter', '*', $where, __METHOD__ );
	}

	return 0;
}
/**
* This uses COUNT(*), which is accurate, but can be significantly
* slower depending upon how many rows are in the database.
*
* @param array $conds
*
* @return int[]
*/
public function getTotals( $conds = [] ) {
private function getTotalsAccurate( $conds = [] ) {
$rows = wfGetDB( DB_REPLICA )->select(
'linter',
[ 'linter_cat', 'COUNT(*) AS count' ],
@ -260,4 +299,17 @@ class Database {
return $ret;
}
/**
* @return int[]
*/
public function getTotals() {
	// Build a map of visible category name => estimated row count,
	// resolving each name to its numeric ID before estimating.
	$manager = $this->categoryManager;
	$totals = [];
	foreach ( $manager->getVisibleCategories() as $name ) {
		$totals[$name] = $this->getTotalsEstimate( $manager->getCategoryId( $name ) );
	}
	return $totals;
}
}