mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/Linter
synced 2024-11-23 23:44:17 +00:00
Use estimateRowCount() instead of actually counting everything
On large wikis with lots of lint errors, counting the entire table can be problematic from a performance perspective, sometimes taking minutes. Instead, use Database::estimateRowCount(), which uses EXPLAIN SELECT COUNT(*) to get an approximate value for the number of rows. We do make sure that if the category actually has no rows, it will return 0. This should be considered a temporary solution, and we should look into doing something like the SiteStats incremental updates table in the long run. Bug: T184280 Change-Id: I2d4dcc615477fd60e41dfed4a3d1a3ad52a9f4af
This commit is contained in:
parent
7ba5f25eeb
commit
db5e5e9003
|
@ -230,14 +230,53 @@ class Database {
|
|||
* @return int[]
|
||||
*/
|
||||
public function getTotalsForPage() {
|
||||
return $this->getTotals( [ 'linter_page' => $this->pageId ] );
|
||||
return $this->getTotalsAccurate( [ 'linter_page' => $this->pageId ] );
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $conds Query conditions
|
||||
* Get an estimate of how many rows there are for the
|
||||
* specified category with EXPLAIN SELECT COUNT(*).
|
||||
* If the category actually has no rows, then 0 will
|
||||
* be returned.
|
||||
*
|
||||
* @param int $catId
|
||||
*
|
||||
* @return int
|
||||
*/
|
||||
private function getTotalsEstimate( $catId ) {
	/*
	 * Approximate the number of rows in the 'linter' table whose
	 * linter_cat equals $catId.
	 *
	 * @param int $catId Category id to match against linter_cat
	 * @return int 0 when the probe query finds no row, otherwise
	 *  whatever estimateRowCount() reports for the category
	 */
	$replica = wfGetDB( DB_REPLICA );
	$table = 'linter';
	$where = [ 'linter_cat' => $catId ];

	// Probe for at most one matching row before estimating: callers
	// care about the difference between "none" and "some", and
	// estimateRowCount() seems to never report 0 on its own.
	$probe = $replica->selectRowCount(
		$table,
		'*',
		$where,
		__METHOD__,
		[ 'LIMIT' => 1 ]
	);

	if ( $probe !== 0 ) {
		// At least one row exists, so an approximate figure is acceptable.
		return $replica->estimateRowCount(
			$table,
			'*',
			$where,
			__METHOD__
		);
	}

	return 0;
}
|
||||
|
||||
/**
|
||||
* This uses COUNT(*), which is accurate, but can be significantly
|
||||
* slower depending upon how many rows are in the database.
|
||||
*
|
||||
* @param array $conds
|
||||
*
|
||||
* @return int[]
|
||||
*/
|
||||
public function getTotals( $conds = [] ) {
|
||||
private function getTotalsAccurate( $conds = [] ) {
|
||||
$rows = wfGetDB( DB_REPLICA )->select(
|
||||
'linter',
|
||||
[ 'linter_cat', 'COUNT(*) AS count' ],
|
||||
|
@ -260,4 +299,17 @@ class Database {
|
|||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return int[]
|
||||
*/
|
||||
public function getTotals() {
	/*
	 * Build a map of visible category name => estimated row count.
	 *
	 * Each name returned by the category manager's getVisibleCategories()
	 * is resolved to its id and passed to getTotalsEstimate().
	 *
	 * @return int[] Estimated totals keyed by category name
	 */
	$totals = [];
	$visibleCategories = $this->categoryManager->getVisibleCategories();

	foreach ( $visibleCategories as $categoryName ) {
		$totals[$categoryName] = $this->getTotalsEstimate(
			$this->categoryManager->getCategoryId( $categoryName )
		);
	}

	return $totals;
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue