mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/Linter
synced 2024-11-23 23:44:17 +00:00
Make Linter category counts more accurate when counts are low
* The code now produces an accurate count if the number of errors for a category is below the threshold set by a public constant MAX_ACCURATE_COUNT (currently 20). The database record count limit was originally set to 1, to determine accurately, if there were actually 0 errors in a category as the estimate code would never report 0. If not 0, it would use the estimated count which does not produce an accurate count for any other number of errors. For low error counts this is annoying to editors and unnecessary. The additional CPU/disk activity to accurately check for low error counts is not significantly more than checking for 0 or 1, as checking for 0 likely requires a complete table scan which is probably expensive compared to a low count that early outs when it hits to record limit. * An improvement to consider is recording the accurate count in a separate tiny table, and maintaining an accurate count there which is used in preference to doing the select with row limit based on say a 30 second TTL, to prevent a stampede of requests from doing extraneous database operations. * Added unit test coverage for accurately counting low error conditions that are lower than the threshold and also verify that the estimate is inaccurate beyond the error count threshold. Bug: T194872 Change-Id: I4f74cfe3bf9601baa0dc8fa6464a68030ac2bc4b
This commit is contained in:
parent
077c30220e
commit
201b47e01d
|
@ -33,6 +33,7 @@ class Database {
|
|||
* for a page, the rest are just dropped
|
||||
*/
|
||||
public const MAX_PER_CAT = 20;
|
||||
public const MAX_ACCURATE_COUNT = 20;
|
||||
|
||||
/**
|
||||
* @var int
|
||||
|
@ -246,21 +247,25 @@ class Database {
|
|||
*/
|
||||
private function getTotalsEstimate( $catId ) {
|
||||
$dbr = wfGetDB( DB_REPLICA );
|
||||
// First see if there are no rows, since the distinction
|
||||
// between 0 and 1 is important. And estimateRowCount seems
|
||||
// to never return 0.
|
||||
// First see if there are no rows, or a moderate number
|
||||
// within the limit specified by the MAX_ACCURATE_COUNT.
|
||||
// The distinction between 0, a few and a lot is important
|
||||
// to determine first, as estimateRowCount seem to never
|
||||
// return 0 or accurate low error counts.
|
||||
$rows = $dbr->selectRowCount(
|
||||
'linter',
|
||||
'*',
|
||||
[ 'linter_cat' => $catId ],
|
||||
__METHOD__,
|
||||
[ 'LIMIT' => 1 ]
|
||||
[ 'LIMIT' => self::MAX_ACCURATE_COUNT ]
|
||||
);
|
||||
if ( $rows === 0 ) {
|
||||
return 0;
|
||||
// Return an accurate count if the number of errors is
|
||||
// below the maximum accurate count limit
|
||||
if ( $rows < self::MAX_ACCURATE_COUNT ) {
|
||||
return $rows;
|
||||
}
|
||||
|
||||
// Now we can just estimate
|
||||
// Now we can just estimate if the maximum accurate count limit
|
||||
// was returned, which isn't the actual count but the limit reached
|
||||
return $dbr->estimateRowCount(
|
||||
'linter',
|
||||
'*',
|
||||
|
|
|
@ -29,7 +29,6 @@ use MediaWikiTestCase;
|
|||
* @covers MediaWiki\Linter\Database
|
||||
*/
|
||||
class DatabaseTest extends MediaWikiTestCase {
|
||||
|
||||
public function testConstructor() {
|
||||
$this->assertInstanceOf( Database::class, new Database( 5 ) );
|
||||
}
|
||||
|
@ -62,6 +61,16 @@ class DatabaseTest extends MediaWikiTestCase {
|
|||
$this->assertArrayEquals( $expectedIds, $actualIds );
|
||||
}
|
||||
|
||||
private function createManyLintErrors( $lintDb, $errorCount ) {
|
||||
$manyLintErrors = [];
|
||||
for ( $i = 0; $i < $errorCount; $i++ ) {
|
||||
$manyLintErrors[] = new LintError(
|
||||
'obsolete-tag', [ 15, 20 + $i ], [ 'name' => 'big' ]
|
||||
);
|
||||
}
|
||||
$lintDb->setForPage( $manyLintErrors );
|
||||
}
|
||||
|
||||
public function testSetForPage() {
|
||||
$lintDb = new Database( 5 );
|
||||
$dummyErrors = $this->getDummyLintErrors();
|
||||
|
@ -69,11 +78,21 @@ class DatabaseTest extends MediaWikiTestCase {
|
|||
$this->assertSetForPageResult( $result, [], [ 'fostered' => 1, 'obsolete-tag' => 1 ] );
|
||||
$this->assertLintErrorsEqual( $dummyErrors, $lintDb->getForPage() );
|
||||
|
||||
// Accurate low error count values should match for both methods
|
||||
$resultTotals = $lintDb->getTotalsForPage();
|
||||
$resultEstimatedTotals = $lintDb->getTotals();
|
||||
$this->assertEquals( $resultTotals, $resultEstimatedTotals );
|
||||
|
||||
// Should delete the second error
|
||||
$result2 = $lintDb->setForPage( [ $dummyErrors[0] ] );
|
||||
$this->assertSetForPageResult( $result2, [ 'obsolete-tag' => 1 ], [] );
|
||||
$this->assertLintErrorsEqual( [ $dummyErrors[0] ], $lintDb->getForPage() );
|
||||
|
||||
// Accurate low error count values should match for both methods
|
||||
$resultTotals = $lintDb->getTotalsForPage();
|
||||
$resultEstimatedTotals = $lintDb->getTotals();
|
||||
$this->assertEquals( $resultTotals, $resultEstimatedTotals );
|
||||
|
||||
// Insert the second error, delete the first
|
||||
$result3 = $lintDb->setForPage( [ $dummyErrors[1] ] );
|
||||
$this->assertSetForPageResult( $result3, [ 'fostered' => 1 ], [ 'obsolete-tag' => 1 ] );
|
||||
|
@ -83,6 +102,36 @@ class DatabaseTest extends MediaWikiTestCase {
|
|||
$result4 = $lintDb->setForPage( [] );
|
||||
$this->assertSetForPageResult( $result4, [ 'obsolete-tag' => 1 ], [] );
|
||||
$this->assertLintErrorsEqual( [], $lintDb->getForPage() );
|
||||
|
||||
// Accurate low error count values should match for both methods
|
||||
$resultTotals = $lintDb->getTotalsForPage();
|
||||
$resultEstimatedTotals = $lintDb->getTotals();
|
||||
$this->assertEquals( $resultTotals, $resultEstimatedTotals );
|
||||
|
||||
// For error counts below the MAX_ACCURATE_COUNT, both error
|
||||
// count methods should return the same count.
|
||||
self::createManyLintErrors( $lintDb, Database::MAX_ACCURATE_COUNT - 1 );
|
||||
$resultTotals = $lintDb->getTotalsForPage();
|
||||
$resultEstimatedTotals = $lintDb->getTotals();
|
||||
$this->assertEquals( $resultTotals, $resultEstimatedTotals );
|
||||
|
||||
// For error counts equal to or above the MAX_ACCURATE_COUNT, both error
|
||||
// count methods should NOT return the same count in this test scenario
|
||||
// because previously added and deleted records will be included
|
||||
// in the estimated count which is normal.
|
||||
self::createManyLintErrors( $lintDb, Database::MAX_ACCURATE_COUNT );
|
||||
$resultTotals = $lintDb->getTotalsForPage();
|
||||
$resultEstimatedTotals = $lintDb->getTotals();
|
||||
$this->assertNotEquals( $resultTotals, $resultEstimatedTotals );
|
||||
|
||||
// For error counts greatly above the MAX_ACCURATE_COUNT, the estimated
|
||||
// count method should return a greater count in this test scenario
|
||||
// because previously added and deleted records will be included
|
||||
// in the estimated count which is normal.
|
||||
self::createManyLintErrors( $lintDb, Database::MAX_ACCURATE_COUNT * 10 );
|
||||
$resultTotals = $lintDb->getTotalsForPage();
|
||||
$resultEstimatedTotals = $lintDb->getTotals();
|
||||
$this->assertGreaterThan( $resultTotals, $resultEstimatedTotals );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue