From 201b47e01d3884458073870af12640135b0ed666 Mon Sep 17 00:00:00 2001 From: sbailey Date: Wed, 7 Apr 2021 12:33:16 -0700 Subject: [PATCH] Make Linter category counts more accurate when counts are low * The code now produces an accurate count if the number of errors for a category is below the threshold set by a public constant MAX_ACCURATE_COUNT (currently 20). The database record count limit was originally set to 1, to determine accurately, if there were actually 0 errors in a category as the estimate code would never report 0. If not 0, it would use the estimated count which does not produce an accurate count for any other number of errors. For low error counts this is annoying to editors and unnecessary. The additional CPU/disk activity to accurately check for low error counts is not significantly more than checking for 0 or 1, as checking for 0 likely requires a complete table scan which is probably expensive compared to a low count that early outs when it hits to record limit. * An improvement to consider is recording the accurate count in a separate tiny table, and maintaining an accurate count there which is used in preference to doing the select with row limit based on say a 30 second TTL, to prevent a stampede of requests from doing extraneous database operations. * Added unit test coverage for accurately counting low error conditions that are lower than the threshold and also verify that the estimate is inaccurate beyond the error count threshold. Bug: T194872 Change-Id: I4f74cfe3bf9601baa0dc8fa6464a68030ac2bc4b --- includes/Database.php | 21 ++++++++------ tests/phpunit/DatabaseTest.php | 51 +++++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/includes/Database.php b/includes/Database.php index 1fbe0823..ac5a20a8 100644 --- a/includes/Database.php +++ b/includes/Database.php @@ -33,6 +33,7 @@ class Database { * for a page, the rest are just dropped */ public const MAX_PER_CAT = 20; + public const MAX_ACCURATE_COUNT = 20; /** * @var int @@ -246,21 +247,25 @@ class Database { */ private function getTotalsEstimate( $catId ) { $dbr = wfGetDB( DB_REPLICA ); - // First see if there are no rows, since the distinction - // between 0 and 1 is important. And estimateRowCount seems - // to never return 0. + // First see if there are no rows, or a moderate number + // within the limit specified by the MAX_ACCURATE_COUNT. + // The distinction between 0, a few and a lot is important + // to determine first, as estimateRowCount seem to never + // return 0 or accurate low error counts. $rows = $dbr->selectRowCount( 'linter', '*', [ 'linter_cat' => $catId ], __METHOD__, - [ 'LIMIT' => 1 ] + [ 'LIMIT' => self::MAX_ACCURATE_COUNT ] ); - if ( $rows === 0 ) { - return 0; + // Return an accurate count if the number of errors is + // below the maximum accurate count limit + if ( $rows < self::MAX_ACCURATE_COUNT ) { + return $rows; } - - // Now we can just estimate + // Now we can just estimate if the maximum accurate count limit + // was returned, which isn't the actual count but the limit reached return $dbr->estimateRowCount( 'linter', '*', diff --git a/tests/phpunit/DatabaseTest.php b/tests/phpunit/DatabaseTest.php index 35cc3ef3..0837e27e 100644 --- a/tests/phpunit/DatabaseTest.php +++ b/tests/phpunit/DatabaseTest.php @@ -29,7 +29,6 @@ use MediaWikiTestCase; * @covers MediaWiki\Linter\Database */ class DatabaseTest extends MediaWikiTestCase { - public function testConstructor() { $this->assertInstanceOf( Database::class, new Database( 5 ) ); } @@ -62,6 +61,16 @@ class DatabaseTest extends MediaWikiTestCase { $this->assertArrayEquals( $expectedIds, $actualIds ); } + private function createManyLintErrors( $lintDb, $errorCount ) { + $manyLintErrors = []; + for ( $i = 0; $i < $errorCount; $i++ ) { + $manyLintErrors[] = new LintError( + 'obsolete-tag', [ 15, 20 + $i ], [ 'name' => 'big' ] + ); + } + $lintDb->setForPage( $manyLintErrors ); + } + public function testSetForPage() { $lintDb = new Database( 5 ); $dummyErrors = $this->getDummyLintErrors(); @@ -69,11 +78,21 @@ class DatabaseTest extends MediaWikiTestCase { $this->assertSetForPageResult( $result, [], [ 'fostered' => 1, 'obsolete-tag' => 1 ] ); $this->assertLintErrorsEqual( $dummyErrors, $lintDb->getForPage() ); + // Accurate low error count values should match for both methods + $resultTotals = $lintDb->getTotalsForPage(); + $resultEstimatedTotals = $lintDb->getTotals(); + $this->assertEquals( $resultTotals, $resultEstimatedTotals ); + // Should delete the second error $result2 = $lintDb->setForPage( [ $dummyErrors[0] ] ); $this->assertSetForPageResult( $result2, [ 'obsolete-tag' => 1 ], [] ); $this->assertLintErrorsEqual( [ $dummyErrors[0] ], $lintDb->getForPage() ); + // Accurate low error count values should match for both methods + $resultTotals = $lintDb->getTotalsForPage(); + $resultEstimatedTotals = $lintDb->getTotals(); + $this->assertEquals( $resultTotals, $resultEstimatedTotals ); + // Insert the second error, delete the first $result3 = $lintDb->setForPage( [ $dummyErrors[1] ] ); $this->assertSetForPageResult( $result3, [ 'fostered' => 1 ], [ 'obsolete-tag' => 1 ] ); @@ -83,6 +102,36 @@ class DatabaseTest extends MediaWikiTestCase { $result4 = $lintDb->setForPage( [] ); $this->assertSetForPageResult( $result4, [ 'obsolete-tag' => 1 ], [] ); $this->assertLintErrorsEqual( [], $lintDb->getForPage() ); + + // Accurate low error count values should match for both methods + $resultTotals = $lintDb->getTotalsForPage(); + $resultEstimatedTotals = $lintDb->getTotals(); + $this->assertEquals( $resultTotals, $resultEstimatedTotals ); + + // For error counts below the MAX_ACCURATE_COUNT, both error + // count methods should return the same count. + self::createManyLintErrors( $lintDb, Database::MAX_ACCURATE_COUNT - 1 ); + $resultTotals = $lintDb->getTotalsForPage(); + $resultEstimatedTotals = $lintDb->getTotals(); + $this->assertEquals( $resultTotals, $resultEstimatedTotals ); + + // For error counts equal to or above the MAX_ACCURATE_COUNT, both error + // count methods should NOT return the same count in this test scenario + // because previously added and deleted records will be included + // in the estimated count which is normal. + self::createManyLintErrors( $lintDb, Database::MAX_ACCURATE_COUNT ); + $resultTotals = $lintDb->getTotalsForPage(); + $resultEstimatedTotals = $lintDb->getTotals(); + $this->assertNotEquals( $resultTotals, $resultEstimatedTotals ); + + // For error counts greatly above the MAX_ACCURATE_COUNT, the estimated + // count method should return a greater count in this test scenario + // because previously added and deleted records will be included + // in the estimated count which is normal. + self::createManyLintErrors( $lintDb, Database::MAX_ACCURATE_COUNT * 10 ); + $resultTotals = $lintDb->getTotalsForPage(); + $resultEstimatedTotals = $lintDb->getTotals(); + $this->assertGreaterThan( $resultTotals, $resultEstimatedTotals ); } }