mediawiki-extensions-Linter/tests/phpunit/DatabaseTest.php
sbailey 201b47e01d Make Linter category counts more accurate when counts are low
* The code now produces an accurate count if the number of
   errors for a category is below the threshold set by a
   public constant MAX_ACCURATE_COUNT (currently 20).
   The database record count limit was originally set to 1
   to determine accurately whether there were actually 0 errors
   in a category, as the estimate code would never report 0.
   If not 0, it would use the estimated count which does not
   produce an accurate count for any other number of errors.
   For low error counts this is annoying to editors and
   unnecessary. The additional CPU/disk activity to accurately
   check for low error counts is not significantly more than
   checking for 0 or 1, as checking for 0 likely requires
   a complete table scan which is probably expensive compared
   to a low count that exits early when it hits the record limit.

 * An improvement to consider is recording the accurate count in
   a separate tiny table, and maintaining an accurate count there
   which is used in preference to doing the select with row limit
   based on say a 30 second TTL, to prevent a stampede of requests
   from doing extraneous database operations.

 * Added unit test coverage for accurately counting low error
   conditions that are lower than the threshold and also verify
   that the estimate is inaccurate beyond the error count
   threshold.

Bug: T194872
Change-Id: I4f74cfe3bf9601baa0dc8fa6464a68030ac2bc4b
2021-04-27 10:38:24 -07:00

138 lines
5.2 KiB
PHP

<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
namespace MediaWiki\Linter\Test;
use MediaWiki\Linter\Database;
use MediaWiki\Linter\LintError;
use MediaWikiTestCase;
/**
* @group Database
* @covers MediaWiki\Linter\Database
*/
class DatabaseTest extends MediaWikiTestCase {

	/**
	 * Constructing a Database with an integer argument yields a Database
	 * instance. (The argument is presumably the page ID - confirm against
	 * the Database constructor.)
	 */
	public function testConstructor() {
		$this->assertInstanceOf( Database::class, new Database( 5 ) );
	}

	/**
	 * Build the two fixture errors used by testSetForPage(): one in the
	 * 'fostered' category and one in the 'obsolete-tag' category.
	 *
	 * @return LintError[]
	 */
	private function getDummyLintErrors() {
		return [
			new LintError(
				'fostered', [ 0, 10 ], []
			),
			new LintError(
				'obsolete-tag', [ 15, 20 ], [ 'name' => 'big' ]
			),
		];
	}

	/**
	 * Assert that a setForPage() result carries the expected 'deleted'
	 * and 'added' entries.
	 *
	 * @param array $result Value returned by Database::setForPage()
	 * @param array $deleted Expected value of $result['deleted']
	 * @param array $added Expected value of $result['added']
	 */
	private function assertSetForPageResult( $result, $deleted, $added ) {
		$this->assertArrayHasKey( 'deleted', $result );
		$this->assertEquals( $deleted, $result['deleted'] );
		$this->assertArrayHasKey( 'added', $result );
		$this->assertEquals( $added, $result['added'] );
	}

	/**
	 * Assert that two collections of LintError objects contain the same
	 * errors, comparing them by the value of LintError::id().
	 *
	 * @param LintError[] $expected
	 * @param LintError[] $actual
	 */
	private function assertLintErrorsEqual( $expected, $actual ) {
		// One shared mapper instead of two identical closures; it does not
		// need $this, so it is declared static.
		$toId = static function ( LintError $error ) {
			return $error->id();
		};
		$this->assertArrayEquals(
			array_map( $toId, $expected ),
			array_map( $toId, $actual )
		);
	}

	/**
	 * Create $errorCount 'obsolete-tag' errors, each with a different
	 * second location value, and pass them to setForPage() in one call.
	 *
	 * @param Database $lintDb
	 * @param int $errorCount Number of errors to generate
	 */
	private function createManyLintErrors( $lintDb, $errorCount ) {
		$manyLintErrors = [];
		for ( $i = 0; $i < $errorCount; $i++ ) {
			// Vary the second location value so that every error is distinct.
			$manyLintErrors[] = new LintError(
				'obsolete-tag', [ 15, 20 + $i ], [ 'name' => 'big' ]
			);
		}
		$lintDb->setForPage( $manyLintErrors );
	}

	/**
	 * Assert that getTotalsForPage() and getTotals() report the same
	 * values, which is what the test expects while error counts are low.
	 *
	 * @param Database $lintDb
	 */
	private function assertTotalsMatch( Database $lintDb ) {
		$resultTotals = $lintDb->getTotalsForPage();
		$resultEstimatedTotals = $lintDb->getTotals();
		$this->assertEquals( $resultTotals, $resultEstimatedTotals );
	}

	/**
	 * Exercise setForPage() through a sequence of additions and deletions,
	 * checking the reported changes, the stored errors, and how
	 * getTotals() compares with getTotalsForPage() below, at and far above
	 * Database::MAX_ACCURATE_COUNT.
	 */
	public function testSetForPage() {
		$lintDb = new Database( 5 );
		$dummyErrors = $this->getDummyLintErrors();

		$result = $lintDb->setForPage( $dummyErrors );
		$this->assertSetForPageResult( $result, [], [ 'fostered' => 1, 'obsolete-tag' => 1 ] );
		$this->assertLintErrorsEqual( $dummyErrors, $lintDb->getForPage() );
		// Accurate low error count values should match for both methods
		$this->assertTotalsMatch( $lintDb );

		// Should delete the second error
		$result2 = $lintDb->setForPage( [ $dummyErrors[0] ] );
		$this->assertSetForPageResult( $result2, [ 'obsolete-tag' => 1 ], [] );
		$this->assertLintErrorsEqual( [ $dummyErrors[0] ], $lintDb->getForPage() );
		// Accurate low error count values should match for both methods
		$this->assertTotalsMatch( $lintDb );

		// Insert the second error, delete the first
		$result3 = $lintDb->setForPage( [ $dummyErrors[1] ] );
		$this->assertSetForPageResult( $result3, [ 'fostered' => 1 ], [ 'obsolete-tag' => 1 ] );
		$this->assertLintErrorsEqual( [ $dummyErrors[1] ], $lintDb->getForPage() );

		// Delete the second (only) error
		$result4 = $lintDb->setForPage( [] );
		$this->assertSetForPageResult( $result4, [ 'obsolete-tag' => 1 ], [] );
		$this->assertLintErrorsEqual( [], $lintDb->getForPage() );
		// Accurate low error count values should match for both methods
		$this->assertTotalsMatch( $lintDb );

		// For error counts below the MAX_ACCURATE_COUNT, both error
		// count methods should return the same count.
		$this->createManyLintErrors( $lintDb, Database::MAX_ACCURATE_COUNT - 1 );
		$this->assertTotalsMatch( $lintDb );

		// For error counts equal to or above the MAX_ACCURATE_COUNT, both error
		// count methods should NOT return the same count in this test scenario
		// because previously added and deleted records will be included
		// in the estimated count which is normal.
		$this->createManyLintErrors( $lintDb, Database::MAX_ACCURATE_COUNT );
		$resultTotals = $lintDb->getTotalsForPage();
		$resultEstimatedTotals = $lintDb->getTotals();
		$this->assertNotEquals( $resultTotals, $resultEstimatedTotals );

		// For error counts greatly above the MAX_ACCURATE_COUNT, the estimated
		// count method should return a greater count in this test scenario
		// because previously added and deleted records will be included
		// in the estimated count which is normal.
		$this->createManyLintErrors( $lintDb, Database::MAX_ACCURATE_COUNT * 10 );
		$resultTotals = $lintDb->getTotalsForPage();
		$resultEstimatedTotals = $lintDb->getTotals();
		$this->assertGreaterThan( $resultTotals, $resultEstimatedTotals );
	}
}