Phase 3: migration code for the namespace column added to the Linter table

* Migrates namespace info from the page table's page_namespace field
   to the new linter table field linter_namespace. This duplication
   of the namespace value was requested to greatly reduce the amount
   of database activity required by the linter search and reporting
   code.

 * This patch has been prepared as a dark launch patch enabled with
   config value LinterMigrateNamespaceStage and assumes that the
   Linter table has had the linter_namespace column added to it,
   and recording of the namespace field is already enabled and is
   populating the namespace column.

 * The migration code is now runnable from the Linter/maintenance directory
   using migrateNamespace.php, which will be deployed in a separate
   patch. The maintenance code creates an appropriate environment
   to call migrateNamespace() in Database.php.

Bug: T299612
Change-Id: I73cb80729d6a5a8716fe93164ad1e42e6958d672
This commit is contained in:
sbailey 2022-01-25 15:27:07 -08:00
parent babda020af
commit 702ce215d0
3 changed files with 204 additions and 2 deletions

View file

@ -200,6 +200,9 @@
"LinterWriteNamespaceColumnStage": {
"value": false
},
"LinterMigrateNamespaceStage": {
"value": false
},
"LinterWriteTagAndTemplateColumnsStage": {
"value": false
}

View file

@ -25,6 +25,7 @@ use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MediaWikiServices;
use WikiMap;
use Wikimedia\Rdbms\DBConnRef;
use Wikimedia\Rdbms\SelectQueryBuilder;
/**
* Database logic
@ -420,4 +421,112 @@ class Database {
$totalsLookup->touchAllCategoriesCache();
}
/**
 * Populate linter.linter_namespace from page.page_namespace for every linter
 * record whose linter_namespace is still NULL.
 *
 * The linter table is scanned in ascending linter_page order, $linterBatchSize
 * distinct page IDs at a time. Each such group is split into chunks of at most
 * $pageBatchSize IDs; every chunk is resolved with one query against the page
 * table, followed by one UPDATE per page row found. After each chunk the code
 * waits for replication and then sleeps for $sleep seconds.
 *
 * Because only NULL rows are selected and updated, running this more than once
 * is safe; a run with nothing left to migrate returns after a single query.
 * Linter rows whose page ID has no row in the page table are left untouched.
 *
 * @param int $pageBatchSize maximum number of page IDs per page-table query;
 *   values below 1 are treated as 1
 * @param int $linterBatchSize maximum number of distinct page IDs read from the
 *   linter table per pass; values below 1 are treated as 1
 * @param int $sleep seconds to sleep after each page chunk; negative values are
 *   treated as 0
 * @param bool $bypassConfig when true, the LinterMigrateNamespaceStage config
 *   check is skipped
 * @return int number of page rows for which an UPDATE was issued, each page
 *   having one or more linter records
 */
public static function migrateNamespace( int $pageBatchSize,
	int $linterBatchSize,
	int $sleep,
	bool $bypassConfig = false ): int {
	// Config gate. The original author's note says this path is exercised by the
	// phpunit test and bypassed when run as a maintenance script.
	if ( !$bypassConfig ) {
		$config = MediaWikiServices::getInstance()->getMainConfig();
		if ( !$config->get( 'LinterMigrateNamespaceStage' ) ) {
			return 0;
		}
	}

	// Normalise the tuning knobs. A page batch size below 1 would otherwise make
	// the chunking below never advance (endless loop), and a negative linter
	// batch size would produce an invalid LIMIT.
	$pageBatchSize = max( 1, $pageBatchSize );
	$linterBatchSize = max( 1, $linterBatchSize );
	$sleep = max( 0, $sleep );

	$logger = LoggerFactory::getInstance( 'MigrateNamespaceChannel' );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$dbw = self::getDBConnectionRef( DB_PRIMARY );
	$dbread = self::getDBConnectionRef( DB_REPLICA );

	$logger->info( "Migrate namespace starting\n" );

	$updated = 0;
	// Highest linter_page value seen so far; each pass resumes after it so that
	// pages which cannot be migrated are not selected again.
	$lastElement = 0;
	do {
		// Gather some unique pageId values in linter table records into an array
		$linterPages = [];
		$queryLinterTable = new SelectQueryBuilder( $dbw );
		$queryLinterTable
			->select( 'DISTINCT linter_page' )
			->from( 'linter' )
			->where( [ 'linter_namespace IS NULL', 'linter_page > ' . $lastElement ] )
			->orderBy( 'linter_page' )
			->limit( $linterBatchSize )
			->caller( __METHOD__ );
		$result = $queryLinterTable->fetchResultSet();
		foreach ( $result as $row ) {
			$lastElement = intval( $row->linter_page );
			$linterPages[] = $lastElement;
		}

		// array_chunk() yields nothing for an empty list, so the final (empty)
		// pass performs no page-table queries at all.
		foreach ( array_chunk( $linterPages, $pageBatchSize ) as $pageIdBatch ) {
			$queryPageTable = new SelectQueryBuilder( $dbread );
			$queryPageTable
				->fields( [ 'page_id', 'page_namespace' ] )
				->from( 'page' )
				->where( [ 'page_id' => $pageIdBatch ] )
				->caller( __METHOD__ );
			$pageResults = $queryPageTable->fetchResultSet();
			foreach ( $pageResults as $pageRow ) {
				$pageId = intval( $pageRow->page_id );
				$namespaceID = intval( $pageRow->page_namespace );
				// If a record about to be updated has been removed by another process,
				// the update will not error, and continue updating the existing records.
				$dbw->update(
					'linter',
					[
						'linter_namespace' => $namespaceID
					],
					[ 'linter_namespace IS NULL', 'linter_page = ' . $pageId ],
					__METHOD__
				);
				$updated++;
			}
			// Sleep between batches for replication to catch up
			$lbFactory->waitForReplication();
			sleep( $sleep );
		}

		$logger->info( 'Migrated ' . $updated . " page IDs\n" );
	} while ( $linterPages !== [] );

	$logger->info( "Migrate namespace finished!\n" );

	return $updated;
}
}

View file

@ -28,6 +28,7 @@ use MediaWikiIntegrationTestCase;
use stdClass;
use Title;
use User;
use Wikimedia\Rdbms\SelectQueryBuilder;
/**
* @group Database
@ -36,13 +37,13 @@ use User;
class RecordLintJobTest extends MediaWikiIntegrationTestCase {
/**
* @param string $titleText
* @param int|null $ns
* @return array
*/
private function createTitleAndPage( string $titleText ) {
private function createTitleAndPage( string $titleText, ?int $ns = 0 ) {
$userName = 'LinterUser';
$baseText = 'wikitext test content';
$ns = $this->getDefaultWikitextNS();
$title = Title::newFromText( $titleText, $ns );
$user = User::newFromName( $userName );
if ( $user->getId() === 0 ) {
@ -93,6 +94,22 @@ class RecordLintJobTest extends MediaWikiIntegrationTestCase {
);
}
/**
 * Look up the linter_namespace column stored in the linter table for one page.
 *
 * @param int $pageId page ID matched against linter_page
 * @return mixed whatever fetchField() yields for the linter_namespace column
 */
private function getNamespaceForPage( int $pageId ) {
	$builder = new SelectQueryBuilder( $this->db );

	return $builder
		->select( 'linter_namespace' )
		->table( 'linter' )
		->where( [ 'linter_page' => $pageId ] )
		->caller( __METHOD__ )
		->fetchField();
}
public function testRun() {
$error = [
'type' => 'fostered',
@ -150,6 +167,79 @@ class RecordLintJobTest extends MediaWikiIntegrationTestCase {
$this->assertEquals( $error[ 'params' ][ 'templateInfo' ][ 'name' ], $template );
}
/**
 * Create a page in the given namespace and run a RecordLintJob carrying a
 * single 'fostered' lint error against it.
 *
 * @param string $titleText text of the title to create
 * @param int $namespace namespace ID handed to createTitleAndPage()
 * @return array the value returned by createTitleAndPage()
 */
private function createTitleAndPageAndRunJob( string $titleText, int $namespace ): array {
	$created = $this->createTitleAndPage( $titleText, $namespace );

	$lintError = [
		'type' => 'fostered',
		'location' => [ 0, 10 ],
		'params' => [],
		'dbid' => null,
	];
	$jobParams = [
		'errors' => [ $lintError ],
		'revision' => $created[ 'revID' ],
	];

	$recordJob = new RecordLintJob( $created[ 'title' ], $jobParams );
	$jobSucceeded = $recordJob->run();
	$this->assertTrue( $jobSucceeded );

	return $created;
}
/**
 * Create one page (and run its lint job) per entry of $namespaceIds, setting
 * LinterWriteNamespaceColumnStage from the matching $writeEnables entry first.
 *
 * @param array $namespaceIds namespace ID for each page, keyed by position
 * @param array $writeEnables config value to apply before creating each page,
 *   looked up by the same key as $namespaceIds
 * @return array list of createTitleAndPageAndRunJob() results, in input order
 */
private function createPagesWithNamespace( array $namespaceIds, array $writeEnables ): array {
	$created = [];

	foreach ( $namespaceIds as $position => $nsId ) {
		// enable/disable writing the namespace field in the linter table during page creation
		$writeNamespaceColumn = $writeEnables[ $position ];
		$this->overrideConfigValue( 'LinterWriteNamespaceColumnStage', $writeNamespaceColumn );

		$pageTitleText = 'TestPageNamespace' . $position;
		$created[] = $this->createTitleAndPageAndRunJob( $pageTitleText, intval( $nsId ) );
	}

	return $created;
}
/**
 * Assert that the linter_namespace value read back for each page equals the
 * expected namespace ID at the same key, compared as a string.
 *
 * @param array $pages entries carrying a 'pageID' element
 * @param array $namespaceIds expected namespace IDs, keyed like $pages
 * @return void
 */
private function checkPagesNamespace( array $pages, array $namespaceIds ) {
	foreach ( $pages as $position => $pageInfo ) {
		$storedNamespace = $this->getNamespaceForPage( $pageInfo[ 'pageID' ] );
		// The expected value is cast to string before the strict comparison.
		$expectedNamespace = (string)$namespaceIds[ $position ];
		$this->assertSame( $expectedNamespace, $storedNamespace );
	}
}
public function testMigrateNamespace() {
	$this->overrideConfigValue( 'LinterMigrateNamespaceStage', true );

	// Interleave records that already carry a namespace (write enabled) with
	// records that still need migrating, so that batching is exercised.
	$expectedNamespaces = [ '0', '1', '2', '3', '4', '5', '4', '3', '2', '1', '0', '1', '2' ];
	$namespaceWriteFlags = [
		false, true, true, true, false, false, true,
		true, false, false, false, true, false
	];
	$createdPages = $this->createPagesWithNamespace( $expectedNamespaces, $namespaceWriteFlags );

	// The first page was created with the write flag off, so its
	// linter_namespace field must still be unpopulated.
	$firstPageId = $createdPages[ 0 ][ 'pageID' ];
	$this->assertNull( $this->getNamespaceForPage( $firstPageId ) );

	// Copy the missing namespace IDs from the page table into the linter table.
	Database::migrateNamespace( 2, 3, 0, false );

	// Every linter record must now hold the expected namespace ID.
	$this->checkPagesNamespace( $createdPages, $expectedNamespaces );
}
public function testDropInlineMediaCaptionLints() {
$error = [
'type' => 'inline-media-caption',