From 702ce215d09dcc921df779b4a079c52be68931a8 Mon Sep 17 00:00:00 2001 From: sbailey Date: Tue, 25 Jan 2022 15:27:07 -0800 Subject: [PATCH] Phase 3 migrate code for namespace column add to Linter table * Migrates namespace info from the page table's page_namespace field to the new linter table field linter_namespace. This duplication of the namespace value was requested to greatly reduce the amount of database activity required by the linter search and reporting code. * This patch has been prepared as a dark launch patch enabled with config value LinterMigrateNamespaceStage and assumes that the Linter table has had the linter_namespace column added to it, and recording of the namespace field is already enabled and is populating the namespace column. * The migration code is now runnable from the Linter/maintenance directory, using migrateNamespace.php, which will be deployed in a separate patch. The maintenance code creates an appropriate environment to call migrateNamespace() in Database.php. Bug: T299612 Change-Id: I73cb80729d6a5a8716fe93164ad1e42e6958d672 --- extension.json | 3 + includes/Database.php | 109 ++++++++++++++++++++++++++++ tests/phpunit/RecordLintJobTest.php | 94 +++++++++++++++++++++++- 3 files changed, 204 insertions(+), 2 deletions(-) diff --git a/extension.json b/extension.json index 0245534e..a8d2dcc1 100644 --- a/extension.json +++ b/extension.json @@ -200,6 +200,9 @@ "LinterWriteNamespaceColumnStage": { "value": false }, + "LinterMigrateNamespaceStage": { + "value": false + }, "LinterWriteTagAndTemplateColumnsStage": { "value": false } diff --git a/includes/Database.php b/includes/Database.php index 3c5ba0c9..5955aaf3 100644 --- a/includes/Database.php +++ b/includes/Database.php @@ -25,6 +25,7 @@ use MediaWiki\Logger\LoggerFactory; use MediaWiki\MediaWikiServices; use WikiMap; use Wikimedia\Rdbms\DBConnRef; +use Wikimedia\Rdbms\SelectQueryBuilder; /** * Database logic @@ -420,4 +421,112 @@ class Database { $totalsLookup->touchAllCategoriesCache(); } + 
/**
+	 * Copy page.page_namespace into linter.linter_namespace for each linter record whose
+	 * linter_namespace is still NULL, matching records to pages through linter_page.
+	 *
+	 * Only records with linter_namespace IS NULL are selected and updated, so running this
+	 * more than once is safe and a run with nothing left to migrate exits after one query.
+	 *
+	 * @param int $pageBatchSize page IDs per page table lookup; values below 1 are treated as 1
+	 * @param int $linterBatchSize distinct page IDs read from the linter table per pass;
+	 *  values below 1 are treated as 1
+	 * @param int $sleep seconds to pause after each page batch; negative values are treated as 0
+	 * @param bool $bypassConfig true to run even when LinterMigrateNamespaceStage is false
+	 * @return int number of pages an update was issued for, each with one or more linter records
+	 */
+	public static function migrateNamespace( int $pageBatchSize,
+		int $linterBatchSize,
+		int $sleep,
+		bool $bypassConfig = false ): int {
+		$services = MediaWikiServices::getInstance();
+
+		// Config gate used by the phpunit test, bypassed when run as a maintenance script
+		if ( !$bypassConfig ) {
+			$config = $services->getMainConfig();
+			if ( !$config->get( 'LinterMigrateNamespaceStage' ) ) {
+				return 0;
+			}
+		}
+
+		// The int type declarations already guarantee integers; clamp the values so that a
+		// non-positive page batch size cannot stall the batching below.
+		$pageBatchSize = max( 1, $pageBatchSize );
+		$linterBatchSize = max( 1, $linterBatchSize );
+		$sleep = max( 0, $sleep );
+
+		$logger = LoggerFactory::getInstance( 'MigrateNamespaceChannel' );
+
+		$lbFactory = $services->getDBLoadBalancerFactory();
+		$dbw = self::getDBConnectionRef( DB_PRIMARY );
+		$dbread = self::getDBConnectionRef( DB_REPLICA );
+
+		$logger->info( "Migrate namespace starting\n" );
+
+		$updated = 0;
+		$lastElement = 0;
+		do {
+			// Gather the next batch of unique page IDs, resuming after the last ID already seen
+			$linterPages = [];
+
+			$queryLinterTable = new SelectQueryBuilder( $dbw );
+			$queryLinterTable
+				->select( 'DISTINCT linter_page' )
+				->from( 'linter' )
+				->where( [ 'linter_namespace IS NULL', 'linter_page > ' . $lastElement ] )
+				->orderBy( 'linter_page' )
+				->limit( $linterBatchSize )
+				->caller( __METHOD__ );
+			$result = $queryLinterTable->fetchResultSet();
+
+			foreach ( $result as $row ) {
+				$lastElement = intval( $row->linter_page );
+				$linterPages[] = $lastElement;
+			}
+			$linterPagesLength = count( $linterPages );
+
+			// Look up namespaces $pageBatchSize pages at a time; an empty list yields no chunks
+			foreach ( array_chunk( $linterPages, $pageBatchSize ) as $pageIdBatch ) {
+				$queryPageTable = new SelectQueryBuilder( $dbread );
+				$queryPageTable
+					->fields( [ 'page_id', 'page_namespace' ] )
+					->from( 'page' )
+					->where( [ 'page_id' => $pageIdBatch ] )
+					->caller( __METHOD__ );
+
+				$pageResults = $queryPageTable->fetchResultSet();
+
+				foreach ( $pageResults as $pageRow ) {
+					$pageId = intval( $pageRow->page_id );
+					$namespaceID = intval( $pageRow->page_namespace );
+
+					// If a record about to be updated has been removed by another process,
+					// the update will not error, and continue updating the existing records.
+					$dbw->update(
+						'linter',
+						[
+							'linter_namespace' => $namespaceID
+						],
+						[ 'linter_namespace IS NULL', 'linter_page' => $pageId ],
+						__METHOD__
+					);
+					$updated++;
+				}
+
+				// Wait for replication to catch up, then pause, before the next page batch
+				$lbFactory->waitForReplication();
+				if ( $sleep > 0 ) {
+					sleep( $sleep );
+				}
+			}
+
+			$logger->info( 'Migrated ' . $updated . " page IDs\n" );
+
+		} while ( $linterPagesLength > 0 );
+
+		$logger->info( "Migrate namespace finished!\n" );
+
+		return $updated;
+	}
+
 }
diff --git a/tests/phpunit/RecordLintJobTest.php b/tests/phpunit/RecordLintJobTest.php
index 0e840e7e..572651bc 100644
--- a/tests/phpunit/RecordLintJobTest.php
+++ b/tests/phpunit/RecordLintJobTest.php
@@ -28,6 +28,7 @@ use MediaWikiIntegrationTestCase;
 use stdClass;
 use Title;
 use User;
+use Wikimedia\Rdbms\SelectQueryBuilder;
 
 /**
  * @group Database
@@ -36,13 +37,13 @@ use User;
 class RecordLintJobTest extends MediaWikiIntegrationTestCase {
 	/**
 	 * @param string $titleText
+	 * @param int|null $ns namespace ID passed to Title::newFromText(); defaults to 0
 	 * @return array
 	 */
-	private function createTitleAndPage( string $titleText ) {
+	private function createTitleAndPage( string $titleText, ?int $ns = 0 ) {
 		$userName = 'LinterUser';
 		$baseText = 'wikitext test content';
 
-		$ns = $this->getDefaultWikitextNS();
 		$title = Title::newFromText( $titleText, $ns );
 		$user = User::newFromName( $userName );
 		if ( $user->getId() === 0 ) {
@@ -93,6 +94,22 @@ class RecordLintJobTest extends MediaWikiIntegrationTestCase {
 		);
 	}
 
+	/**
+	 * Get just the linter_namespace field value from the linter table for a page
+	 *
+	 * @param int $pageId value matched against linter_page
+	 * @return mixed whatever fetchField() returns for the linter_namespace column
+	 */
+	private function getNamespaceForPage( int $pageId ) {
+		$queryLinterPageNamespace = new SelectQueryBuilder( $this->db );
+		$queryLinterPageNamespace
+			->select( 'linter_namespace' )
+			->table( 'linter' )
+			->where( [ 'linter_page' => $pageId ] )
+			->caller( __METHOD__ );
+		return $queryLinterPageNamespace->fetchField();
+	}
+
 	public function testRun() {
 		$error = [
 			'type' => 'fostered',
@@ -150,6 +167,79 @@ class RecordLintJobTest extends MediaWikiIntegrationTestCase {
 		$this->assertEquals( $error[ 'params' ][ 'templateInfo' ][ 'name' ], $template );
 	}
 
+	/**
+	 * @param string $titleText title text for the page that is created and linted
+	 * @param int $namespace namespace ID the page is created in
+	 * @return array the createTitleAndPage() result, after a RecordLintJob has run for the page
+	 */
+	private function createTitleAndPageAndRunJob( string $titleText, int $namespace ): array {
+		$titleAndPage = $this->createTitleAndPage( $titleText, $namespace );
+		$error = [
+			'type' => 'fostered',
+			'location' => [ 0, 10 ],
+			'params' => [],
+			'dbid' => null,
+		];
+		$job = new RecordLintJob( $titleAndPage[ 'title' ], [
+			'errors' => [ $error ],
+			'revision' => $titleAndPage[ 'revID' ]
+		] );
+		$this->assertTrue( $job->run() );
+		return $titleAndPage;
+	}
+
+	/**
+	 * @param array $namespaceIds namespace ID for each page to create, by index
+	 * @param array $writeEnables LinterWriteNamespaceColumnStage value used for each page, by index
+	 * @return array createTitleAndPageAndRunJob() results in creation order
+	 */
+	private function createPagesWithNamespace( array $namespaceIds, array $writeEnables ): array {
+		$titleAndPages = [];
+		foreach ( $namespaceIds as $index => $namespaceId ) {
+			// enable/disable writing the namespace field in the linter table during page creation
+			$this->overrideConfigValue( 'LinterWriteNamespaceColumnStage', $writeEnables[ $index ] );
+			$titleAndPages[] = $this->createTitleAndPageAndRunJob(
+				'TestPageNamespace' . $index,
+				intval( $namespaceId ) );
+		}
+		return $titleAndPages;
+	}
+
+	/**
+	 * @param array $pages page entries; the 'pageID' element of each one is checked
+	 * @param array $namespaceIds expected linter_namespace value for each page, by index
+	 * @return void
+	 */
+	private function checkPagesNamespace( array $pages, array $namespaceIds ) {
+		foreach ( $pages as $index => $page ) {
+			$pageId = $page[ 'pageID' ];
+			$namespace = $this->getNamespaceForPage( $pageId );
+			$namespaceId = $namespaceIds[ $index ];
+			$this->assertSame( "$namespaceId", $namespace );
+		}
+	}
+
+	public function testMigrateNamespace() {
+		$this->overrideConfigValue( 'LinterMigrateNamespaceStage', true );
+
+		// Create groups of records that do not need migrating to ensure batching works properly
+		$namespaceIds = [ '0', '1', '2', '3', '4', '5', '4', '3', '2', '1', '0', '1', '2' ];
+		$writeEnables = [ false, true, true, true, false, false, true, true, false, false, false, true, false ];
+
+		$titleAndPages = $this->createPagesWithNamespace( $namespaceIds, $writeEnables );
+
+		// Verify the create page function did not populate the linter_namespace field for TestPageNamespace0
+		$pageId = $titleAndPages[ 0 ][ 'pageID' ];
+		$namespace = $this->getNamespaceForPage( $pageId );
+		$this->assertNull( $namespace );
+
+		// migrate unpopulated namespace_id(s) from the page table to linter table
+		Database::migrateNamespace( 2, 3, 0, false );
+
+		// Verify all linter records now have proper namespace IDs in the linter_namespace field
+		$this->checkPagesNamespace( $titleAndPages, $namespaceIds );
+	}
+
 	public function testDropInlineMediaCaptionLints() {
 		$error = [
 			'type' => 'inline-media-caption',