Phase 3 of T175177: Migrate linter_params into new fields

* The migrate code is designed to perform a one-time update of
   linter_params JSON encoded template and tag information into
   the new discrete template and tag text fields for use as
   additional search criteria. The function can be restarted if
   it is interrupted.
 * It now uses configurable batching and sleep times between
   batches to allow the database to do other work and replication
   to occur without stressing infrastructure.
 * The migrate code is currently only called by test code and still
   needs to be called once from a maintenance script.

Bug: T175177
Change-Id: Idc4ca88d4762bc7a3bcbc4e66c0f275562083867
This commit is contained in:
sbailey 2022-08-22 14:24:19 -07:00
parent 9a46876e95
commit 350d677c5b
3 changed files with 240 additions and 30 deletions

View file

@ -205,6 +205,9 @@
},
"LinterWriteTagAndTemplateColumnsStage": {
"value": false
},
"LinterMigrateTagAndTemplateColumnsStage": {
"value": false
}
},
"manifest_version": 2

View file

@ -23,6 +23,7 @@ namespace MediaWiki\Linter;
use FormatJson;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MediaWikiServices;
use stdClass;
use WikiMap;
use Wikimedia\Rdbms\DBConnRef;
use Wikimedia\Rdbms\SelectQueryBuilder;
@ -103,7 +104,7 @@ class Database {
/**
* Turn a database row into a LintError object
*
* @param \stdClass $row
* @param stdClass $row
* @return LintError|bool false on error
*/
public static function makeLintError( $row ) {
@ -529,4 +530,108 @@ class Database {
return $updated;
}
/**
 * Migrate the template and tag information held in the JSON encoded linter_params field
 * into the discrete linter_template and linter_tag fields wherever those fields are
 * unpopulated or stale.
 *
 * This code should only be run once and thereafter disabled but must run to completion.
 * It can be restarted if interrupted: rows whose linter_template and linter_tag already
 * match linter_params are skipped, so only remaining divergences are written.
 * Note: When linter_params are not set, the content is set to '[]' indicating no content;
 * such rows, and rows with a null linter_params field, are excluded by the select query.
 * This code is only run once by maintenance script migrateTagTemplate.php
 *
 * @param int $batchSize maximum number of linter rows selected per batch
 *   (NOTE(review): not validated here; callers are expected to pass a positive value)
 * @param int $sleep seconds to sleep after each non-empty batch; negative values are treated as 0
 * @param bool $bypassConfig when true, skip the LinterMigrateTagAndTemplateColumnsStage config check
 * @return int number of row updates issued
 */
public static function migrateTemplateAndTagInfo( int $batchSize,
	int $sleep,
	bool $bypassConfig = false
): int {
	// code used by phpunit test, bypassed when run as a maintenance script
	if ( !$bypassConfig ) {
		$config = MediaWikiServices::getInstance()->getMainConfig();
		if ( !$config->get( 'LinterMigrateTagAndTemplateColumnsStage' ) ) {
			return 0;
		}
	}

	// $sleep is an int by declaration, so only a negative value needs normalising.
	$sleep = max( 0, $sleep );

	$logger = LoggerFactory::getInstance( 'MigrateTagAndTemplateChannel' );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$dbw = self::getDBConnectionRef( DB_PRIMARY );

	$logger->info( "Migration of linter_params field to linter_tag and linter_template fields starting\n" );

	$updated = 0;
	// Highest linter_id handled so far; each batch resumes strictly after it.
	$lastElement = 0;
	do {
		$queryLinterTable = new SelectQueryBuilder( $dbw );
		$queryLinterTable
			->table( 'linter' )
			->fields( [ 'linter_id', 'linter_params', 'linter_template', 'linter_tag' ] )
			// $lastElement is always an integer (0 or an intval() result), so the
			// concatenation below cannot inject SQL.
			->where( [ 'linter_params != \'[]\'', 'linter_params IS NOT NULL', 'linter_id > ' . $lastElement ] )
			->orderBy( 'linter_id', SelectQueryBuilder::SORT_ASC )
			->limit( $batchSize )
			->caller( __METHOD__ );
		$results = $queryLinterTable->fetchResultSet();

		$linterBatchLength = 0;
		foreach ( $results as $row ) {
			$linter_id = intval( $row->linter_id );
			$lastElement = $linter_id;

			// The null coalescing reads below fall back to '' when a property is
			// missing, including when the decoded value is not an object at all.
			$linter_params = FormatJson::decode( $row->linter_params );
			$templateInfo = $linter_params->templateInfo ?? '';
			if ( is_object( $templateInfo ) ) {
				if ( isset( $templateInfo->multiPartTemplateBlock ) ) {
					$templateInfo = 'multi-part-template-block';
				} else {
					$templateInfo = $templateInfo->name ?? '';
				}
			}
			$tagInfo = $linter_params->name ?? '';

			// compare the content of linter_params to the template and tag field contents
			// and if they diverge, update the field with the correct template and tag info.
			// This behavior allows this function to be restarted should it be interrupted
			// and avoids repeating database record updates that are already correct due to
			// having been populated when the error record was created with the new recordLintError
			// write code that populates the template and tag fields, or for records populated
			// during a previous but interrupted run of this migrate code.
			// The comparison is intentionally loose so that a NULL column value is
			// treated the same as an empty string.
			if ( $templateInfo != $row->linter_template || $tagInfo != $row->linter_tag ) {
				// If the record about to be updated has been removed by another process,
				// the update will not do anything and just return with no records updated.
				$dbw->update(
					'linter',
					[
						'linter_template' => $templateInfo, 'linter_tag' => $tagInfo
					],
					[ 'linter_id' => $linter_id ],
					__METHOD__
				);
				$updated++;
			}
			$linterBatchLength++;
		}

		// The terminating empty batch wrote nothing, so there is nothing to wait
		// for or report; only pause after a batch that actually contained rows.
		if ( $linterBatchLength > 0 ) {
			// Sleep between batches for replication to catch up
			$lbFactory->waitForReplication();
			if ( $sleep > 0 ) {
				sleep( $sleep );
			}
			$logger->info( 'Migrated ' . $updated . " linter IDs\n" );
		}
	} while ( $linterBatchLength > 0 );

	$logger->info( "Migrate linter_params to linter_tag and linter_template fields finished!\n" );

	return $updated;
}
}

View file

@ -25,7 +25,6 @@ use MediaWiki\Linter\Database;
use MediaWiki\Linter\LintError;
use MediaWiki\Linter\RecordLintJob;
use MediaWikiIntegrationTestCase;
use stdClass;
use Title;
use User;
use Wikimedia\Rdbms\SelectQueryBuilder;
@ -64,34 +63,32 @@ class RecordLintJobTest extends MediaWikiIntegrationTestCase {
* Get just the lint error linter_tag field value for a page
*
* @param int $pageId
* @return stdClass|bool
* @return mixed
*/
private static function getTagForPage( int $pageId ) {
return Database::getDBConnectionRef( DB_REPLICA )->selectRow(
'linter',
[
'linter_tag'
],
[ 'linter_page' => $pageId ],
__METHOD__
);
private function getTagForPage( int $pageId ) {
	// Build the single-column lookup in one fluent chain and hand back
	// whatever fetchField() yields for the linter_tag column.
	return ( new SelectQueryBuilder( $this->db ) )
		->select( 'linter_tag' )
		->table( 'linter' )
		->where( [ 'linter_page' => $pageId ] )
		->caller( __METHOD__ )
		->fetchField();
}
/**
* Get just the lint error linter_template field value for a page
*
* @param int $pageId
* @return stdClass|bool
* @return mixed
*/
private static function getTemplateForPage( int $pageId ) {
return Database::getDBConnectionRef( DB_REPLICA )->selectRow(
'linter',
[
'linter_template'
],
[ 'linter_page' => $pageId ],
__METHOD__
);
private function getTemplateForPage( int $pageId ) {
	// Build the single-column lookup in one fluent chain and hand back
	// whatever fetchField() yields for the linter_template column.
	return ( new SelectQueryBuilder( $this->db ) )
		->select( 'linter_template' )
		->table( 'linter' )
		->where( [ 'linter_page' => $pageId ] )
		->caller( __METHOD__ )
		->fetchField();
}
/**
@ -123,7 +120,6 @@ class RecordLintJobTest extends MediaWikiIntegrationTestCase {
'revision' => $titleAndPage[ 'revID' ]
] );
$this->assertTrue( $job->run() );
$db = new Database( $titleAndPage[ 'pageID' ] );
/** @var LintError[] $errorsFromDb */
$errorsFromDb = array_values( $db->getForPage() );
@ -136,7 +132,6 @@ class RecordLintJobTest extends MediaWikiIntegrationTestCase {
public function testWriteTagAndTemplate() {
$this->overrideConfigValue( 'LinterWriteTagAndTemplateColumnsStage', true );
$error = [
'type' => 'obsolete-tag',
'location' => [ 0, 10 ],
@ -152,7 +147,6 @@ class RecordLintJobTest extends MediaWikiIntegrationTestCase {
'revision' => $titleAndPage[ 'revID' ]
] );
$this->assertTrue( $job->run() );
$pageId = $titleAndPage[ 'pageID' ];
$db = new Database( $pageId );
$errorsFromDb = array_values( $db->getForPage() );
@ -161,9 +155,9 @@ class RecordLintJobTest extends MediaWikiIntegrationTestCase {
$this->assertEquals( $error[ 'type' ], $errorsFromDb[0]->category );
$this->assertEquals( $error[ 'location' ], $errorsFromDb[0]->location );
$this->assertEquals( $error[ 'params' ], $errorsFromDb[0]->params );
$tag = self::getTagForPage( $pageId )->linter_tag ?? '';
$tag = $this->getTagForPage( $pageId );
$this->assertEquals( $error[ 'params' ][ 'name' ], $tag );
$template = self::getTemplateForPage( $pageId )->linter_template ?? '';
$template = $this->getTemplateForPage( $pageId );
$this->assertEquals( $error[ 'params' ][ 'templateInfo' ][ 'name' ], $template );
}
@ -240,6 +234,114 @@ class RecordLintJobTest extends MediaWikiIntegrationTestCase {
$this->checkPagesNamespace( $titleAndPages, $namespaceIds );
}
/**
 * Create a page with the given title, then run a RecordLintJob that records the
 * single supplied lint error against that page's revision. The job is asserted
 * to have run successfully.
 *
 * @param string $titleText
 * @param array $error
 * @return array the value returned by createTitleAndPage() for the new page
 */
private function createTitleAndPageForTagsAndRunJob( string $titleText, array $error ): array {
	$pageInfo = $this->createTitleAndPage( $titleText );
	$jobParams = [
		'errors' => [ $error ],
		'revision' => $pageInfo[ 'revID' ],
	];
	$recordJob = new RecordLintJob( $pageInfo[ 'title' ], $jobParams );
	$this->assertTrue( $recordJob->run() );
	return $pageInfo;
}
/**
 * Create one page per entry of $writeEnables, each carrying the same lint error.
 * Before each page is created, LinterWriteTagAndTemplateColumnsStage is overridden
 * with that entry's value. Pages are titled 'TestPage' followed by the entry's key.
 *
 * @param array $writeEnables
 * @param array $error
 * @return array
 */
private function createPagesWithTagAndTemplate( array $writeEnables, array $error ): array {
	$createdPages = [];
	foreach ( $writeEnables as $position => $writeColumns ) {
		// Toggle whether the tag and template fields get written while this page's error is recorded
		$this->overrideConfigValue( 'LinterWriteTagAndTemplateColumnsStage', $writeColumns );
		$pageTitle = "TestPage{$position}";
		$createdPages[] = $this->createTitleAndPageForTagsAndRunJob( $pageTitle, $error );
	}
	return $createdPages;
}
/**
 * Assert that every given page has linter_tag "center" and
 * linter_template "Template:Echo" stored for it.
 *
 * @param array $pages
 * @return void
 */
private function checkPagesTagAndTemplate( array $pages ) {
	foreach ( $pages as $pageInfo ) {
		$currentPageId = $pageInfo[ 'pageID' ];
		$this->assertEquals( "center", $this->getTagForPage( $currentPageId ) );
		$this->assertEquals( "Template:Echo", $this->getTemplateForPage( $currentPageId ) );
	}
}
public function testMigrateTagAndTemplate() {
	$this->overrideConfigValue( 'LinterMigrateTagAndTemplateColumnsStage', true );

	// Fixture: ordinary error carrying both a tag name and a template name
	$tagAndTemplateError = [
		'type' => 'obsolete-tag',
		'location' => [ 0, 10 ],
		'params' => [ "name" => "center",
			"templateInfo" => [ "name" => "Template:Echo" ] ],
		'dbid' => null,
	];
	// Fixture: special case with empty params, stored as linter_params = '[]'
	$emptyParamsError = [
		'type' => 'wikilink-in-extlink',
		'location' => [ 0, 10 ],
		'params' => [],
		'dbid' => null,
	];
	// Fixture: special case that should migrate to 'multi-part-template-block'
	$multiPartError = [
		'type' => 'obsolete-tag',
		'location' => [ 0, 10 ],
		'params' => '{"name":"center","templateInfo":{"multiPartTemplateBlock":true}}',
		'dbid' => null,
	];

	// Interleave runs of records that do and do not need migrating so that batching is exercised
	$writeFlags = [ false, true, true, true, false, false, true, true, false, false, false, true, false ];
	$regularPages = $this->createPagesWithTagAndTemplate( $writeFlags, $tagAndTemplateError );

	// Both special-case pages are created with tag and template writing switched off
	$this->overrideConfigValue( 'LinterWriteTagAndTemplateColumnsStage', false );
	$bracketsPage = $this->createTitleAndPageForTagsAndRunJob(
		'TestPageTagAndTemplateBrackets',
		$emptyParamsError
	);
	$multiPartPage = $this->createTitleAndPageForTagsAndRunJob(
		'TestPageTagAndTemplateMultipart',
		$multiPartError
	);

	// TestPage0 was created with writing disabled, so both fields must still be empty
	$firstPageId = $regularPages[ 0 ][ 'pageID' ];
	$this->assertSame( "", $this->getTagForPage( $firstPageId ) );
	$this->assertSame( "", $this->getTemplateForPage( $firstPageId ) );

	// Run the migration from the params field with a batch size of 3 and no sleep
	Database::migrateTemplateAndTagInfo( 3, 0, false );

	// Every regular page must now carry the tag and template taken from its params
	$this->checkPagesTagAndTemplate( $regularPages );

	// The '[]' params record must be left with empty tag and template fields
	$bracketsPageId = $bracketsPage[ 'pageID' ];
	$this->assertSame( "", $this->getTagForPage( $bracketsPageId ) );
	$this->assertSame( "", $this->getTemplateForPage( $bracketsPageId ) );

	// The multi part record must get the tag name and the fixed multi-part template marker
	$multiPartPageId = $multiPartPage[ 'pageID' ];
	$this->assertEquals( "center", $this->getTagForPage( $multiPartPageId ) );
	$this->assertEquals( "multi-part-template-block", $this->getTemplateForPage( $multiPartPageId ) );
}
public function testDropInlineMediaCaptionLints() {
$error = [
'type' => 'inline-media-caption',
@ -247,10 +349,10 @@ class RecordLintJobTest extends MediaWikiIntegrationTestCase {
'params' => [],
'dbid' => null,
];
$titleAndPage = $this->createTitleAndPage( 'TestPage3' );
$job = new RecordLintJob( $titleAndPage['title'], [
$titleAndPage = $this->createTitleAndPage( 'TestPageMediaCaption' );
$job = new RecordLintJob( $titleAndPage[ 'title' ], [
'errors' => [ $error ],
'revision' => $titleAndPage['revID']
'revision' => $titleAndPage[ 'revID' ]
] );
$this->assertTrue( $job->run() );
/** @var LintError[] $errorsFromDb */