mediawiki-extensions-Discus.../maintenance/persistRevisionThreadItems.php
daniel a6a12730fc Don't write to parser cache from maintenance script
The parser cache for parsoid output isn't yet ready for full load.
Don't flood it when running batch operations.

Change-Id: I77f3de30b0500f0e5c593f4d31dceef7720f848e
2022-12-13 16:26:03 +00:00

188 lines
5.5 KiB
PHP

<?php
namespace MediaWiki\Extension\DiscussionTools\Maintenance;
use IDatabase;
use Language;
use Maintenance;
use MediaWiki\Extension\DiscussionTools\Hooks\HookUtils;
use MediaWiki\Extension\DiscussionTools\ThreadItemStore;
use MediaWiki\MediaWikiServices;
use MediaWiki\Revision\RevisionStore;
use MediaWiki\Shell\Shell;
use MWExceptionRenderer;
use stdClass;
use Throwable;
use Title;
use Wikimedia\Rdbms\SelectQueryBuilder;
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
$IP = __DIR__ . '/../../..';
}
require_once "$IP/maintenance/Maintenance.php";
class PersistRevisionThreadItems extends Maintenance {
private IDatabase $dbr;
private ThreadItemStore $itemStore;
private RevisionStore $revStore;
private Language $lang;
public function __construct() {
parent::__construct();
$this->requireExtension( 'DiscussionTools' );
$this->addDescription( 'Persist thread item information for the given pages/revisions' );
$this->addOption( 'rev', 'Revision ID to process', false, true, false, true );
$this->addOption( 'page', 'Page title to process', false, true, false, true );
$this->addOption( 'all', 'Process the whole wiki' );
$this->addOption( 'current', 'Process current revisions only' );
$this->addOption( 'start', 'Restart from this position (as printed by the script)', false, true );
$this->setBatchSize( 100 );
}
public function execute() {
$services = MediaWikiServices::getInstance();
$this->dbr = $this->getDB( DB_REPLICA );
$this->itemStore = $services->getService( 'DiscussionTools.ThreadItemStore' );
$this->revStore = $services->getRevisionStore();
$this->lang = $services->getLanguageFactory()->getLanguage( 'en' );
$qb = $this->dbr->newSelectQueryBuilder();
$qb->queryInfo( $this->revStore->getQueryInfo( [ 'page' ] ) );
if ( $this->getOption( 'all' ) ) {
// Do nothing
} elseif ( $this->getOption( 'page' ) ) {
$linkBatch = $services->getLinkBatchFactory()->newLinkBatch();
foreach ( $this->getOption( 'page' ) as $page ) {
$linkBatch->addObj( Title::newFromText( $page ) );
}
$pageIds = array_map( static function ( $page ) {
return $page->getId();
}, $linkBatch->getPageIdentities() );
$qb->where( [ 'rev_page' => $pageIds ] );
} elseif ( $this->getOption( 'rev' ) ) {
$qb->where( [ 'rev_id' => $this->getOption( 'rev' ) ] );
} else {
$this->error( "One of 'all', 'page', or 'rev' required" );
$this->maybeHelp( true );
return;
}
if ( $this->getOption( 'current' ) ) {
$qb->where( 'rev_id = page_latest' );
$index = [ 'page_id' ];
} else {
// Process in order by page and time to avoid confusing results while the script is running
$index = [ 'rev_page', 'rev_timestamp', 'rev_id' ];
}
$this->process( $qb, $index );
}
/**
* @param SelectQueryBuilder $qb
* @param array $index
*/
private function process( SelectQueryBuilder $qb, array $index ): void {
$start = microtime( true );
$qb->caller( __METHOD__ );
// estimateRowCount() refuses to work when fields are set, so we can't just call it on $qb
$countQueryInfo = $qb->getQueryInfo();
$count = $qb->newSubquery()
->rawTables( $countQueryInfo['tables'] )
->where( $countQueryInfo['conds'] )
->options( $countQueryInfo['options'] )
->joinConds( $countQueryInfo['join_conds'] )
->caller( __METHOD__ )
->estimateRowCount();
$this->output( "Processing... (estimated $count rows)\n" );
$processed = 0;
$updated = 0;
$qb->orderBy( $index );
$batchSize = $this->getBatchSize();
$qb->limit( $batchSize );
$batchStart = null;
if ( $this->getOption( 'start' ) ) {
$batchStart = json_decode( $this->getOption( 'start' ) );
if ( !$batchStart ) {
$this->error( "Invalid 'start'" );
}
}
while ( true ) {
$qbForBatch = clone $qb;
if ( $batchStart ) {
$batchStartCond = $this->dbr->buildComparison( '>', array_combine( $index, $batchStart ) );
$qbForBatch->where( $batchStartCond );
$batchStartOutput = Shell::escape( json_encode( $batchStart ) );
$this->output( "--start $batchStartOutput\n" );
}
$res = $qbForBatch->fetchResultSet();
foreach ( $res as $row ) {
$updated += (int)$this->processRow( $row );
}
$processed += $res->numRows();
$this->output( "Processed $processed (updated $updated) of $count rows\n" );
$this->waitForReplication();
if ( $res->numRows() < $batchSize || !isset( $row ) ) {
// Done
break;
}
// Update the conditions to select the next batch.
$batchStart = [];
foreach ( $index as $field ) {
$batchStart[] = $row->$field;
}
}
$duration = microtime( true ) - $start;
$durationFormatted = $this->lang->formatTimePeriod( $duration );
$this->output( "Finished in $durationFormatted\n" );
}
/**
* @param stdClass $row Database table row
* @return bool
*/
private function processRow( stdClass $row ): bool {
$changed = false;
try {
$rev = $this->revStore->newRevisionFromRow( $row );
$title = Title::newFromLinkTarget(
$rev->getPageAsLinkTarget()
);
if ( HookUtils::isAvailableForTitle( $title ) ) {
$threadItemSet = HookUtils::parseRevisionParsoidHtml( $rev, false );
// Store permalink data
$changed = $this->itemStore->insertThreadItems( $rev, $threadItemSet );
}
} catch ( Throwable $e ) {
$this->output( "Error while processing revid=$row->rev_id, pageid=$row->rev_page\n" );
MWExceptionRenderer::output( $e, MWExceptionRenderer::AS_RAW );
}
return $changed;
}
}
$maintClass = PersistRevisionThreadItems::class;
require_once RUN_MAINTENANCE_IF_MAIN;