2012-05-07 16:28:18 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
// Locate the MediaWiki installation: use MW_INSTALL_PATH when it is set in the environment,
// otherwise fall back to the directory three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;

// Load the base Maintenance class that the script below extends.
require_once $IP . '/maintenance/Maintenance.php';
|
2012-05-07 16:28:18 +00:00
|
|
|
|
2023-08-19 04:18:19 +00:00
|
|
|
use MediaWiki\Title\Title;
|
2016-12-05 17:45:34 +00:00
|
|
|
use PageImages\Job\InitImageDataJob;
|
2016-12-02 00:21:17 +00:00
|
|
|
|
2015-11-16 14:59:34 +00:00
|
|
|
/**
 * Maintenance script that initializes PageImages data.
 *
 * Non-redirect pages that have at least one row in imagelinks are walked in ascending
 * page ID order, one batch at a time. Each batch of page IDs is wrapped in an
 * InitImageDataJob which is either run in-process or pushed to the job queue,
 * depending on the --queue-pressure option.
 *
 * @license WTFPL
 * @author Max Semenik
 */
class InitImageData extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Initializes PageImages data' );
		$this->addOption( 'namespaces',
			'Comma-separated list of namespace(s) to refresh', false, true );
		$this->addOption( 'earlier-than',
			'Run only on pages touched earlier than this timestamp', false, true );
		$this->addOption( 'later-than',
			'Run only on pages touched later than this timestamp', false, true );
		$this->addOption( 'start', 'Starting page ID', false, true );
		$this->addOption( 'queue-pressure', 'Maximum number of jobs to enqueue at a time. ' .
			'If not provided or 0 will be run in-process.', false, true );
		$this->addOption( 'quiet', "Don't report on job queue pressure" );
		$this->setBatchSize( 100 );

		$this->requireExtension( 'PageImages' );
	}

	/**
	 * Do the actual work of filling out page images
	 *
	 * Repeatedly selects the next batch of page IDs greater than the last processed ID,
	 * hands them to an InitImageDataJob, prints the last ID of the batch as progress and
	 * stops once a query returns no more pages.
	 */
	public function execute() {
		// Keep the cursor as an integer from the start; it is used in a numeric comparison.
		$lastId = (int)$this->getOption( 'start', 0 );
		$isQuiet = $this->getOption( 'quiet', false );

		// Normalise to int once so that every later comparison is numeric rather than
		// depending on how PHP compares an integer with the raw option value.
		$maxPressure = (int)$this->getOption( 'queue-pressure', 0 );
		// A null queue means "run each job in-process".
		$queue = $maxPressure > 0 ? $this->getServiceContainer()->getJobQueueGroup() : null;

		// The namespace filter does not change between batches, so resolve it once.
		$namespaces = $this->hasOption( 'namespaces' )
			? explode( ',', $this->getOption( 'namespaces' ) )
			: $this->getServiceContainer()->getMainConfig()->get( 'PageImagesNamespaces' );

		while ( true ) {
			$dbr = $this->getServiceContainer()->getDBLoadBalancerFactory()
				->getReplicaDatabase();
			$queryBuilder = $dbr->newSelectQueryBuilder()
				->select( 'page_id' )
				->from( 'page' )
				->leftJoin( 'imagelinks', null, 'page_id = il_from' )
				->where( [
					$dbr->expr( 'page_id', '>', $lastId ),
					// Only pages that actually have image links.
					$dbr->expr( 'il_from', '!=', null ),
					'page_is_redirect' => 0,
					'page_namespace' => $namespaces,
				] )
				->orderBy( 'page_id' )
				// A page with several image links yields several joined rows; collapse them.
				->groupBy( 'page_id' )
				->limit( $this->getBatchSize() )
				->caller( __METHOD__ );

			if ( $this->hasOption( 'earlier-than' ) ) {
				$queryBuilder->andWhere(
					$dbr->expr( 'page_touched', '<', $dbr->timestamp( $this->getOption( 'earlier-than' ) ) )
				);
			}
			if ( $this->hasOption( 'later-than' ) ) {
				$queryBuilder->andWhere(
					$dbr->expr( 'page_touched', '>', $dbr->timestamp( $this->getOption( 'later-than' ) ) )
				);
			}

			$pageIds = $queryBuilder->fetchFieldValues();
			if ( !$pageIds ) {
				// Nothing left to process. Stop here instead of creating (and possibly
				// enqueueing and waiting on) a job that has no pages to work on.
				break;
			}

			$job = new InitImageDataJob(
				Title::newMainPage(),
				[ 'page_ids' => $pageIds ],
				$this->getServiceContainer()->getDBLoadBalancerFactory()
			);
			if ( $queue === null ) {
				$job->run();
			} else {
				$queue->push( $job );
				$this->waitForMaxPressure( $queue, $maxPressure, $isQuiet );
			}

			// Results are ordered by page_id, so the last element is the new cursor.
			$lastId = (int)end( $pageIds );
			$this->output( "$lastId\n" );
		}

		$this->output( "done\n" );
	}

	/**
	 * Block until the job queue pressure for InitImageDataJob drops below the maximum.
	 *
	 * Polls the queue once per second; the first poll happens after an initial one-second sleep.
	 *
	 * @param JobQueueGroup $queue The job queue to fetch pressure from
	 * @param int $maxPressure The maximum number of queued + active
	 *  jobs that can exist when returning
	 * @param bool $isQuiet When false report on job queue pressure every 10s
	 */
	private function waitForMaxPressure( JobQueueGroup $queue, $maxPressure, $isQuiet ) {
		$group = $queue->get( 'InitImageDataJob' );
		$i = 0;
		do {
			sleep( 1 );
			$queued = $group->getSize();
			$running = $group->getAcquiredCount();
			$abandoned = $group->getAbandonedCount();

			// The counter only advances when reporting is enabled; every 10th poll is reported.
			if ( !$isQuiet && ++$i % 10 === 0 ) {
				$now = date( 'Y-m-d H:i:s T' );
				$this->output( "[$now] Queued: $queued Running: $running " .
					"Abandoned: $abandoned Max: $maxPressure\n" );
			}
			// Abandoned jobs are subtracted from the pressure figure.
		} while ( $queued + $running - $abandoned >= $maxPressure );
	}
}
|
|
|
|
|
2019-03-02 16:50:46 +00:00
|
|
|
// Name the maintenance class defined in this file, then include the file that
// RUN_MAINTENANCE_IF_MAIN points at.
$maintClass = InitImageData::class;
require_once RUN_MAINTENANCE_IF_MAIN;
|