Use ParserOutputAccess for LintUpdate job

This avoids a duplicate parse with DiscussionTools (T376325). It also
reduces redundancy by using the metrics-gathering code in
ParserOutputAccess instead of having to clone it here. Finally, it
allows the parse to use the output of a previous parse for selective
update.

Bug: T376325
Follows-Up: I64a4556a74da4f735a5b562070c21310ecda36d1
Change-Id: I11386e307caaa9fce34870b08bd4dce4c5e6eb25
This commit is contained in:
C. Scott Ananian 2024-10-02 16:54:01 -04:00
parent 2c0f27bea2
commit e6a510fbed
4 changed files with 39 additions and 57 deletions

View file

@ -31,7 +31,7 @@
"LinkRenderer",
"JobQueueGroup",
"WikiPageFactory",
"StatsFactory",
"ParserOutputAccess",
"Linter.CategoryManager",
"Linter.TotalsLookup",
"Linter.Database",

View file

@ -37,6 +37,7 @@ use MediaWiki\Output\Hook\BeforePageDisplayHook;
use MediaWiki\Output\OutputPage;
use MediaWiki\Page\Hook\RevisionFromEditCompleteHook;
use MediaWiki\Page\Hook\WikiPageDeletionUpdatesHook;
use MediaWiki\Page\ParserOutputAccess;
use MediaWiki\Page\WikiPageFactory;
use MediaWiki\Revision\RenderedRevision;
use MediaWiki\Revision\RevisionRecord;
@ -45,7 +46,6 @@ use MediaWiki\Storage\Hook\RevisionDataUpdatesHook;
use MediaWiki\Title\Title;
use MediaWiki\User\UserIdentity;
use Skin;
use Wikimedia\Stats\StatsFactory;
use WikiPage;
class Hooks implements
@ -60,7 +60,7 @@ class Hooks implements
private LinkRenderer $linkRenderer;
private JobQueueGroup $jobQueueGroup;
private WikiPageFactory $wikiPageFactory;
private StatsFactory $statsFactory;
private ParserOutputAccess $parserOutputAccess;
private CategoryManager $categoryManager;
private TotalsLookup $totalsLookup;
private Database $database;
@ -70,7 +70,7 @@ class Hooks implements
* @param LinkRenderer $linkRenderer
* @param JobQueueGroup $jobQueueGroup
* @param WikiPageFactory $wikiPageFactory
* @param StatsFactory $statsFactory
* @param ParserOutputAccess $parserOutputAccess
* @param CategoryManager $categoryManager
* @param TotalsLookup $totalsLookup
* @param Database $database
@ -79,7 +79,7 @@ class Hooks implements
LinkRenderer $linkRenderer,
JobQueueGroup $jobQueueGroup,
WikiPageFactory $wikiPageFactory,
StatsFactory $statsFactory,
ParserOutputAccess $parserOutputAccess,
CategoryManager $categoryManager,
TotalsLookup $totalsLookup,
Database $database,
@ -88,7 +88,7 @@ class Hooks implements
$this->linkRenderer = $linkRenderer;
$this->jobQueueGroup = $jobQueueGroup;
$this->wikiPageFactory = $wikiPageFactory;
$this->statsFactory = $statsFactory;
$this->parserOutputAccess = $parserOutputAccess;
$this->categoryManager = $categoryManager;
$this->totalsLookup = $totalsLookup;
$this->database = $database;
@ -364,7 +364,7 @@ class Hooks implements
$updates[] = new LintUpdate(
$this->wikiPageFactory,
$this->statsFactory,
$this->parserOutputAccess,
$renderedRevision,
);
}

View file

@ -20,32 +20,29 @@
namespace MediaWiki\Linter;
use MediaWiki\Content\Renderer\ContentParseParams;
use MediaWiki\Content\TextContent;
use MediaWiki\Deferred\DataUpdate;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\ParserOutputAccess;
use MediaWiki\Page\WikiPageFactory;
use MediaWiki\Revision\RenderedRevision;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;
use Wikimedia\Stats\StatsFactory;
class LintUpdate extends DataUpdate {
private WikiPageFactory $wikiPageFactory;
private StatsFactory $statsFactory;
private ParserOutputAccess $parserOutputAccess;
private RenderedRevision $renderedRevision;
/**
* Store the collaborators this update is constructed with.
*
* NOTE(review): this listing shows both a StatsFactory and a
* ParserOutputAccess parameter; presumably these are the pre- and
* post-change lines of the same diff hunk rather than one signature —
* confirm against the actual file.
*
* @param WikiPageFactory $wikiPageFactory
* @param StatsFactory $statsFactory Used elsewhere in this class to obtain counters
* @param ParserOutputAccess $parserOutputAccess Used elsewhere in this class to request parser output
* @param RenderedRevision $renderedRevision
*/
public function __construct(
WikiPageFactory $wikiPageFactory,
StatsFactory $statsFactory,
ParserOutputAccess $parserOutputAccess,
RenderedRevision $renderedRevision
) {
parent::__construct();
$this->wikiPageFactory = $wikiPageFactory;
$this->statsFactory = $statsFactory;
$this->parserOutputAccess = $parserOutputAccess;
$this->renderedRevision = $renderedRevision;
}
@ -70,9 +67,6 @@ class LintUpdate extends DataUpdate {
$pOptions->setUseParsoid();
$pOptions->setRenderReason( 'LintUpdate' );
// XXX no previous output available on this code path
$previousOutput = null;
LoggerFactory::getInstance( 'Linter' )->debug(
'{method}: Parsing {page}',
[
@ -86,41 +80,12 @@ class LintUpdate extends DataUpdate {
// This matches the behavior of RefreshLinksJob.
// However, unlike RefreshLinksJob, we don't parse if we already
// have the output in the cache. This avoids duplicating the effort
// of ParsoidCachePrewarmJob.
$cpoParams = new ContentParseParams(
$rev->getPage(),
$rev->getId(),
$pOptions,
// no need to generate HTML
false,
$previousOutput
// of ParsoidCachePrewarmJob / DiscussionTools
// (note that even with OPT_NO_UPDATE_CACHE we still update the
// *local* cache, which prevents wasting effort on duplicate parses)
$this->parserOutputAccess->getParserOutput(
$page, $pOptions, $rev,
ParserOutputAccess::OPT_NO_UPDATE_CACHE
);
$output = $content->getContentHandler()->getParserOutput( $content, $cpoParams );
// T371713: Temporary statistics collection code to determine
// feasibility of Parsoid selective update
$sampleRate = MediaWikiServices::getInstance()->getMainConfig()->get(
MainConfigNames::ParsoidSelectiveUpdateSampleRate
);
$doSample = ( $sampleRate && mt_rand( 1, $sampleRate ) === 1 );
if ( $doSample ) {
$labels = [
'source' => 'LintUpdate',
'type' => 'full',
'reason' => $pOptions->getRenderReason(),
'parser' => 'parsoid',
'opportunistic' => 'false',
];
$totalStat = $this->statsFactory
->getCounter( 'parsercache_selective_total' );
$timeStat = $this->statsFactory
->getCounter( 'parsercache_selective_cpu_seconds' );
foreach ( $labels as $key => $value ) {
$totalStat->setLabel( $key, $value );
$timeStat->setLabel( $key, $value );
}
$totalStat->increment();
$timeStat->incrementBy( $output->getTimeProfile( 'cpu' ) );
}
}
}

View file

@ -32,7 +32,6 @@ use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;
use MediaWikiIntegrationTestCase;
use RefreshLinksJob;
use Wikimedia\Stats\StatsFactory;
use WikiPage;
use WikitextContent;
@ -45,10 +44,15 @@ class LintUpdateTest extends MediaWikiIntegrationTestCase {
/**
* Override configuration for the tests in this class: enable Parsoid
* linting, turn on LinterParseOnDerivedDataUpdate, and set
* ParserCacheType to CACHE_NONE.
*
* NOTE(review): the body below appears to interleave the old
* overrideConfigValue() calls with the new overrideConfigValues() call
* from a diff — confirm which lines are current.
*/
protected function setUp(): void {
parent::setUp();
$this->overrideConfigValue( MainConfigNames::ParsoidSettings, [
'linting' => true
$this->overrideConfigValues( [
MainConfigNames::ParsoidSettings => [
'linting' => true
],
'LinterParseOnDerivedDataUpdate' => true,
// Ensure that parser cache contents don't
// affect tests.
'ParserCacheType' => CACHE_NONE,
] );
$this->overrideConfigValue( 'LinterParseOnDerivedDataUpdate', true );
}
/**
@ -94,6 +98,8 @@ class LintUpdateTest extends MediaWikiIntegrationTestCase {
new JavascriptContent( '{}' )
)
);
// Clear the local cache in the ParserOutputAccess
$this->resetServices();
$update = $this->newLintUpdate( $this->newRenderedRevision( $page, $rev ) );
$update->doUpdate();
@ -131,6 +137,9 @@ class LintUpdateTest extends MediaWikiIntegrationTestCase {
],
] + $contentHandlers );
// Clear the local cache in the ParserOutputAccess
$this->resetServices();
$update = $this->newLintUpdate( $newRev );
$update->doUpdate();
}
@ -142,6 +151,9 @@ class LintUpdateTest extends MediaWikiIntegrationTestCase {
* @covers \MediaWiki\Linter\Hooks::onRevisionDataUpdates
*/
public function testPageEditIntegration() {
// Clear the local cache in the ParserOutputAccess
$this->resetServices();
$hookCalled = 0;
$this->setTemporaryHook( 'ParserLogLinterData', static function () use ( &$hookCalled ) {
$hookCalled++;
@ -165,6 +177,8 @@ class LintUpdateTest extends MediaWikiIntegrationTestCase {
public function testRefreshLinksJobIntegration() {
// NOTE: This performs an edit, so do it before installing the temp hook below!
$page = $this->getExistingTestPage();
// Clear the local cache in the ParserOutputAccess
$this->resetServices();
$hookCalled = 0;
$this->setTemporaryHook( 'ParserLogLinterData', static function () use ( &$hookCalled ) {
@ -192,16 +206,19 @@ class LintUpdateTest extends MediaWikiIntegrationTestCase {
);
$rrev->setRevisionParserOutput( new ParserOutput( 'testing' ) );
// Clear the local cache in the ParserOutputAccess
$this->resetServices();
return $rrev;
}
/**
* Build a LintUpdate for the given rendered revision, taking the
* WikiPageFactory and ParserOutputAccess from the test service container.
*
* NOTE(review): both StatsFactory::newNull() and $parserOutputAccess are
* passed below; presumably one is the removed and the other the added
* line of a diff — confirm against the actual file.
*
* @param RenderedRevision $renderedRevision
* @return LintUpdate
*/
private function newLintUpdate( RenderedRevision $renderedRevision ) {
$wikiPageFactory = $this->getServiceContainer()->getWikiPageFactory();
$parserOutputAccess = $this->getServiceContainer()->getParserOutputAccess();
return new LintUpdate(
$wikiPageFactory,
StatsFactory::newNull(),
$parserOutputAccess,
$renderedRevision
);
}