mediawiki-extensions-Linter/includes/LintUpdate.php
daniel 8b22ad5d78 Trigger Parsoid run when page metadata is being updated
When RESTBase is turned off, Parsoid runs will no longer be triggered
on template changes. This creates a new mechanism to do that, based on
the RevisionDataUpdates hook called by DerivedPageDataUpdater. The new
behavior is controlled by a feature flag, LinterParseOnDerivedDataUpdate,
which is enabled by default. In WMF production, this should be
turned off as long as we are still triggering Parsoid parses through
the pregeneration mechanism in RESTBase.

Note that this will not write ParserOutput to the ParserCache. On edits,
pages will get parsed with Parsoid twice, once to trigger the lint data
update, and once by ParsoidCachePrewarmJob to populate the ParserCache.
Both parses will trigger the ParserLogLinterData hook; the lint data
from the second parse is redundant.

However, while ParsoidCachePrewarmJob and RevisionDataUpdates get
triggered together on edits, they also get triggered separately:
ParsoidCachePrewarmJob by page views with parser cache misses; and
RevisionDataUpdates when pages get invalidated due to template changes.

Because ParsoidCachePrewarmJob and RevisionDataUpdates generally get
triggered in different situations, it seems cleaner to keep the two
mechanisms independent of each other, and live with the duplicate parse
on edit.

Bug: T361013
Change-Id: If53841ee583ce240dd245d640b9ea9c97e1eaa55
2024-06-03 16:50:17 -05:00

93 lines
2.7 KiB
PHP

<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
namespace MediaWiki\Linter;
use MediaWiki\Content\TextContent;
use MediaWiki\Deferred\DataUpdate;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\Page\WikiPageFactory;
use MediaWiki\Parser\Parsoid\ParsoidParser;
use MediaWiki\Revision\RenderedRevision;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;
/**
 * Data update that runs a Parsoid parse of a rendered revision's main slot.
 *
 * The parse is performed for its side effects only: the output returned by
 * the parser is discarded and nothing in this class stores it.
 */
class LintUpdate extends DataUpdate {

	/** @var ParsoidParser Parser used to run the Parsoid parse. */
	private ParsoidParser $parsoid;

	/** @var WikiPageFactory Used to look up the current state of the page. */
	private WikiPageFactory $wikiPageFactory;

	/** @var RenderedRevision Supplies the revision this update operates on. */
	private RenderedRevision $renderedRevision;

	/**
	 * @param ParsoidParser $parsoid
	 * @param WikiPageFactory $wikiPageFactory
	 * @param RenderedRevision $renderedRevision
	 */
	public function __construct(
		ParsoidParser $parsoid,
		WikiPageFactory $wikiPageFactory,
		RenderedRevision $renderedRevision
	) {
		parent::__construct();

		$this->renderedRevision = $renderedRevision;
		$this->wikiPageFactory = $wikiPageFactory;
		$this->parsoid = $parsoid;
	}

	/**
	 * Parse the revision's main slot with Parsoid.
	 *
	 * Nothing happens when the revision is no longer the page's latest
	 * revision, or when the main slot does not hold text content.
	 */
	public function doUpdate() {
		$revision = $this->renderedRevision->getRevision();
		$slot = $revision->getSlot( SlotRecord::MAIN, RevisionRecord::RAW );

		$pageRef = $revision->getPage();
		$wikiPage = $this->wikiPageFactory->newFromTitle( $pageRef );
		$revisionId = $revision->getId();

		$isLatest = ( $wikiPage->getLatest() === $revisionId );
		if ( !$isLatest ) {
			// A newer revision exists, so this one is stale; skip it.
			return;
		}

		$text = $slot->getContent();
		if ( !( $text instanceof TextContent ) ) {
			// Only text content can be linted.
			return;
		}

		$parserOptions = $wikiPage->makeParserOptions( 'canonical' );
		$parserOptions->setUseParsoid();

		$logContext = [
			'method' => __METHOD__,
			'page' => $wikiPage->getTitle()->getPrefixedDBkey(),
			'touched' => $wikiPage->getTouched()
		];
		$logger = LoggerFactory::getInstance( 'Linter' );
		$logger->debug( '{method}: Parsing {page}', $logContext );

		// The parser output is intentionally dropped rather than written to
		// the parser cache, to avoid flooding it (RefreshLinksJob behaves the
		// same way).
		// Note that no parser cache lookup is made in this method: once the
		// guards above have passed, the parse always runs.
		$this->parsoid->parse(
			$text->getText(),
			$pageRef,
			$parserOptions,
			true,
			true,
			$revisionId
		);
	}
}