mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-12-13 06:49:42 +00:00
209aaf6943
* DirectParsoidClient: Actually allow null to be provided. Previously we tried to call methods on it unconditionally. * VRSParsoidClient: Require the param to be provided (even if null). No reason to diverge from the interface, which requires it. Change-Id: Id9a450dc8b8eb3e82cf87718b96975e5a3c6180c
264 lines
9.3 KiB
PHP
264 lines
9.3 KiB
PHP
<?php
|
|
/**
|
|
* Helper functions for using the REST interface to Parsoid/RESTBase.
|
|
*
|
|
* @file
|
|
* @ingroup Extensions
|
|
* @copyright 2022 VisualEditor Team and others; see AUTHORS.txt
|
|
* @license MIT
|
|
*/
|
|
|
|
namespace MediaWiki\Extension\VisualEditor;
|
|
|
|
use Language;
|
|
use MediaWiki\Page\PageIdentity;
|
|
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
|
|
use MediaWiki\Revision\MutableRevisionRecord;
|
|
use MediaWiki\Revision\RevisionRecord;
|
|
use MediaWiki\Revision\SlotRecord;
|
|
use ParserOutput;
|
|
use RequestContext;
|
|
use Title;
|
|
use WikiMap;
|
|
use Wikimedia\Parsoid\Config\DataAccess;
|
|
use Wikimedia\Parsoid\Config\SiteConfig;
|
|
use Wikimedia\Parsoid\Core\SelserData;
|
|
use Wikimedia\Parsoid\Parsoid;
|
|
use Wikimedia\Parsoid\Utils\DOMUtils;
|
|
use Wikimedia\UUID\GlobalIdGenerator;
|
|
use WikitextContent;
|
|
|
|
class DirectParsoidClient implements ParsoidClient {
|
|
/**
|
|
* Requested Parsoid HTML version.
|
|
* Keep this in sync with the Accept: header in
|
|
* ve.init.mw.ArticleTargetLoader.js
|
|
*/
|
|
public const PARSOID_VERSION = '2.4.0';
|
|
|
|
/** @var array Parsoid-specific settings array from $config */
|
|
private $parsoidSettings;
|
|
|
|
/** @var SiteConfig */
|
|
protected $siteConfig;
|
|
|
|
/** @var PageConfigFactory */
|
|
protected $pageConfigFactory;
|
|
|
|
/** @var DataAccess */
|
|
protected $dataAccess;
|
|
|
|
/** @var GlobalIdGenerator */
|
|
private $globalIdGenerator;
|
|
|
|
/**
|
|
* @param array $parsoidSettings
|
|
* @param SiteConfig $siteConfig
|
|
* @param PageConfigFactory $pageConfigFactory
|
|
* @param DataAccess $dataAccess
|
|
* @param GlobalIdGenerator $globalIdGenerator
|
|
*/
|
|
public function __construct(
|
|
array $parsoidSettings,
|
|
SiteConfig $siteConfig,
|
|
PageConfigFactory $pageConfigFactory,
|
|
DataAccess $dataAccess,
|
|
GlobalIdGenerator $globalIdGenerator
|
|
) {
|
|
$this->parsoidSettings = $parsoidSettings;
|
|
$this->siteConfig = $siteConfig;
|
|
$this->pageConfigFactory = $pageConfigFactory;
|
|
$this->dataAccess = $dataAccess;
|
|
$this->globalIdGenerator = $globalIdGenerator;
|
|
}
|
|
|
|
/**
|
|
* Request page HTML
|
|
*
|
|
* @param RevisionRecord $revision Page revision
|
|
* @param ?Language $targetLanguage Desired output language
|
|
*
|
|
* @return array The response
|
|
*/
|
|
public function getPageHtml( RevisionRecord $revision, ?Language $targetLanguage ): array {
|
|
$page = $revision->getPage();
|
|
$title = Title::castFromPageIdentity( $page );
|
|
$targetLanguage = $targetLanguage ?: $title->getPageLanguage();
|
|
$oldid = $revision->getId();
|
|
$lang = $targetLanguage->getCode();
|
|
// This is /page/html/$title/$revision?redirect=false&stash=true
|
|
// With Accept-Language: $lang
|
|
$envOptions = [
|
|
// $attribs['envOptions'] is created in ParsoidHandler::getRequestAttributes()
|
|
'prefix' => WikiMap::getCurrentWikiId(),
|
|
'pageName' => $title->getPrefixedDBkey(),
|
|
'htmlVariantLanguage' => $lang,
|
|
'outputContentVersion' => Parsoid::resolveContentVersion(
|
|
self::PARSOID_VERSION
|
|
) ?? Parsoid::defaultHTMLVersion(),
|
|
];
|
|
// $pageConfig originally created in
|
|
// ParsoidHandler::tryToCreatePageConfig
|
|
$user = RequestContext::getMain()->getUser();
|
|
// Note: Parsoid by design isn't supposed to use the user
|
|
// context right now, and all user state is expected to be
|
|
// introduced as a post-parse transform. So although we pass a
|
|
// User here, it only currently affects the output in obscure
|
|
// corner cases; see PageConfigFactory::create() for more.
|
|
$pageConfig = $this->pageConfigFactory->create(
|
|
$page, $user, $revision, null, $lang, $this->parsoidSettings
|
|
);
|
|
if ( $pageConfig->getRevisionContent() === null ) {
|
|
throw new \LogicException( "Specified revision does not exist" );
|
|
}
|
|
$parsoid = new Parsoid( $this->siteConfig, $this->dataAccess );
|
|
$parserOutput = new ParserOutput();
|
|
// Note that $headers is an out parameter
|
|
// $envOptions originally included $opts['contentmodel'] here as well
|
|
$out = $parsoid->wikitext2html(
|
|
$pageConfig, $envOptions, $headers, $parserOutput
|
|
);
|
|
$tid = $this->globalIdGenerator->newUUIDv1();
|
|
$etag = "W/\"{$oldid}/{$tid}\"";
|
|
# XXX: we could cache this locally using the $etag as a key,
|
|
# then reuse it when transforming back to wikitext below.
|
|
return [
|
|
'body' => $out,
|
|
'headers' => $headers + [
|
|
'etag' => $etag,
|
|
],
|
|
];
|
|
}
|
|
|
|
/**
|
|
* Transform HTML to wikitext via Parsoid
|
|
*
|
|
* @param PageIdentity $page The page the content belongs to
|
|
* @param Language $targetLanguage The desired output language
|
|
* @param string $html The HTML of the page to be transformed
|
|
* @param ?int $oldid What oldid revision, if any, to base the request from (default: `null`)
|
|
* @param ?string $etag The ETag to set in the HTTP request header
|
|
*
|
|
* @return array The response, 'code', 'reason', 'headers' and 'body'
|
|
*/
|
|
public function transformHTML(
|
|
PageIdentity $page, Language $targetLanguage, string $html, ?int $oldid, ?string $etag
|
|
): array {
|
|
// This is POST /transform/html/to/wikitext/$title/$oldid
|
|
// with header If-Match: $etag
|
|
// and data: [ 'html' => $html ]
|
|
$lang = $targetLanguage->getCode();
|
|
// $pageConfig originally created in
|
|
// ParsoidHandler::tryToCreatePageConfig
|
|
$user = RequestContext::getMain()->getUser();
|
|
// Note: Parsoid by design isn't supposed to use the user
|
|
// context right now, and all user state is expected to be
|
|
// introduced as a post-parse transform. So although we pass a
|
|
// User here, it only currently affects the output in obscure
|
|
// corner cases; see PageConfigFactory::create() for more.
|
|
$pageConfig = $this->pageConfigFactory->create(
|
|
$page, $user, $oldid, null, $lang, $this->parsoidSettings
|
|
);
|
|
$doc = DOMUtils::parseHTML( $html, true );
|
|
$vEdited = DOMUtils::extractInlinedContentVersion( $doc ) ??
|
|
Parsoid::defaultHTMLVersion();
|
|
// T267990: This should be replaced by PET's ParserCache/stash
|
|
// mechanism. (RESTBase did the fetch based on the etag, and then
|
|
// compared vEdited to vOriginal to determine if it was usable.)
|
|
$oldHtml = null;
|
|
$selserData = ( $oldid === null ) ? null : new SelserData(
|
|
$pageConfig->getPageMainContent(), $oldHtml
|
|
);
|
|
$parsoid = new Parsoid( $this->siteConfig, $this->dataAccess );
|
|
$wikitext = $parsoid->dom2wikitext( $pageConfig, $doc, [
|
|
'inputContentVersion' => $vEdited,
|
|
'htmlSize' => mb_strlen( $html ),
|
|
], $selserData );
|
|
return [
|
|
'body' => $wikitext,
|
|
'headers' => [],
|
|
];
|
|
}
|
|
|
|
/**
|
|
* Transform wikitext to HTML via Parsoid.
|
|
*
|
|
* @param PageIdentity $page The page the content belongs to
|
|
* @param Language $targetLanguage The desired output language
|
|
* @param string $wikitext The wikitext fragment to parse
|
|
* @param bool $bodyOnly Whether to provide only the contents of the `<body>` tag
|
|
* @param ?int $oldid What oldid revision, if any, to base the request from (default: `null`)
|
|
* @param bool $stash Whether to stash the result in the server-side cache (default: `false`)
|
|
*
|
|
* @return array The response, 'code', 'reason', 'headers' and 'body'
|
|
*/
|
|
public function transformWikitext(
|
|
PageIdentity $page, Language $targetLanguage, string $wikitext,
|
|
bool $bodyOnly, ?int $oldid, bool $stash
|
|
): array {
|
|
$title = Title::castFromPageIdentity( $page );
|
|
|
|
// This is POST /transform/wikitext/to/html/$title/$oldid
|
|
// with data: [
|
|
// 'wikitext' => $wikitext,
|
|
// 'body_only' => $bodyOnly,
|
|
// 'stash' => $stash,
|
|
// ]
|
|
// T267990: Stashing features are not implemented in zero-conf mode;
|
|
// they will eventually be replaced by PET's ParserCache/stash mechanism
|
|
$lang = $targetLanguage->getCode();
|
|
$envOptions = [
|
|
// $attribs['envOptions'] is created in ParsoidHandler::getRequestAttributes()
|
|
'prefix' => WikiMap::getCurrentWikiId(),
|
|
'pageName' => $title->getPrefixedDBkey(),
|
|
'htmlVariantLanguage' => $lang,
|
|
'outputContentVersion' => Parsoid::resolveContentVersion(
|
|
self::PARSOID_VERSION
|
|
) ?? Parsoid::defaultHTMLVersion(),
|
|
'body_only' => $bodyOnly,
|
|
// When VE does a fragment expansion (for example when
|
|
// template arguments are edited and it wants an updated
|
|
// render of the template) it's not going to want section
|
|
// tags; in this case bodyOnly=true and wrapSections=false.
|
|
// (T181226)
|
|
// But on the other hand, VE doesn't do anything with section
|
|
// tags right now other than strip them, so we'll just always
|
|
// pass wrapSections=false for now.
|
|
'wrapSections' => false,
|
|
];
|
|
// $pageConfig originally created in
|
|
// ParsoidHandler::tryToCreatePageConfig
|
|
$user = RequestContext::getMain()->getUser();
|
|
// Note: Parsoid by design isn't supposed to use the user
|
|
// context right now, and all user state is expected to be
|
|
// introduced as a post-parse transform. So although we pass a
|
|
// User here, it only currently affects the output in obscure
|
|
// corner cases; see PageConfigFactory::create() for more.
|
|
|
|
// Create a mutable revision record and set to the desired wikitext.
|
|
$tmpRevision = new MutableRevisionRecord( $page );
|
|
$tmpRevision->setSlot(
|
|
SlotRecord::newUnsaved(
|
|
SlotRecord::MAIN,
|
|
new WikitextContent( $wikitext )
|
|
)
|
|
);
|
|
|
|
$pageConfig = $this->pageConfigFactory->create(
|
|
$page, $user, $tmpRevision, null, $lang, $this->parsoidSettings
|
|
);
|
|
$parsoid = new Parsoid( $this->siteConfig, $this->dataAccess );
|
|
$parserOutput = new ParserOutput();
|
|
// Note that $headers is an out parameter
|
|
$out = $parsoid->wikitext2html(
|
|
$pageConfig, $envOptions, $headers, $parserOutput
|
|
);
|
|
// No etag generation in this pathway, and no caching
|
|
// This is just used to update previews when you edit a template
|
|
return [
|
|
'body' => $out,
|
|
'headers' => $headers,
|
|
];
|
|
}
|
|
}
|