2022-05-11 01:08:54 +00:00
|
|
|
<?php
|
|
|
|
/**
|
2022-08-28 19:25:04 +00:00
|
|
|
* Helper functions for using the REST interface to Parsoid.
|
2022-05-11 01:08:54 +00:00
|
|
|
*
|
|
|
|
* @file
|
|
|
|
* @ingroup Extensions
|
|
|
|
* @copyright 2022 VisualEditor Team and others; see AUTHORS.txt
|
|
|
|
* @license MIT
|
|
|
|
*/
|
|
|
|
|
|
|
|
namespace MediaWiki\Extension\VisualEditor;
|
|
|
|
|
2024-10-20 12:01:32 +00:00
|
|
|
use MediaWiki\Content\WikitextContent;
|
2023-08-31 00:28:23 +00:00
|
|
|
use MediaWiki\MediaWikiServices;
|
2022-09-04 09:00:14 +00:00
|
|
|
use MediaWiki\Page\PageIdentity;
|
2022-08-28 19:25:04 +00:00
|
|
|
use MediaWiki\Permissions\Authority;
|
2023-01-16 13:11:20 +00:00
|
|
|
use MediaWiki\Rest\Handler\Helper\HtmlInputTransformHelper;
|
|
|
|
use MediaWiki\Rest\Handler\Helper\HtmlOutputRendererHelper;
|
|
|
|
use MediaWiki\Rest\Handler\Helper\PageRestHelperFactory;
|
2022-08-03 19:56:26 +00:00
|
|
|
use MediaWiki\Revision\MutableRevisionRecord;
|
2022-05-11 01:08:54 +00:00
|
|
|
use MediaWiki\Revision\RevisionRecord;
|
2022-08-03 19:56:26 +00:00
|
|
|
use MediaWiki\Revision\SlotRecord;
|
2024-06-10 20:35:51 +00:00
|
|
|
use MediaWiki\User\User;
|
2023-07-07 07:59:23 +00:00
|
|
|
use Wikimedia\Bcp47Code\Bcp47Code;
|
2022-05-11 01:08:54 +00:00
|
|
|
|
2022-09-04 09:00:14 +00:00
|
|
|
class DirectParsoidClient implements ParsoidClient {

	/**
	 * Requested Parsoid HTML version.
	 * Keep this in sync with the Accept: header in
	 * ve.init.mw.ArticleTargetLoader.js
	 */
	public const PARSOID_VERSION = '2.8.0';

	/** Flavor used by getHtmlOutputRendererHelper() when the caller does not pass one. */
	private const FLAVOR_DEFAULT = 'view';

	/** Factory that creates the HTML output and HTML input helpers used below. */
	private PageRestHelperFactory $helperFactory;

	/** Authority on whose behalf the helpers are created. */
	private Authority $performer;

	/**
	 * @param PageRestHelperFactory $helperFactory Source of the renderer/transform helpers
	 * @param Authority $performer The authority performing the requests
	 */
	public function __construct(
		PageRestHelperFactory $helperFactory,
		Authority $performer
	) {
		$this->helperFactory = $helperFactory;
		$this->performer = $performer;
	}

	/**
	 * Create and configure an HtmlOutputRendererHelper for the given page.
	 *
	 * The helper is pinned to self::PARSOID_VERSION. The flavor is only applied
	 * when stashing is disabled.
	 *
	 * @param PageIdentity $page The page to render
	 * @param ?RevisionRecord $revision Revision to render, if any
	 * @param ?Bcp47Code $pageLanguage Page language to set on the helper, if any
	 * @param bool $stash Whether stashing should be enabled on the helper
	 * @param string $flavor Flavor to set on the helper when not stashing
	 *
	 * @return HtmlOutputRendererHelper
	 */
	private function getHtmlOutputRendererHelper(
		PageIdentity $page,
		?RevisionRecord $revision = null,
		?Bcp47Code $pageLanguage = null,
		bool $stash = false,
		string $flavor = self::FLAVOR_DEFAULT
	): HtmlOutputRendererHelper {
		// TODO: remove this once we no longer need a User object for rate limiting (T310476).
		$user = $this->performer instanceof User
			? $this->performer
			: User::newFromIdentity( $this->performer->getUser() );

		$renderer = $this->helperFactory->newHtmlOutputRendererHelper( $page, [], $user, $revision );

		// Ask for a compatible output version rather than whatever the default is.
		$renderer->setOutputProfileVersion( self::PARSOID_VERSION );
		$renderer->setStashingEnabled( $stash );

		if ( !$stash ) {
			$renderer->setFlavor( $flavor );
		}
		if ( $revision !== null ) {
			$renderer->setRevision( $revision );
		}
		if ( $pageLanguage !== null ) {
			$renderer->setPageLanguage( $pageLanguage );
		}

		return $renderer;
	}

	/**
	 * Create an HtmlInputTransformHelper for the given HTML, wrapped in a fake REST body.
	 *
	 * @param PageIdentity $page The page the HTML belongs to
	 * @param string $html The HTML to transform
	 * @param ?int $oldid Revision ID recorded as the original's 'revid', if any
	 * @param ?string $etag Value recorded as the original's 'renderid', if any
	 * @param ?Bcp47Code $pageLanguage Page language passed to the helper factory
	 *
	 * @return HtmlInputTransformHelper
	 */
	private function getHtmlInputTransformHelper(
		PageIdentity $page,
		string $html,
		?int $oldid = null,
		?string $etag = null,
		?Bcp47Code $pageLanguage = null
	): HtmlInputTransformHelper {
		// Fake REST body
		$body = [ 'html' => [ 'body' => $html ] ];

		// Only describe the original when at least one of the two identifiers is set.
		if ( $oldid || $etag ) {
			$body['original'] = [
				'revid' => $oldid,
				'renderid' => $etag,
			];
		}

		$transformer = $this->helperFactory->newHtmlInputTransformHelper(
			/* envOptions: */ [],
			$page,
			$body,
			/* parameters: */ [],
			/* originalRevision: */ null,
			$pageLanguage
		);

		$transformer->setMetrics(
			MediaWikiServices::getInstance()->getParsoidSiteConfig()->prefixedStatsFactory()
		);

		return $transformer;
	}

	/**
	 * Request page HTML from Parsoid.
	 *
	 * @param RevisionRecord $revision Page revision
	 * @param ?Bcp47Code $targetLanguage Page language (default: `null`)
	 *
	 * @return array An array mimicking a RESTbase server's response, with keys: 'headers' and 'body'
	 * @phan-return array{body:string,headers:array<string,string>}
	 */
	public function getPageHtml( RevisionRecord $revision, ?Bcp47Code $targetLanguage = null ): array {
		// In the VE client, we always want to stash.
		$renderer = $this->getHtmlOutputRendererHelper(
			$revision->getPage(),
			$revision,
			$targetLanguage,
			true
		);
		$html = $renderer->getHtml()->getRawText();

		return $this->fakeRESTbaseHTMLResponse( $html, $renderer );
	}

	/**
	 * Build an unsaved revision (ID 0) of the page whose main slot holds the given wikitext.
	 *
	 * @param PageIdentity $page The page the revision is attached to
	 * @param string $wikitext Wikitext for the main slot
	 *
	 * @return RevisionRecord
	 */
	private function makeFakeRevision(
		PageIdentity $page,
		string $wikitext
	): RevisionRecord {
		$fakeRevision = new MutableRevisionRecord( $page );
		$fakeRevision->setId( 0 );
		$fakeRevision->setPageId( $page->getId() );

		$mainContent = new WikitextContent( $wikitext );
		$fakeRevision->setContent( SlotRecord::MAIN, $mainContent );

		return $fakeRevision;
	}

	/**
	 * Transform wikitext to HTML with Parsoid.
	 *
	 * @param PageIdentity $page The page the content belongs to use as the parsing context
	 * @param Bcp47Code $targetLanguage Page language
	 * @param string $wikitext The wikitext fragment to parse
	 * @param bool $bodyOnly Whether to provide only the contents of the `<body>` tag
	 * @param int|null $oldid What oldid revision, if any, to base the request from;
	 *   not read by this implementation
	 * @param bool $stash Whether to stash the result in the server-side cache
	 *
	 * @return array An array mimicking a RESTbase server's response, with keys: 'headers' and 'body'
	 * @phan-return array{body:string,headers:array<string,string>}
	 */
	public function transformWikitext(
		PageIdentity $page,
		Bcp47Code $targetLanguage,
		string $wikitext,
		bool $bodyOnly,
		?int $oldid,
		bool $stash
	): array {
		$renderer = $this->getHtmlOutputRendererHelper(
			$page,
			$this->makeFakeRevision( $page, $wikitext ),
			$targetLanguage,
			$stash
		);

		if ( $bodyOnly ) {
			$renderer->setFlavor( 'fragment' );
		}

		return $this->fakeRESTbaseHTMLResponse( $renderer->getHtml()->getRawText(), $renderer );
	}

	/**
	 * Transform HTML to wikitext with Parsoid
	 *
	 * @param PageIdentity $page The page the content belongs to
	 * @param Bcp47Code $targetLanguage The desired output language
	 * @param string $html The HTML of the page to be transformed
	 * @param ?int $oldid What oldid revision, if any, to base the request from
	 * @param ?string $etag The ETag identifying the original rendering, if any
	 *
	 * @return array An array mimicking a RESTbase server's response, with keys: 'headers' and 'body'
	 * @phan-return array{body:string,headers:array<string,string>}
	 */
	public function transformHTML(
		PageIdentity $page, Bcp47Code $targetLanguage, string $html, ?int $oldid, ?string $etag
	): array {
		$content = $this
			->getHtmlInputTransformHelper( $page, $html, $oldid, $etag, $targetLanguage )
			->getContent();

		// The content's default format is both the reported type and the serialization format.
		$format = $content->getDefaultFormat();
		$headers = [
			'Content-Type' => $format,
		];

		return [
			'headers' => $headers,
			'body' => $content->serialize( $format ),
		];
	}

	/**
	 * Wrap rendered output in an array shaped like a RESTbase response.
	 *
	 * The 'content-language' and 'etag' headers are read from the helper.
	 *
	 * @param mixed $data Value to return under the 'body' key
	 * @param HtmlOutputRendererHelper $helper Helper that produced $data
	 *
	 * @return array Array with keys 'headers' and 'body'
	 */
	private function fakeRESTbaseHTMLResponse( $data, HtmlOutputRendererHelper $helper ): array {
		$headers = [
			'content-language' => $helper->getHtmlOutputContentLanguage()->toBcp47Code(),
			'etag' => $helper->getETag()
		];

		return [
			'headers' => $headers,
			'body' => $data,
		];
	}

}
|