Cite: Remove more Parsoid internals knowledge

* Remove use of $env from ReferencesData and RefGroup by
  providing high-level helpers in ParsoidExtensionAPI.

  - Given a fragment id, provide helpers to fetch fragment DOM
    or fragment HTML
  - Fetch the URI for the current page (being parsed)

* Cite still has a lot of subtle knowledge about how
  data-parsoid and data-mw attributes are held off to the
  side in a bag, and about all the pp* and load/store
  manipulation of those attributes. It would be an interesting
  exercise to either purge this implementation of those notions
  OR figure out high-level concepts that we document as being
  part of the Parsoid reality that we'll forever support.

Bug: T242746
Change-Id: I29ff154f2f17123b9756dfd2f3b422f0b30222b1
This commit is contained in:
Subramanya Sastry 2020-02-07 11:17:42 -05:00 committed by jenkins-bot
parent 1f87104378
commit d0a9c42c98
4 changed files with 43 additions and 61 deletions

View file

@ -6,10 +6,9 @@ namespace Wikimedia\Parsoid\Ext\Cite;
use DOMDocument; use DOMDocument;
use DOMElement; use DOMElement;
use stdClass; use stdClass;
use Wikimedia\Parsoid\Config\Env; use Wikimedia\Parsoid\Config\ParsoidExtensionAPI;
use Wikimedia\Parsoid\Utils\DOMDataUtils; use Wikimedia\Parsoid\Utils\DOMDataUtils;
use Wikimedia\Parsoid\Utils\DOMUtils; use Wikimedia\Parsoid\Utils\DOMUtils;
use Wikimedia\Parsoid\Utils\Title;
/** /**
* Helper class used by `<references>` implementation. * Helper class used by `<references>` implementation.
@ -43,24 +42,21 @@ class RefGroup {
/** /**
* Generate leading linkbacks * Generate leading linkbacks
* @param ParsoidExtensionAPI $extApi
* @param string $href * @param string $href
* @param string|null $group * @param string|null $group
* @param string $text * @param string $text
* @param DOMDocument $ownerDoc * @param DOMDocument $ownerDoc
* @param Env $env
* @return DOMElement * @return DOMElement
*/ */
private static function createLinkback( private static function createLinkback(
string $href, ?string $group, string $text, DOMDocument $ownerDoc, Env $env ParsoidExtensionAPI $extApi,
string $href, ?string $group, string $text, DOMDocument $ownerDoc
): DOMElement { ): DOMElement {
$a = $ownerDoc->createElement( 'a' ); $a = $ownerDoc->createElement( 'a' );
$s = $ownerDoc->createElement( 'span' ); $s = $ownerDoc->createElement( 'span' );
$textNode = $ownerDoc->createTextNode( $text . ' ' ); $textNode = $ownerDoc->createTextNode( $text . ' ' );
$title = Title::newFromText( $a->setAttribute( 'href', $extApi->getPageUri() . '#' . $href );
$env->getPageConfig()->getTitle(),
$env->getSiteConfig()
);
$a->setAttribute( 'href', $env->makeLink( $title ) . '#' . $href );
$s->setAttribute( 'class', 'mw-linkback-text' ); $s->setAttribute( 'class', 'mw-linkback-text' );
if ( $group ) { if ( $group ) {
$a->setAttribute( 'data-mw-group', $group ); $a->setAttribute( 'data-mw-group', $group );
@ -71,11 +67,13 @@ class RefGroup {
} }
/** /**
* @param Env $env * @param ParsoidExtensionAPI $extApi
* @param DOMElement $refsList * @param DOMElement $refsList
* @param stdClass $ref * @param stdClass $ref
*/ */
public function renderLine( Env $env, DOMElement $refsList, stdClass $ref ): void { public function renderLine(
ParsoidExtensionAPI $extApi, DOMElement $refsList, stdClass $ref
): void {
$ownerDoc = $refsList->ownerDocument; $ownerDoc = $refsList->ownerDocument;
// Generate the li and set ref content first, so the HTML gets parsed. // Generate the li and set ref content first, so the HTML gets parsed.
@ -83,7 +81,7 @@ class RefGroup {
$li = $ownerDoc->createElement( 'li' ); $li = $ownerDoc->createElement( 'li' );
$refDir = $ref->dir; $refDir = $ref->dir;
$refTarget = $ref->target; $refTarget = $ref->target;
$refContent = $ref->content; $refContentId = $ref->contentId;
$refGroup = $ref->group; $refGroup = $ref->group;
DOMDataUtils::addAttributes( $li, [ DOMDataUtils::addAttributes( $li, [
'about' => '#' . $refTarget, 'about' => '#' . $refTarget,
@ -99,15 +97,15 @@ class RefGroup {
'class' => 'mw-reference-text', 'class' => 'mw-reference-text',
] ]
); );
if ( $refContent ) { if ( $refContentId ) {
$content = $env->getFragment( $refContent )[0]; $content = $extApi->getContentDOM( $refContentId );
DOMUtils::migrateChildrenBetweenDocs( $content, $reftextSpan ); DOMUtils::migrateChildrenBetweenDocs( $content, $reftextSpan );
DOMDataUtils::visitAndLoadDataAttribs( $reftextSpan ); DOMDataUtils::visitAndLoadDataAttribs( $reftextSpan );
} }
$li->appendChild( $reftextSpan ); $li->appendChild( $reftextSpan );
if ( count( $ref->linkbacks ) === 1 ) { if ( count( $ref->linkbacks ) === 1 ) {
$linkback = self::createLinkback( $ref->id, $refGroup, "", $ownerDoc, $env ); $linkback = self::createLinkback( $extApi, $ref->id, $refGroup, "", $ownerDoc );
$linkback->setAttribute( 'rel', 'mw:referencedBy' ); $linkback->setAttribute( 'rel', 'mw:referencedBy' );
$li->insertBefore( $linkback, $reftextSpan ); $li->insertBefore( $linkback, $reftextSpan );
} else { } else {
@ -118,7 +116,7 @@ class RefGroup {
foreach ( $ref->linkbacks as $i => $lb ) { foreach ( $ref->linkbacks as $i => $lb ) {
$span->appendChild( $span->appendChild(
self::createLinkback( $lb, $refGroup, (string)( $i + 1 ), $ownerDoc, $env ) self::createLinkback( $extApi, $lb, $refGroup, (string)( $i + 1 ), $ownerDoc )
); );
} }
} }

View file

@ -4,7 +4,6 @@ declare( strict_types = 1 );
namespace Wikimedia\Parsoid\Ext\Cite; namespace Wikimedia\Parsoid\Ext\Cite;
use DOMElement; use DOMElement;
use Wikimedia\Parsoid\Config\Env;
use Wikimedia\Parsoid\Config\ParsoidExtensionAPI; use Wikimedia\Parsoid\Config\ParsoidExtensionAPI;
/** /**
@ -23,17 +22,17 @@ class RefProcessor {
/** /**
* @param DOMElement $body * @param DOMElement $body
* @param Env $env * @param mixed $unused unused Env object FIXME: stop passing this through?
* @param array $options * @param array $options
* @param bool $atTopLevel * @param bool $atTopLevel
*/ */
public function run( public function run(
DOMElement $body, Env $env, array $options = [], bool $atTopLevel = false DOMElement $body, $unused, array $options = [], bool $atTopLevel = false
): void { ): void {
if ( $atTopLevel ) { if ( $atTopLevel ) {
$refsData = new ReferencesData( $env ); $refsData = new ReferencesData();
References::processRefs( $this->extApi, $refsData, $body ); References::processRefs( $this->extApi, $refsData, $body );
References::insertMissingReferencesIntoDOM( $refsData, $body ); References::insertMissingReferencesIntoDOM( $this->extApi, $refsData, $body );
} }
} }
} }

View file

@ -104,7 +104,7 @@ class References extends ExtensionTag {
DOMElement $node, ReferencesData $refsData, ?string $referencesAboutId = null, DOMElement $node, ReferencesData $refsData, ?string $referencesAboutId = null,
?string $referencesGroup = '', array &$nestedRefsHTML = [] ?string $referencesGroup = '', array &$nestedRefsHTML = []
): void { ): void {
$env = $refsData->getEnv(); $env = $extApi->getEnv();
$doc = $node->ownerDocument; $doc = $node->ownerDocument;
$nestedInReferences = $referencesAboutId !== null; $nestedInReferences = $referencesAboutId !== null;
@ -114,12 +114,12 @@ class References extends ExtensionTag {
$typeOf = $node->getAttribute( 'typeof' ); $typeOf = $node->getAttribute( 'typeof' );
$isTplWrapper = DOMUtils::matchTypeOf( $node, '/^mw:Transclusion$/' ); $isTplWrapper = DOMUtils::matchTypeOf( $node, '/^mw:Transclusion$/' );
$nodeType = preg_replace( '#mw:DOMFragment/sealed/ref#', '', $typeOf, 1 ); $nodeType = preg_replace( '#mw:DOMFragment/sealed/ref#', '', $typeOf, 1 );
$content = $nodeDp->html; $contentId = $nodeDp->html;
$tplDmw = $isTplWrapper ? DOMDataUtils::getDataMw( $node ) : null; $tplDmw = $isTplWrapper ? DOMDataUtils::getDataMw( $node ) : null;
// This is the <sup> that's the meat of the sealed fragment // This is the <sup> that's the meat of the sealed fragment
/** @var DOMElement $c */ /** @var DOMElement $c */
$c = $env->getFragment( $content )[0]; $c = $extApi->getContentDOM( $contentId );
DOMUtils::assertElt( $c ); DOMUtils::assertElt( $c );
// All the actions that require loaded data-attributes on `c` are done // All the actions that require loaded data-attributes on `c` are done
// here so that we can quickly store those away for later. // here so that we can quickly store those away for later.
@ -142,7 +142,7 @@ class References extends ExtensionTag {
// elt has a group attribute, what takes precedence? // elt has a group attribute, what takes precedence?
$group = $refDmw->attrs->group ?? $referencesGroup ?? ''; $group = $refDmw->attrs->group ?? $referencesGroup ?? '';
$refName = $refDmw->attrs->name ?? ''; $refName = $refDmw->attrs->name ?? '';
$ref = $refsData->add( $env, $group, $refName, $about, $nestedInReferences ); $ref = $refsData->add( $extApi, $group, $refName, $about, $nestedInReferences );
// Add ref-index linkback // Add ref-index linkback
$linkBack = $doc->createElement( 'sup' ); $linkBack = $doc->createElement( 'sup' );
@ -150,7 +150,7 @@ class References extends ExtensionTag {
// FIXME: Lot of useless work for an edge case // FIXME: Lot of useless work for an edge case
if ( !empty( $cDp->empty ) ) { if ( !empty( $cDp->empty ) ) {
// Discard wrapper if there was no input wikitext // Discard wrapper if there was no input wikitext
$content = null; $contentId = null;
if ( !empty( $cDp->selfClose ) ) { if ( !empty( $cDp->selfClose ) ) {
unset( $refDmw->body ); unset( $refDmw->body );
} else { } else {
@ -234,27 +234,29 @@ class References extends ExtensionTag {
$node->parentNode->replaceChild( $linkBack, $node ); $node->parentNode->replaceChild( $linkBack, $node );
} else { } else {
// We don't need to delete the node now since it'll be removed in // We don't need to delete the node now since it'll be removed in
// `insertReferencesIntoDOM` when all the children all cleaned out. // `insertReferencesIntoDOM` when all the children are cleaned out.
array_push( $nestedRefsHTML, ContentUtils::ppToXML( $linkBack ), "\n" ); array_push( $nestedRefsHTML, ContentUtils::ppToXML( $linkBack ), "\n" );
} }
// Keep the first content to compare multiple <ref>s with the same name. // Keep the first content to compare multiple <ref>s with the same name.
if ( !$ref->content ) { if ( !$ref->contentId ) {
$ref->content = $content; $ref->contentId = $contentId;
$ref->dir = strtolower( $refDmw->attrs->dir ?? '' ); $ref->dir = strtolower( $refDmw->attrs->dir ?? '' );
} }
} }
/** /**
* @param ParsoidExtensionAPI $extApi
* @param DOMElement $refsNode * @param DOMElement $refsNode
* @param ReferencesData $refsData * @param ReferencesData $refsData
* @param array $nestedRefsHTML * @param array $nestedRefsHTML
* @param bool $autoGenerated * @param bool $autoGenerated
*/ */
private static function insertReferencesIntoDOM( private static function insertReferencesIntoDOM(
DOMElement $refsNode, ReferencesData $refsData, array $nestedRefsHTML, bool $autoGenerated = false ParsoidExtensionAPI $extApi, DOMElement $refsNode,
ReferencesData $refsData, array $nestedRefsHTML, bool $autoGenerated = false
): void { ): void {
$env = $refsData->getEnv(); $env = $extApi->getEnv();
$isTplWrapper = DOMUtils::matchTypeOf( $refsNode, '/^mw:Transclusion$/' ); $isTplWrapper = DOMUtils::matchTypeOf( $refsNode, '/^mw:Transclusion$/' );
$dp = DOMDataUtils::getDataParsoid( $refsNode ); $dp = DOMDataUtils::getDataParsoid( $refsNode );
$group = $dp->group ?? ''; $group = $dp->group ?? '';
@ -311,7 +313,7 @@ class References extends ExtensionTag {
if ( $refGroup ) { if ( $refGroup ) {
foreach ( $refGroup->refs as $ref ) { foreach ( $refGroup->refs as $ref ) {
$refGroup->renderLine( $env, $refsNode, $ref ); $refGroup->renderLine( $extApi, $refsNode, $ref );
} }
} }
@ -324,13 +326,14 @@ class References extends ExtensionTag {
* We process them as if there was an implicit `<references />` tag at * We process them as if there was an implicit `<references />` tag at
* the end of the DOM. * the end of the DOM.
* *
* @param ParsoidExtensionAPI $extApi
* @param ReferencesData $refsData * @param ReferencesData $refsData
* @param DOMNode $node * @param DOMNode $node
*/ */
public static function insertMissingReferencesIntoDOM( public static function insertMissingReferencesIntoDOM(
ReferencesData $refsData, DOMNode $node ParsoidExtensionAPI $extApi, ReferencesData $refsData, DOMNode $node
): void { ): void {
$env = $refsData->getEnv(); $env = $extApi->getEnv();
$doc = $node->ownerDocument; $doc = $node->ownerDocument;
foreach ( $refsData->getRefGroups() as $groupName => $refsGroup ) { foreach ( $refsData->getRefGroups() as $groupName => $refsGroup ) {
@ -360,7 +363,7 @@ class References extends ExtensionTag {
$node->appendChild( $doc->createTextNode( "\n" ) ); $node->appendChild( $doc->createTextNode( "\n" ) );
$node->appendChild( $frag ); $node->appendChild( $frag );
self::insertReferencesIntoDOM( $frag, $refsData, [ '' ], true ); self::insertReferencesIntoDOM( $extApi, $frag, $refsData, [ '' ], true );
} }
} }
@ -398,7 +401,7 @@ class References extends ExtensionTag {
$referencesGroup, $referencesGroup,
$nestedRefsHTML $nestedRefsHTML
); );
self::insertReferencesIntoDOM( $child, $refsData, $nestedRefsHTML ); self::insertReferencesIntoDOM( $extApi, $child, $refsData, $nestedRefsHTML );
} else { } else {
// Look for <ref>s embedded in data attributes // Look for <ref>s embedded in data attributes
$extApi->processHiddenHTMLInDataAttributes( $child, $extApi->processHiddenHTMLInDataAttributes( $child,

View file

@ -4,17 +4,11 @@ declare( strict_types = 1 );
namespace Wikimedia\Parsoid\Ext\Cite; namespace Wikimedia\Parsoid\Ext\Cite;
use stdClass; use stdClass;
use Wikimedia\Parsoid\Config\Env; use Wikimedia\Parsoid\Config\ParsoidExtensionAPI;
use Wikimedia\Parsoid\Utils\ContentUtils;
use Wikimedia\Parsoid\Wt2Html\TT\Sanitizer; use Wikimedia\Parsoid\Wt2Html\TT\Sanitizer;
class ReferencesData { class ReferencesData {
/**
* @var Env
*/
private $env;
/** /**
* @var int * @var int
*/ */
@ -27,11 +21,9 @@ class ReferencesData {
/** /**
* ReferencesData constructor. * ReferencesData constructor.
* @param Env $env
*/ */
public function __construct( Env $env ) { public function __construct() {
$this->index = 0; $this->index = 0;
$this->env = $env;
$this->refGroups = []; $this->refGroups = [];
} }
@ -73,7 +65,7 @@ class ReferencesData {
} }
/** /**
* @param Env $env * @param ParsoidExtensionAPI $extApi
* @param string $groupName * @param string $groupName
* @param string $refName * @param string $refName
* @param string $about * @param string $about
@ -81,7 +73,7 @@ class ReferencesData {
* @return stdClass * @return stdClass
*/ */
public function add( public function add(
Env $env, string $groupName, string $refName, string $about, bool $skipLinkback ParsoidExtensionAPI $extApi, string $groupName, string $refName, string $about, bool $skipLinkback
): stdClass { ): stdClass {
$group = $this->getRefGroup( $groupName, true ); $group = $this->getRefGroup( $groupName, true );
$refName = $this->makeValidIdAttr( $refName ); $refName = $this->makeValidIdAttr( $refName );
@ -89,14 +81,11 @@ class ReferencesData {
if ( $hasRefName && isset( $group->indexByName[$refName] ) ) { if ( $hasRefName && isset( $group->indexByName[$refName] ) ) {
$ref = $group->indexByName[$refName]; $ref = $group->indexByName[$refName];
if ( $ref->content && !$ref->hasMultiples ) { if ( $ref->contentId && !$ref->hasMultiples ) {
$ref->hasMultiples = true; $ref->hasMultiples = true;
// Use the non-pp version here since we've already stored attribs // Use the non-pp version here since we've already stored attribs
// before putting them in the map. // before putting them in the map.
$ref->cachedHtml = ContentUtils::toXML( $ref->cachedHtml = $extApi->getContentHTML( $ref->contentId );
$env->getFragment( $ref->content )[0],
[ 'innerXML' => true ]
);
} }
} else { } else {
// The ids produced Cite.php have some particulars: // The ids produced Cite.php have some particulars:
@ -114,7 +103,7 @@ class ReferencesData {
$ref = (object)[ $ref = (object)[
'about' => $about, 'about' => $about,
'content' => null, 'contentId' => null,
'dir' => '', 'dir' => '',
'group' => $group->name, 'group' => $group->name,
'groupIndex' => count( $group->refs ) + 1, 'groupIndex' => count( $group->refs ) + 1,
@ -140,13 +129,6 @@ class ReferencesData {
return $ref; return $ref;
} }
/**
* @return Env
*/
public function getEnv(): Env {
return $this->env;
}
/** /**
* @return RefGroup[] * @return RefGroup[]
*/ */