Create ImmutableRange class in PHP

TODO: Create one in JS as well

Change-Id: I6c9dc2455afcb8d0b68674a2985c5e43dd94b6fb
This commit is contained in:
Ed Sanders 2020-05-22 14:47:21 +01:00
parent 16cf49e7e4
commit b3ca37c1c5
3 changed files with 148 additions and 37 deletions

View file

@ -18,7 +18,6 @@ use MWException;
use stdClass;
use Title;
// TODO maybe make a class for ranges?
// TODO make a class for comments
// TODO clean up static vs non-static
@ -690,18 +689,17 @@ class CommentParser {
*
* The elements of the array are stdClass objects with the following fields:
* - 'type' (string): 'heading' or 'comment'
* - 'range' (array): The extent of the comment, including the signature and timestamp.
* Comments can start or end in the middle of a DOM node.
* Keys: 'startContainer', 'startOffset', 'endContainer' and 'endOffset'
* - 'signatureRanges' (array): The extents of the comment's signatures (plus timestamps).
* There is always at least one signature, but there may be multiple.
* The author and timestamp of the comment is determined from the
* first signature. The last node in every signature range is the
* text node containing the timestamp.
* - 'range' (ImmutableRange): The extent of the comment, including the signature and timestamp.
* Comments can start or end in the middle of a DOM node.
* - 'signatureRanges' (ImmutableRange[]): The extents of the comment's signatures (plus timestamps).
* There is always at least one signature, but there may be multiple.
* The author and timestamp of the comment is determined from the
* first signature. The last node in every signature range is the
* text node containing the timestamp.
* - 'level' (int): Indentation level of the comment. Headings are 0, comments start at 1.
* - 'timestamp' (string): ISO 8601 timestamp in UTC (ending in 'Z'). Not set for headings.
* - 'author' (string|null): Comment author's username, null for unsigned comments.
* Not set for headings.
* Not set for headings.
*
* @param DOMElement $rootNode
* @return stdClass[] Results. Each result is an object.
@ -716,12 +714,7 @@ class CommentParser {
$dfParser = $this->getLocalTimestampParser();
// Placeholder heading in case there are comments in the 0th section
$range = (object)[
'startContainer' => $rootNode,
'startOffset' => 0,
'endContainer' => $rootNode,
'endOffset' => 0
];
$range = new ImmutableRange( $rootNode, 0, $rootNode, 0 );
$fakeHeading = (object)[
'placeholderHeading' => true,
'type' => 'heading',
@ -739,12 +732,7 @@ class CommentParser {
}
if ( $node->nodeType === XML_ELEMENT_NODE && preg_match( '/^h[1-6]$/i', $node->nodeName ) ) {
$range = (object)[
'startContainer' => $node,
'startOffset' => 0,
'endContainer' => $node,
'endOffset' => $node->childNodes->length
];
$range = new ImmutableRange( $node, 0, $node, $node->childNodes->length );
$curComment = (object)[
'type' => 'heading',
'range' => $range,
@ -767,18 +755,18 @@ class CommentParser {
// Everything from the last comment up to here is the next comment
$startNode = $this->nextInterestingLeafNode( $curComment->range->endContainer, $rootNode );
$match = $timestamps[$nextTimestamp][1];
$range = (object)[
'startContainer' => $startNode->parentNode,
'startOffset' => CommentUtils::childIndexOf( $startNode ),
'endContainer' => $node,
'endOffset' => $match[0][1] + strlen( $match[0][0] )
];
$sigRange = (object)[
'startContainer' => $firstSigNode->parentNode,
'startOffset' => CommentUtils::childIndexOf( $firstSigNode ),
'endContainer' => $node,
'endOffset' => $match[0][1] + strlen( $match[0][0] )
];
$range = new ImmutableRange(
$startNode->parentNode,
CommentUtils::childIndexOf( $startNode ),
$node,
$match[0][1] + strlen( $match[0][0] )
);
$sigRange = new ImmutableRange(
$firstSigNode->parentNode,
CommentUtils::childIndexOf( $firstSigNode ),
$node,
$match[0][1] + strlen( $match[0][0] )
);
$startLevel = $this->getIndentLevel( $startNode, $rootNode ) + 1;
$endLevel = $this->getIndentLevel( $node, $rootNode ) + 1;
@ -806,8 +794,7 @@ class CommentParser {
)
) {
// Merge this with the previous comment. Use that comment's author and timestamp.
$curComment->range->endContainer = $range->endContainer;
$curComment->range->endOffset = $range->endOffset;
$curComment->range = $curComment->range->setEnd( $range->endContainer, $range->endOffset );
$curComment->signatureRanges[] = $sigRange;
$curComment->level = min( min( $startLevel, $endLevel ), $curComment->level );

123
includes/ImmutableRange.php Normal file
View file

@ -0,0 +1,123 @@
<?php
namespace MediaWiki\Extension\DiscussionTools;
use DOMNode;
/**
 * ImmutableRange has a similar API to the DOM Range class.
 *
 * start/endContainer and offsets can be accessed, as can commonAncestorContainer
 * which is lazily evaluated and cached on first access.
 *
 * setStart and setEnd are still available but return a new range instead of
 * modifying the existing one.
 *
 * @property-read bool $collapsed Whether start and end are the same position
 * @property-read DOMNode $commonAncestorContainer Deepest node containing both boundary containers
 * @property-read DOMNode $endContainer
 * @property-read int $endOffset
 * @property-read DOMNode $startContainer
 * @property-read int $startOffset
 */
class ImmutableRange {
	/** @var DOMNode|null Cached common ancestor container, computed on first access */
	private $mCommonAncestorContainer = null;
	/** @var DOMNode */
	private $mEndContainer;
	/** @var int */
	private $mEndOffset;
	/** @var DOMNode */
	private $mStartContainer;
	/** @var int */
	private $mStartOffset;

	/**
	 * Find the common ancestor container of two nodes
	 *
	 * @param DOMNode $a
	 * @param DOMNode $b
	 * @return DOMNode Common ancestor container
	 * @throws \Error If the two nodes do not share any ancestor
	 */
	private static function findCommonAncestorContainer( DOMNode $a, DOMNode $b ) : DOMNode {
		// Build both ancestor chains, ordered from the node itself up to its topmost ancestor
		$ancestorsA = [];
		$ancestorsB = [];
		do {
			$ancestorsA[] = $a;
		} while ( ( $a = $a->parentNode ) );
		do {
			$ancestorsB[] = $b;
		} while ( ( $b = $b->parentNode ) );

		// Walk down from the topmost ancestors for as long as both chains agree;
		// the last node they agree on is the deepest common ancestor
		$node = null;
		while ( $ancestorsA && $ancestorsB && end( $ancestorsA ) === end( $ancestorsB ) ) {
			$node = end( $ancestorsA );
			array_pop( $ancestorsA );
			array_pop( $ancestorsB );
		}
		if ( !$node ) {
			// Fully qualified: an unqualified name would be resolved relative to
			// this file's namespace, where no such class exists
			throw new \Error( 'Nodes are not in the same document' );
		}

		return $node;
	}

	/**
	 * @param DOMNode $startNode Start node
	 * @param int $startOffset Start offset
	 * @param DOMNode $endNode End node
	 * @param int $endOffset End offset
	 */
	public function __construct(
		DOMNode $startNode, int $startOffset, DOMNode $endNode, int $endOffset
	) {
		$this->mStartContainer = $startNode;
		$this->mStartOffset = $startOffset;
		$this->mEndContainer = $endNode;
		$this->mEndOffset = $endOffset;
	}

	/**
	 * Read-only access to the range's properties
	 *
	 * @param string $field Field name
	 * @return mixed
	 * @throws \Exception If $field is not a known property
	 * @throws \Error If 'commonAncestorContainer' is requested and the boundary
	 *  containers do not share any ancestor
	 */
	public function __get( string $field ) {
		switch ( $field ) {
			case 'collapsed':
				return $this->mStartContainer === $this->mEndContainer &&
					$this->mStartOffset === $this->mEndOffset;
			case 'commonAncestorContainer':
				if ( $this->mCommonAncestorContainer === null ) {
					$this->mCommonAncestorContainer =
						self::findCommonAncestorContainer( $this->mStartContainer, $this->mEndContainer );
				}
				return $this->mCommonAncestorContainer;
			case 'endContainer':
				return $this->mEndContainer;
			case 'endOffset':
				return $this->mEndOffset;
			case 'startContainer':
				return $this->mStartContainer;
			case 'startOffset':
				return $this->mStartOffset;
			default:
				// Fully qualified so that the global Exception class is thrown
				throw new \Exception( 'Invalid property: ' . $field );
		}
	}

	/**
	 * Clone range with a new start position
	 *
	 * @param DOMNode $startNode Start node
	 * @param int $startOffset Start offset
	 * @return self
	 */
	public function setStart( DOMNode $startNode, int $startOffset ) : self {
		return new self(
			$startNode, $startOffset, $this->mEndContainer, $this->mEndOffset
		);
	}

	/**
	 * Clone range with a new end position
	 *
	 * @param DOMNode $endNode End node
	 * @param int $endOffset End offset
	 * @return self
	 */
	public function setEnd( DOMNode $endNode, int $endOffset ) : self {
		return new self(
			$this->mStartContainer, $this->mStartOffset, $endNode, $endOffset
		);
	}
}

View file

@ -7,6 +7,7 @@ use DOMElement;
use DOMNode;
use MediaWiki\Extension\DiscussionTools\CommentParser;
use MediaWiki\Extension\DiscussionTools\CommentUtils;
use MediaWiki\Extension\DiscussionTools\ImmutableRange;
use stdClass;
use Wikimedia\TestingAccessWrapper;
@ -89,7 +90,7 @@ class CommentParserTest extends CommentTestCase {
self::getOffsetPath( $root, $parent->range->endContainer, $parent->range->endOffset )
];
if ( isset( $parent->signatureRanges ) ) {
$parent->signatureRanges = array_map( function ( $range ) use ( $root ) {
$parent->signatureRanges = array_map( function ( ImmutableRange $range ) use ( $root ) {
return [
self::getOffsetPath( $root, $range->startContainer, $range->startOffset ),
self::getOffsetPath( $root, $range->endContainer, $range->endOffset )