mediawiki-extensions-Discus.../includes/CommentUtils.php
Bartosz Dziewoński 31b26a5bec Fix indentation level when replying to comments with mixed indentation
When adding a reply, we take a node at the end of the previous comment,
compare that comment's indentation level to the expected indentation level
of the reply, and add (or remove) that number of wrapper lists.

The existing code did not consider that comments may have lists within
them, and so the indentation of that node may not match the indentation
of the comment.

Bug: T252702
Change-Id: Icc5ff19783d2b213bff99f283cb0599a8b5c1ab4
2020-08-06 01:25:33 +02:00

306 lines
8.2 KiB
PHP

<?php
namespace MediaWiki\Extension\DiscussionTools;
use DOMElement;
use DOMNode;
use DOMXPath;
use MediaWiki\MediaWikiServices;
use Title;
class CommentUtils {
private function __construct() {
}
/**
* Get the index of $child in its parent
*
* @param DOMNode $child
* @return int
*/
public static function childIndexOf( DOMNode $child ) : int {
$i = 0;
while ( ( $child = $child->previousSibling ) ) {
$i++;
}
return $i;
}
/**
* Check whether a DOMNode contains (is an ancestor of) another DOMNode (or is the same node)
*
* @param DOMNode $ancestor
* @param DOMNode $descendant
* @return bool
*/
public static function contains( DOMNode $ancestor, DOMNode $descendant ) : bool {
// TODO can we use DOMNode->compareDocumentPosition() here maybe?
$node = $descendant;
while ( $node && $node !== $ancestor ) {
$node = $node->parentNode;
}
return $node === $ancestor;
}
/**
* Find closest ancestor element using one of the given tag names.
*
* @param DOMNode $node
* @param string[] $tagNames
* @return DOMElement|null
*/
public static function closestElement( DOMNode $node, array $tagNames ) : ?DOMElement {
do {
if (
$node->nodeType === XML_ELEMENT_NODE &&
in_array( strtolower( $node->nodeName ), $tagNames )
) {
return $node;
}
$node = $node->parentNode;
} while ( $node );
return null;
}
/**
* Find the transclusion node which rendered the current node, if it exists.
*
* 1. Find the closest ancestor with an 'about' attribute
* 2. Find the main node of the about-group (first sibling with the same 'about' attribute)
* 3. If this is an mw:Transclusion node, return it; otherwise, go to step 1
*
* @param DOMNode $node
* @return DOMElement|null Translcusion node, null if not found
*/
public static function getTranscludedFromElement( DOMNode $node ) : ?DOMElement {
while ( $node ) {
// 1.
if (
$node instanceof DOMElement &&
$node->getAttribute( 'about' ) &&
preg_match( '/^#mwt\d+$/', $node->getAttribute( 'about' ) )
) {
$about = $node->getAttribute( 'about' );
// 2.
while (
( $previousSibling = $node->previousSibling ) &&
$previousSibling instanceof DOMElement &&
$previousSibling->getAttribute( 'about' ) === $about
) {
$node = $previousSibling;
}
// 3.
if (
$node->getAttribute( 'typeof' ) &&
in_array( 'mw:Transclusion', explode( ' ', $node->getAttribute( 'typeof' ) ) )
) {
break;
}
}
$node = $node->parentNode;
}
return $node;
}
/**
* Trim ASCII whitespace, as defined in the HTML spec.
*
* @param string $str
* @return string
*/
public static function htmlTrim( string $str ) : string {
// https://infra.spec.whatwg.org/#ascii-whitespace
return trim( $str, "\t\n\f\r " );
}
/**
* Get the indent level of $node, relative to $rootNode.
*
* The indent level is the number of lists inside of which it is nested.
*
* @param DOMNode $node
* @param DOMNode $rootNode
* @return int
*/
public static function getIndentLevel( DOMNode $node, DOMNode $rootNode ) : int {
$indent = 0;
while ( $node ) {
if ( $node === $rootNode ) {
break;
}
$nodeName = strtolower( $node->nodeName );
if ( $nodeName === 'li' || $nodeName === 'dd' ) {
$indent++;
}
$node = $node->parentNode;
}
return $indent;
}
/**
* Get an array of sibling nodes that contain parts of the given thread item.
*
* @param ThreadItem $item Thread item
* @return DOMElement[]
*/
private static function getCoveredSiblings( ThreadItem $item ) : array {
$range = $item->getRange();
$ancestor = $range->commonAncestorContainer;
if ( $ancestor === $range->startContainer || $ancestor === $range->endContainer ) {
return [ $ancestor ];
}
// Convert to array early because apparently DOMNodeList acts like a linked list
// and accessing items by index is slow
$siblings = iterator_to_array( $ancestor->childNodes );
$start = 0;
$end = count( $siblings ) - 1;
// Find first of the siblings that contains the item
while ( !self::contains( $siblings[ $start ], $range->startContainer ) ) {
$start++;
}
// Find last of the siblings that contains the item
while ( !self::contains( $siblings[ $end ], $range->endContainer ) ) {
$end--;
}
return array_slice( $siblings, $start, $end - $start + 1 );
}
/**
* Get the nodes (if any) that contain the given thread item, and nothing else.
*
* @param ThreadItem $item Thread item
* @return DOMElement[]|null
*/
public static function getFullyCoveredSiblings( ThreadItem $item ) : ?array {
$siblings = self::getCoveredSiblings( $item );
$isIgnored = function ( $node ) {
// Ignore empty text nodes
return $node->nodeType === XML_TEXT_NODE && CommentUtils::htmlTrim( $node->nodeValue ) === '';
};
$firstNonemptyChild = function ( $node ) use ( $isIgnored ) {
$node = $node->firstChild;
while ( $node && $isIgnored( $node ) ) {
$node = $node->nextSibling;
}
return $node;
};
$lastNonemptyChild = function ( $node ) use ( $isIgnored ) {
$node = $node->lastChild;
while ( $node && $isIgnored( $node ) ) {
$node = $node->previousSibling;
}
return $node;
};
$startMatches = false;
$node = $siblings[ 0 ];
while ( $node ) {
if ( $item->getRange()->startContainer === $node && $item->getRange()->startOffset === 0 ) {
$startMatches = true;
break;
}
$node = $firstNonemptyChild( $node );
}
$endMatches = false;
$node = end( $siblings );
while ( $node ) {
$length = ( $node->nodeType === XML_TEXT_NODE ) ?
strlen( rtrim( $node->nodeValue, "\t\n\f\r " ) ) :
// PHP bug: childNodes can be null for comment nodes
// (it should always be a DOMNodeList, even if the node can't have children)
( $node->childNodes ? $node->childNodes->length : 0 );
if ( $item->getRange()->endContainer === $node && $item->getRange()->endOffset === $length ) {
$endMatches = true;
break;
}
$node = $lastNonemptyChild( $node );
}
if ( $startMatches && $endMatches ) {
// If these are all of the children (or the only child), go up one more level
while (
( $parent = $siblings[ 0 ]->parentNode ) &&
$firstNonemptyChild( $parent ) === $siblings[ 0 ] &&
$lastNonemptyChild( $parent ) === end( $siblings )
) {
$siblings = [ $parent ];
}
return $siblings;
}
return null;
}
/**
* Unwrap Parsoid sections
*
* @param DOMElement $element Parent element, e.g. document body
* @param string|null $keepSection Section to keep
*/
public static function unwrapParsoidSections(
DOMElement $element, string $keepSection = null
) : void {
$xpath = new DOMXPath( $element->ownerDocument );
$sections = $xpath->query( '//section[@data-mw-section-id]', $element );
foreach ( $sections as $section ) {
$parent = $section->parentNode;
$sectionId = $section->getAttribute( 'data-mw-section-id' );
// Copy section ID to first child (should be a heading)
if ( $sectionId !== '' && intval( $sectionId ) > 0 ) {
$section->firstChild->setAttribute( 'data-mw-section-id', $sectionId );
}
if ( $keepSection !== null && $sectionId === $keepSection ) {
return;
}
while ( $section->firstChild ) {
$parent->insertBefore( $section->firstChild, $section );
}
$parent->removeChild( $section );
}
}
/**
* Get a MediaWiki page title from a URL
*
* @param string $url
* @return Title|null
*/
public static function getTitleFromUrl( string $url ) : ?Title {
$config = MediaWikiServices::getInstance()->getMainConfig();
// TODO: Set the correct base in the document?
if ( strpos( $url, './' ) === 0 ) {
$url = 'https://local' . str_replace( '$1', substr( $url, 2 ), $config->get( 'ArticlePath' ) );
} elseif ( strpos( $url, '://' ) === false ) {
$url = 'https://local' . $url;
}
$bits = wfParseUrl( $url );
$query = wfCgiToArray( $bits['query'] ?? '' );
if ( isset( $query['title'] ) ) {
return Title::newFromText( $query['title'] );
}
$articlePathRegexp = '/' . str_replace(
preg_quote( '$1', '/' ),
'(.*)',
preg_quote( $config->get( 'ArticlePath' ), '/' )
) . '/';
$matches = null;
if ( preg_match( $articlePathRegexp, $url, $matches ) ) {
return Title::newFromText( urldecode( $matches[1] ) );
}
return null;
}
}