mediawiki-extensions-Discus.../tests/phpunit/DiscussionToolsCommentParserTest.php
Bartosz Dziewoński b8d7a75c34 Fix performance of DiscussionToolsCommentParser::childIndexOf()
Profiling reveals that >87% of the run time of our test suite is spent
in this tiny method. Apparently, DOMNodeList::item() is extremely slow
(possibly it's linear time instead of constant time?).

Profiled using XDebug and KCacheGrind:
https://phabricator.wikimedia.org/F31815264

We can calculate the child's index in its parent by counting its
precending siblings instead, which turns out to be much faster.

Before:
 1. 275444ms to run DiscussionToolsCommentParserTest:testGetComments with data set #2
 2. 12668ms to run DiscussionToolsCommentParserTest:testGetComments with data set #3
 ...
After:
 1. 9545ms to run DiscussionToolsCommentParserTest:testGetComments with data set #2
 2. 5549ms to run DiscussionToolsCommentParserTest:testGetComments with data set #3
 ...

That's still kind of slow but now it's bearable to run the test suite.

Change-Id: I49155f7aa2e231a9a20bf282cf6aaa28fc902e0b
2020-05-13 02:56:39 +02:00

222 lines
6.5 KiB
PHP

<?php
use Wikimedia\TestingAccessWrapper;
/**
* @coversDefaultClass DiscussionToolsCommentParser
*/
class DiscussionToolsCommentParserTest extends DiscussionToolsTestCase {
private static function getOffsetPath( $ancestor, $node, $nodeOffset ) {
$path = [ $nodeOffset ];
while ( $node !== $ancestor ) {
if ( !$node->parentNode ) {
throw new Error( 'Not a descendant' );
}
array_unshift( $path, DiscussionToolsCommentParser::childIndexOf( $node ) );
$node = $node->parentNode;
}
return implode( '/', $path );
}
private static function simplify( $parent ) {
unset( $parent['range'] );
unset( $parent['signatureRanges'] );
foreach ( $parent['replies'] as $i => $reply ) {
$parent['replies'][$i] = self::simplify( $reply );
}
return $parent;
}
private static function serializeComments( &$parent, $root ) {
unset( $parent->parent );
// Can't serialize the DOM nodes involved in the range,
// instead use their offsets within their parent nodes
$parent->range = [
self::getOffsetPath( $root, $parent->range->startContainer, $parent->range->startOffset ),
self::getOffsetPath( $root, $parent->range->endContainer, $parent->range->endOffset )
];
if ( isset( $parent->signatureRanges ) ) {
$parent->signatureRanges = array_map( function ( $range ) use ( $root ) {
return [
self::getOffsetPath( $root, $range->startContainer, $range->startOffset ),
self::getOffsetPath( $root, $range->endContainer, $range->endOffset )
];
}, $parent->signatureRanges );
}
foreach ( $parent->replies as $reply ) {
self::serializeComments( $reply, $root );
}
}
/**
* @dataProvider provideTimestampRegexps
* @covers ::getTimestampRegexp
*/
public function testGetTimestampRegexp( $format, $expected, $message ) {
$parser = TestingAccessWrapper::newFromObject(
DiscussionToolsCommentParser::newFromGlobalState()
);
// HACK: Fix differences between JS & PHP regexes
// TODO: We may just have to have two version in the test data
$expected = preg_replace( '/\\\\u([0-9A-F]+)/', '\\\\x{$1}', $expected );
$expected = str_replace( ':', '\:', $expected );
$expected = '/' . $expected . '/u';
$result = $parser->getTimestampRegexp( $format, '\\d', [ 'UTC' => 'UTC' ] );
self::assertSame( $expected, $result, $message );
}
public function provideTimestampRegexps() {
return self::getJson( './cases/timestamp-regex.json' );
}
/**
* @dataProvider provideTimestampParser
* @covers ::getTimestampParser
*/
public function testGetTimestampParser( $format, $data, $expected, $message ) {
$parser = TestingAccessWrapper::newFromObject(
DiscussionToolsCommentParser::newFromGlobalState()
);
$expected = new DateTimeImmutable( $expected );
$tsParser = $parser->getTimestampParser( $format, null, 'UTC', [ 'UTC' => 'UTC' ] );
self::assertEquals( $expected, $tsParser( $data ), $message );
}
public function provideTimestampParser() {
return self::getJson( './cases/timestamp-parser.json' );
}
/**
* @dataProvider provideTimestampParserDST
* @covers ::getTimestampParser
*/
public function testGetTimestampParserDST(
$sample, $expected, $expectedUtc, $format, $timezone, $timezoneAbbrs, $message
) {
$parser = TestingAccessWrapper::newFromObject(
DiscussionToolsCommentParser::newFromGlobalState()
);
$regexp = $parser->getTimestampRegexp( $format, '\\d', $timezoneAbbrs );
$tsParser = $parser->getTimestampParser( $format, null, $timezone, $timezoneAbbrs );
$expected = new DateTimeImmutable( $expected );
$expectedUtc = new DateTimeImmutable( $expectedUtc );
preg_match( $regexp, $sample, $match, PREG_OFFSET_CAPTURE );
$date = $tsParser( $match );
self::assertEquals( $expected, $date, $message );
self::assertEquals( $expectedUtc, $date, $message );
}
public function provideTimestampParserDST() {
return self::getJson( './cases/timestamp-parser-dst.json' );
}
/**
* @dataProvider provideAuthors
* @covers ::getAuthors
*/
public function testGetAuthors( $thread, $expected ) {
$parser = DiscussionToolsCommentParser::newFromGlobalState();
self::assertEquals( $expected, $parser->getAuthors( $thread ) );
}
public function provideAuthors() {
return [
[
'thread' => (object)[
'replies' => [
(object)[
'author' => 'Eve',
'replies' => []
],
(object)[
'author' => 'Bob',
'replies' => [
(object)[
'author' => 'Alice',
'replies' => []
]
]
]
]
],
'expected' => [ 'Alice', 'Bob', 'Eve' ]
]
];
}
/**
* @dataProvider provideComments
* @covers ::getComments
* @covers ::groupThreads
*/
public function testGetComments( $name, $dom, $expected, $config, $data ) {
$dom = self::getHtml( $dom );
$expected = self::getJson( $expected );
$config = self::getJson( $config );
$data = self::getJson( $data );
$this->setupEnv( $config, $data );
$parserOrig = self::createParser( $data );
$parser = TestingAccessWrapper::newFromObject( $parserOrig );
$doc = self::createDocument( $dom );
$container = $doc->documentElement->childNodes[0];
$comments = $parserOrig->getComments( $container );
$threads = $parserOrig->groupThreads( $comments );
$processedThreads = [];
foreach ( $threads as $i => $thread ) {
self::serializeComments( $thread, $container );
$thread = json_decode( json_encode( $thread ), true );
// Ignore ranges for now
$thread = self::simplify( $thread );
$expected[$i] = self::simplify( $expected[$i] );
$processedThreads[] = $thread;
self::assertEquals( $expected[$i], $processedThreads[$i], $name . ' section ' . $i );
}
}
public function provideComments() {
return [
// passes with ranges
self::getJson( './cases/comments.json' )[0],
// passes without ranges
self::getJson( './cases/comments.json' )[1],
// passes without ranges but very slow
self::getJson( './cases/comments.json' )[2],
// passes without ranges but very slow
self::getJson( './cases/comments.json' )[3],
// passes with ranges
self::getJson( './cases/comments.json' )[4],
// passes without ranges
self::getJson( './cases/comments.json' )[5],
// passes without ranges
self::getJson( './cases/comments.json' )[6],
// passes without ranges
self::getJson( './cases/comments.json' )[7],
// passes with ranges
self::getJson( './cases/comments.json' )[8],
// passes with ranges
self::getJson( './cases/comments.json' )[9],
// passes with ranges
self::getJson( './cases/comments.json' )[10]
];
}
}