2020-05-11 15:52:06 +00:00
|
|
|
<?php
|
|
|
|
|
2020-05-14 22:44:49 +00:00
|
|
|
namespace MediaWiki\Extension\DiscussionTools\Tests;
|
|
|
|
|
2022-10-11 22:41:17 +00:00
|
|
|
use FormatJson;
|
2020-05-14 22:44:49 +00:00
|
|
|
use MediaWiki\Extension\DiscussionTools\CommentParser;
|
2020-05-11 15:52:06 +00:00
|
|
|
use MediaWiki\MediaWikiServices;
|
2021-07-29 02:16:15 +00:00
|
|
|
use Wikimedia\Parsoid\DOM\Document;
|
2022-03-09 19:32:51 +00:00
|
|
|
use Wikimedia\Parsoid\DOM\Element;
|
2020-07-30 23:34:56 +00:00
|
|
|
use Wikimedia\Parsoid\Utils\DOMCompat;
|
2020-06-16 20:13:31 +00:00
|
|
|
use Wikimedia\Parsoid\Utils\DOMUtils;
|
2020-05-11 15:52:06 +00:00
|
|
|
|
2021-02-02 14:12:51 +00:00
|
|
|
trait TestUtils {
|
2020-05-11 15:52:06 +00:00
|
|
|
|
|
|
|
/**
 * Parse an HTML string into a DOM Document.
 *
 * @param string $html HTML source, handed to DOMUtils::parseHTML() unchanged
 * @return Document
 */
protected static function createDocument( string $html ): Document {
	$parsedDocument = DOMUtils::parseHTML( $html );

	return $parsedDocument;
}
|
|
|
|
|
|
|
|
/**
 * Find the element under which thread items are expected to live.
 *
 * Test cases created from Parsoid output keep comments directly in <body>, whereas
 * cases created from old parser output wrap them in <div class="mw-parser-output">.
 *
 * @param Document $doc
 * @return Element A div.mw-parser-output found inside <body> if there is one, otherwise <body>
 */
protected static function getThreadContainer( Document $doc ): Element {
	$bodyElement = DOMCompat::getBody( $doc );
	$parserOutput = DOMCompat::querySelector( $bodyElement, 'div.mw-parser-output' );

	// Prefer the old-parser wrapper when the document has one
	if ( $parserOutput ) {
		return $parserOutput;
	}

	return $bodyElement;
}
|
|
|
|
|
2021-02-27 00:15:42 +00:00
|
|
|
/**
 * Get text from path
 *
 * @param string $relativePath Path relative to the parent of this file's directory
 * @return string Contents of the file
 * @throws \RuntimeException If the file cannot be read
 */
protected static function getText( string $relativePath ): string {
	$path = __DIR__ . '/../' . $relativePath;
	$text = file_get_contents( $path );

	// file_get_contents() signals failure by returning false; report it with the
	// offending path instead of letting the false leak into the string return value.
	if ( $text === false ) {
		throw new \RuntimeException( "Could not read file: $path" );
	}

	return $text;
}
|
|
|
|
|
|
|
|
/**
 * Write text to path
 *
 * @param string $relativePath Path relative to the parent of this file's directory
 * @param string $text New contents of the file
 * @throws \RuntimeException If the file cannot be written
 */
protected static function overwriteTextFile( string $relativePath, string $text ): void {
	$path = __DIR__ . '/../' . $relativePath;

	// file_put_contents() returns false on failure; do not let a failed overwrite pass silently
	if ( file_put_contents( $path, $text ) === false ) {
		throw new \RuntimeException( "Could not write file: $path" );
	}
}
|
|
|
|
|
2020-05-11 15:52:06 +00:00
|
|
|
/**
 * Get parsed JSON from path
 *
 * @param string $relativePath Path relative to this file's directory
 * @param bool $assoc See json_decode()
 * @return array
 * @throws \RuntimeException If the file cannot be read or does not contain valid JSON
 * @throws \UnexpectedValueException If the decoded value is not an array
 */
protected static function getJson( string $relativePath, bool $assoc = true ): array {
	$path = __DIR__ . '/' . $relativePath;

	$contents = file_get_contents( $path );
	if ( $contents === false ) {
		throw new \RuntimeException( "Could not read JSON file: $path" );
	}

	try {
		// 512 is json_decode()'s default depth; it is spelled out only to reach the flags argument
		$json = json_decode( $contents, $assoc, 512, JSON_THROW_ON_ERROR );
	} catch ( \JsonException $e ) {
		throw new \RuntimeException( "Invalid JSON in $path: " . $e->getMessage(), 0, $e );
	}

	// The declared return type only admits arrays; fail with a message that names the file
	// rather than with a bare return-type error.
	if ( !is_array( $json ) ) {
		throw new \UnexpectedValueException(
			"Expected JSON in $path to decode to an array, got " . gettype( $json )
		);
	}

	return $json;
}
|
|
|
|
|
2020-07-30 23:34:56 +00:00
|
|
|
/**
 * Write JSON to path
 *
 * The data is encoded with FormatJson using a tab as the indent string and the
 * FormatJson::ALL_OK flags, and a trailing newline is appended.
 *
 * @param string $relativePath Path relative to this file's directory
 * @param array $data Data to encode
 * @throws \RuntimeException If the data cannot be encoded or the file cannot be written
 */
protected static function overwriteJsonFile( string $relativePath, array $data ): void {
	$path = __DIR__ . '/' . $relativePath;

	$json = FormatJson::encode( $data, "\t", FormatJson::ALL_OK );
	// NOTE(review): assumes FormatJson::encode() reports failure by returning false — confirm.
	// Without this guard, a false would be concatenated below and a bare newline written.
	if ( $json === false ) {
		throw new \RuntimeException( "Could not encode JSON for file: $path" );
	}

	if ( file_put_contents( $path, $json . "\n" ) === false ) {
		throw new \RuntimeException( "Could not write JSON file: $path" );
	}
}
|
|
|
|
|
2020-05-11 15:52:06 +00:00
|
|
|
/**
 * Get HTML from path
 *
 * @param string $relativePath Path relative to the parent of this file's directory
 * @return string Contents of the file
 * @throws \RuntimeException If the file cannot be read
 */
protected static function getHtml( string $relativePath ): string {
	$path = __DIR__ . '/../' . $relativePath;
	$html = file_get_contents( $path );

	// file_get_contents() signals failure by returning false; report it with the
	// offending path instead of letting the false leak into the string return value.
	if ( $html === false ) {
		throw new \RuntimeException( "Could not read HTML file: $path" );
	}

	return $html;
}
|
|
|
|
|
2020-07-30 23:34:56 +00:00
|
|
|
/**
 * Write HTML to path
 *
 * The original file is used as a template: only the inner HTML of the container
 * (<body> or <div class="mw-parser-output">) is swapped for the container's current
 * inner HTML, and everything around it is kept as it was in the original file.
 *
 * @param string $relPath Destination path, relative to the parent of this file's directory
 * @param Element $container Element whose inner HTML is written
 * @param string $origRelPath Path of the original HTML file, relative to the same directory
 * @throws \RuntimeException If the original file cannot be read, the replacement fails,
 *  or the destination cannot be written
 */
protected static function overwriteHtmlFile( string $relPath, Element $container, string $origRelPath ): void {
	// Do not use $doc->saveHtml(), it outputs an awful soup of HTML entities for documents with
	// non-ASCII characters
	$origPath = __DIR__ . '/../' . $origRelPath;
	$html = file_get_contents( $origPath );
	if ( $html === false ) {
		throw new \RuntimeException( "Could not read HTML file: $origPath" );
	}

	$newInnerHtml = DOMCompat::getInnerHTML( $container );
	$isBody = strtolower( $container->tagName ) === 'body';

	if ( $isBody ) {
		// Apparently <body> innerHTML always has a trailing newline, even if the source HTML did not,
		// and we need to preserve whatever whitespace was there to avoid test failures.
		// The pattern always matches, because \z matches at the end of any string.
		preg_match( '`(\s*)(</body>|\z)`s', $html, $matches );
		$newInnerHtml = rtrim( $newInnerHtml ) . $matches[1];
	}

	if ( $isBody && !str_contains( $html, '<body' ) ) {
		// The original file has no <body> tag, so the inner HTML is the whole file
		$html = $newInnerHtml;
	} else {
		$pattern = $isBody
			? '`(<body[^>]*>)(.*)(</body>)`s'
			: '`(<div class="mw-parser-output">)(.*)(</div>)`s';

		// Use a callback rather than a '$1…$3' replacement string: inner HTML that starts with
		// a digit would otherwise turn '$1' into e.g. '$15', a reference to a nonexistent group,
		// and the callback also makes quoting of \ and $ in the new HTML unnecessary.
		$html = preg_replace_callback(
			$pattern,
			static function ( array $m ) use ( $newInnerHtml ): string {
				return $m[1] . $newInnerHtml . $m[3];
			},
			$html
		);
		if ( $html === null ) {
			// null means a PCRE error; writing it out would blank the file
			throw new \RuntimeException( "Could not replace container HTML from: $origPath" );
		}
	}

	$path = __DIR__ . '/../' . $relPath;
	if ( file_put_contents( $path, $html ) === false ) {
		throw new \RuntimeException( "Could not write HTML file: $path" );
	}
}
|
|
|
|
|
2020-05-11 15:52:06 +00:00
|
|
|
/**
|
2020-10-14 20:14:59 +00:00
|
|
|
* Create a comment parser
|
2020-05-11 15:52:06 +00:00
|
|
|
*
|
|
|
|
* @param array $data
|
2020-05-14 22:44:49 +00:00
|
|
|
* @return CommentParser
|
2020-05-11 15:52:06 +00:00
|
|
|
*/
|
Change CommentParser into a service
Goal:
-----
To have a method like CommentParser::parse(), which just takes a node
to parse and a title and returns plain data, so that we don't need to
keep track of the config to construct a CommentParser object (the
required config like content language is provided by services) and
we don't need to keep that object around after parsing.
Changes:
--------
CommentParser.php:
* …is now a service. Constructor only takes services as arguments.
The node and title are passed to a new parse() method.
* parse() should return plain data, but I split this part to a separate
patch for ease of review: I49bfe019aa460651447fd383f73eafa9d7180a92.
* CommentParser still cheats and accesses global state in a few places,
e.g. calling Title::makeTitleSafe or CommentUtils::getTitleFromUrl,
so we can't turn its tests into true unit tests. This work is left
for future commits.
LanguageData.php:
* …is now a service, instead of a static class.
Parser.js:
* …is not a real service, but it's changed to behave in a similar way.
Constructor takes only the required config as argument,
and node and title are instead passed to a new parse() method.
CommentParserTest.php:
parser.test.js:
* Can be simplified, now that we don't need a useless node and title
to test internal methods that don't use them.
testUtils.js:
* Can be simplified, now that we don't need to override internal
ResourceLoader stuff just to change the parser config.
Change-Id: Iadb7757debe000025e52770ca51ebcf24ca8ee66
2022-02-19 02:43:21 +00:00
|
|
|
/**
 * Build a CommentParser from the global MediaWiki services, with the language data
 * supplied by a MockLanguageData constructed from $data.
 *
 * @param array $data Passed to the MockLanguageData constructor
 * @return CommentParser
 */
public static function createParser( array $data ): CommentParser {
	$mwServices = MediaWikiServices::getInstance();

	// Collected in the same order as the CommentParser constructor arguments
	$mainConfig = $mwServices->getMainConfig();
	$contentLanguage = $mwServices->getContentLanguage();
	$converterFactory = $mwServices->getLanguageConverterFactory();
	$languageData = new MockLanguageData( $data );
	$titleParser = $mwServices->getTitleParser();

	return new CommentParser(
		$mainConfig,
		$contentLanguage,
		$converterFactory,
		$languageData,
		$titleParser
	);
}
|
|
|
|
}
|