mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/TextExtracts
synced 2024-12-12 16:25:17 +00:00
81fd92685a
I argue that the code fixing unclosed HTML tags is – even if optional – an integral part of the code that potentially breaks these HTML tags in the first place. Notice how much code disappears in the ApiQueryExtracts class. Additionally, the new approach uses an interface instead of a static function call that is impossible to mock and hard to test. Change-Id: Ic1a65995f4dba11d060a8738d642905cbfc79271
84 lines
3.3 KiB
PHP
84 lines
3.3 KiB
PHP
<?php
|
|
|
|
namespace TextExtracts\Test;
|
|
|
|
use MediaWikiTestCase;
|
|
use TextExtracts\ExtractFormatter;
|
|
|
|
/**
 * Data-driven tests for ExtractFormatter::getText() in both HTML and plain text mode.
 *
 * @covers \TextExtracts\ExtractFormatter
 * @group TextExtracts
 *
 * @license GPL-2.0-or-later
 */
class ExtractFormatterTest extends MediaWikiTestCase {

	/**
	 * Builds an ExtractFormatter for the given input, strips the 'div' and '.metadata'
	 * selectors, and asserts that the resulting text is identical to the expectation.
	 *
	 * @dataProvider provideExtracts
	 *
	 * @param string $expected Text that getText() must return
	 * @param string $text HTML handed to the ExtractFormatter constructor
	 * @param bool $plainText Second constructor argument; true in the "plain text mode"
	 *  data sets, false in the "HTML mode" ones
	 */
	public function testExtracts( $expected, $text, $plainText ) {
		$formatter = new ExtractFormatter( $text, $plainText );
		// .metadata class will be added via $wgExtractsRemoveClasses on WMF
		$formatter->remove( [ 'div', '.metadata' ] );

		$this->assertSame( $expected, $formatter->getText() );
	}

	/**
	 * Data sets for testExtracts().
	 *
	 * Declared static because it needs no instance state and PHPUnit 10+ only accepts
	 * static data providers; older PHPUnit versions accept static providers as well.
	 *
	 * @return array[] Named data sets, each [ string $expected, string $text, bool $plainText ]
	 */
	public static function provideExtracts() {
		// NOTE(review): the bare "&" characters in the fixtures below may originally have
		// been HTML entities ("&amp;") – confirm against the upstream repository.

		// phpcs:ignore Generic.Files.LineLength
		$dutch = '<b>Dutch</b> (<span class="unicode haudio" style="white-space:nowrap;"><span class="fn"><a href="/wiki/File:Nl-Nederlands.ogg" title="About this sound"><img alt="About this sound" src="https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/11px-Loudspeaker.svg.png" width="11" height="11" srcset="https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/17px-Loudspeaker.svg.png 1.5x, https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/22px-Loudspeaker.svg.png 2x" /></a> <a href="https://upload.wikimedia.org/wikipedia/commons/d/db/Nl-Nederlands.ogg" class="internal" title="Nl-Nederlands.ogg"><i>Nederlands</i></a></span> <small class="metadata audiolinkinfo" style="cursor:help;">(<a href="/w/index.php?title=Wikipedia:Media_help&action=edit&redlink=1" class="new" title="Wikipedia:Media help (page does not exist)"><span style="cursor:help;">help</span></a>·<a href="/wiki/File:Nl-Nederlands.ogg" title="File:Nl-Nederlands.ogg"><span style="cursor:help;">info</span></a>)</small></span>) is a <a href="/w/index.php?title=West_Germanic_languages&action=edit&redlink=1" class="new" title="West Germanic languages (page does not exist)">West Germanic language</a> and the native language of most of the population of the <a href="/w/index.php?title=Netherlands&action=edit&redlink=1" class="new" title="Netherlands (page does not exist)">Netherlands</a>';

		// Line breaks are spelled out as "\n" so the fixture does not depend on the
		// line endings this file happens to be checked out with.
		$tocText = 'Lead<div id="toc" class="toc">TOC goes here</div>' . "\n" .
			'<h1>Section</h1>' . "\n" .
			'<p>Section text</p>';

		// Leading line feed and non-breaking space are written as escapes so that the
		// otherwise invisible characters are explicit; the trailing CR/LF is kept.
		$untidyHtml = "\n\u{00A0}<a>A</a> & <b>B</b>\r\n";

		return [
			'Dutch lead paragraph in plain text mode' => [
				'Dutch ( Nederlands ) is a West Germanic language and the native language of ' .
					'most of the population of the Netherlands',
				$dutch,
				true,
			],

			'HTML cleanup in HTML mode' => [
				"\u{00A0}A & <b>B</b>",
				$untidyHtml,
				false,
			],
			'HTML cleanup in plain text mode' => [
				'A & B',
				$untidyHtml,
				true,
			],

			'class attribute is stripped from outer span' => [
				'<span><span lang="baz">qux</span></span>',
				'<span class="foo"><span lang="baz">qux</span></span>',
				false,
			],
			'style attribute is stripped from outer span' => [
				'<span><span lang="baz">qux</span></span>',
				'<span style="foo: bar;"><span lang="baz">qux</span></span>',
				false,
			],
			'class and style attributes are stripped, lang is kept' => [
				'<span><span lang="qux">quux</span></span>',
				'<span class="foo"><span style="bar: baz;" lang="qux">quux</span></span>',
				false,
			],

			'TOC is properly removed (HTML mode)' => [
				"Lead\n<h1>Section</h1>\n<p>Section text</p>",
				$tocText,
				false,
			],
			'TOC is properly removed (plain text mode)' => [
				"Lead\n\n\x01\x021\2\1Section\nSection text",
				$tocText,
				true,
			],
		];
	}

}
|