<?php

use TextExtracts\ExtractFormatter;

/**
 * Tests for TextExtracts\ExtractFormatter: full extract formatting plus the
 * static getFirstSentences() and getFirstChars() helpers.
 *
 * @group TextExtracts
 */
class ExtractFormatterTest extends MediaWikiTestCase {
	/**
	 * Formats $text with an ExtractFormatter and compares the trimmed result
	 * with the expected extract.
	 *
	 * @dataProvider provideExtracts
	 * @param string $expected Extract expected after formatting and trimming
	 * @param string $text HTML handed to the formatter
	 * @param bool $plainText Value passed as the formatter's plain-text flag
	 */
	public function testExtracts( $expected, $text, $plainText ) {
		$config = ConfigFactory::getDefaultInstance()->makeConfig( 'textextracts' );
		$fmt = new ExtractFormatter( $text, $plainText, $config );
		// Will be added via $wgExtractsRemoveClasses on WMF
		$fmt->remove( '.metadata' );

		// The formatter output is trimmed, so expectations carry no outer whitespace.
		$this->assertEquals( $expected, trim( $fmt->getText() ) );
	}

	/**
	 * Data sets for testExtracts(): array( expected extract, input HTML, plain-text flag ).
	 *
	 * @return array[]
	 */
	public function provideExtracts() {
		// Fixture markup; kept on one line so the literal stays byte-for-byte intact.
		$dutch = '<b>Dutch</b> (<span class="unicode haudio" style="white-space:nowrap;"><span class="fn"><a href="/wiki/File:Nl-Nederlands.ogg" title="About this sound"><img alt="About this sound" src="https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/11px-Loudspeaker.svg.png" width="11" height="11" srcset="https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/17px-Loudspeaker.svg.png 1.5x, https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/22px-Loudspeaker.svg.png 2x" /></a> <a href="https://upload.wikimedia.org/wikipedia/commons/d/db/Nl-Nederlands.ogg" class="internal" title="Nl-Nederlands.ogg"><i>Nederlands</i></a></span> <small class="metadata audiolinkinfo" style="cursor:help;">(<a href="/w/index.php?title=Wikipedia:Media_help&action=edit&redlink=1" class="new" title="Wikipedia:Media help (page does not exist)"><span style="cursor:help;">help</span></a>·<a href="/wiki/File:Nl-Nederlands.ogg" title="File:Nl-Nederlands.ogg"><span style="cursor:help;">info</span></a>)</small></span>) is a <a href="/w/index.php?title=West_Germanic_languages&action=edit&redlink=1" class="new" title="West Germanic languages (page does not exist)">West Germanic language</a> and the native language of most of the population of the <a href="/w/index.php?title=Netherlands&action=edit&redlink=1" class="new" title="Netherlands (page does not exist)">Netherlands</a>';

		return array(
			// Plain-text mode
			array(
				'Dutch ( Nederlands ) is a West Germanic language and the native language of most of the population of the Netherlands',
				$dutch,
				true,
			),
			// HTML mode: the expected output keeps lang but has no class/style attributes
			array(
				'<span><span lang="baz">qux</span></span>',
				'<span class="foo"><span lang="baz">qux</span></span>',
				false,
			),
			array(
				'<span><span lang="baz">qux</span></span>',
				'<span style="foo: bar;"><span lang="baz">qux</span></span>',
				false,
			),
			array(
				'<span><span lang="qux">quux</span></span>',
				'<span class="foo"><span style="bar: baz;" lang="qux">quux</span></span>',
				false,
			),
		);
	}

	/**
	 * Checks that ExtractFormatter::getFirstSentences() returns the expected
	 * leading part of the text for the requested sentence count.
	 *
	 * @dataProvider provideGetFirstSentences
	 * @param string $text Input text
	 * @param int $sentences Number of sentences requested
	 * @param string $expected Expected return value
	 */
	public function testGetFirstSentences( $text, $sentences, $expected ) {
		$this->assertEquals( $expected, ExtractFormatter::getFirstSentences( $text, $sentences ) );
	}

	/**
	 * Data sets for testGetFirstSentences(): array( text, sentence count, expected ).
	 *
	 * @return array[]
	 */
	public function provideGetFirstSentences() {
		return array(
			array(
				'Foo is a bar. Such a smart boy. But completely useless.',
				2,
				'Foo is a bar. Such a smart boy.',
			),
			array(
				'Foo is a bar. Such a smart boy. But completely useless.',
				1,
				'Foo is a bar.',
			),
			array(
				'Foo is a bar. Such a smart boy.',
				2,
				'Foo is a bar. Such a smart boy.',
			),
			array(
				'Foo is a bar.',
				1,
				'Foo is a bar.',
			),
			array(
				'Foo is a bar.',
				2,
				'Foo is a bar.',
			),
			array(
				'',
				1,
				'',
			),
			// Exclamation points too!!!
			array(
				'Foo is a bar! Such a smart boy! But completely useless!',
				1,
				'Foo is a bar!',
			),
			// A tricky one
			array(
				"Acid phosphatase (EC 3.1.3.2) is a chemical you don't want to mess with. Polyvinyl acetate, however, is another story.",
				1,
				"Acid phosphatase (EC 3.1.3.2) is a chemical you don't want to mess with.",
			),
			// Bug T118621
			array(
				'Foo was born in 1977. He enjoys listening to Siouxsie and the Banshees.',
				1,
				'Foo was born in 1977.',
			),
			// Bug T115795 - Test no cropping after initials
			array(
				'P.J. Harvey is a singer. She is awesome!',
				1,
				'P.J. Harvey is a singer.',
			),
			// Bug T115817 - Non-breaking space is not a delimiter.
			// The &#160; entity is decoded so the text holds a real non-breaking
			// space after "lat."; without it html_entity_decode() is a no-op and
			// this case would not exercise the non-breaking-space handling.
			array(
				html_entity_decode( 'Pigeons (lat.&#160;Columbidae) are birds. They primarily feed on seeds.' ),
				1,
				html_entity_decode( 'Pigeons (lat.&#160;Columbidae) are birds.' ),
			),
		);
	}

	/**
	 * Checks that ExtractFormatter::getFirstChars() returns the expected
	 * leading part of the text for the requested character count.
	 *
	 * @dataProvider provideGetFirstChars
	 * @param string $text Input text
	 * @param int $chars Number of characters requested
	 * @param string $expected Expected return value
	 */
	public function testGetFirstChars( $text, $chars, $expected ) {
		$this->assertEquals( $expected, ExtractFormatter::getFirstChars( $text, $chars ) );
	}

	/**
	 * Data sets for testGetFirstChars(): array( text, character count, expected ).
	 *
	 * @return array[]
	 */
	public function provideGetFirstChars() {
		$text = 'Lullzy lulz are lullzy!';

		return array(
			// NOTE(review): the two commented-out cases were already disabled;
			// the reason is not recorded here - confirm before re-enabling.
			//array( $text, 0, '' ),
			array( $text, 100, $text ),
			array( $text, 1, 'Lullzy' ),
			array( $text, 6, 'Lullzy' ),
			//array( $text, 7, 'Lullzy' ),
			array( $text, 8, 'Lullzy lulz' ),
		);
	}
}