assertSame( $expected, $truncator->getFirstSentences( $text, $sentences ) );
	}

	/**
	 * Data sets for the getFirstSentences() test.
	 *
	 * Each data set is [ input text, number of sentences requested, expected result ].
	 *
	 * @return array[]
	 */
	public function provideGetFirstSentences() {
		// Very long input that contains no sentence terminator at all (used for T145231 below)
		$longLine = str_repeat( 'word ', 1000000 );

		return [
			[
				'Foo is a bar. Such a smart boy. But completely useless.',
				2,
				'Foo is a bar. Such a smart boy.',
			],
			[
				'Foo is a bar. Such a smart boy. But completely useless.',
				1,
				'Foo is a bar.',
			],
			[
				'Foo is a bar. Such a smart boy.',
				2,
				'Foo is a bar. Such a smart boy.',
			],
			[
				'Foo is a bar.',
				1,
				'Foo is a bar.',
			],
			[
				'Foo is a bar.',
				2,
				'Foo is a bar.',
			],
			[
				'',
				1,
				'',
			],
			'0 sentences mean empty result' => [
				'Foo is a bar. Such a smart boy.',
				0,
				'',
			],
			"Don't explode on negative input" => [
				'Foo is a bar. Such a smart boy.',
				-1,
				'',
			],
			'More sentences requested than is available' => [
				'Foo is a bar. Such a smart boy.',
				3,
				'Foo is a bar. Such a smart boy.',
			],
			// Exclamation points too!!!
			[
				'Foo is a bar! Such a smart boy! But completely useless!',
				1,
				'Foo is a bar!',
			],
			// A tricky one
			[
				"Acid phosphatase (EC 3.1.3.2) is a chemical you don't want to mess with. " .
					"Polyvinyl acetate, however, is another story.",
				1,
				"Acid phosphatase (EC 3.1.3.2) is a chemical you don't want to mess with.",
			],
			// No clear sentences
			[
				"foo\nbar\nbaz",
				2,
				'foo',
			],
			// Bug T118621
			[
				'Foo was born in 1977. He enjoys listening to Siouxsie and the Banshees.',
				1,
				'Foo was born in 1977.',
			],
			// Bug T115795 - Test no cropping after initials
			[
				'P.J. Harvey is a singer. She is awesome!',
				1,
				'P.J. Harvey is a singer.',
			],
			// Bug T115817 - Non-breaking space is not a delimiter
			// The non-breaking space is written as the entity &#160; so that it is visible in
			// the source; html_entity_decode() turns it into the actual U+00A0 character.
			[
				html_entity_decode( 'Pigeons (lat.&#160;Columbidae) are birds. ' .
					'They primarily feed on seeds.' ),
				1,
				html_entity_decode( 'Pigeons (lat.&#160;Columbidae) are birds.' ),
			],
			// Bug T145231 - various problems with regexes
			[
				$longLine,
				3,
				trim( $longLine ),
			],
			[
				str_repeat( 'Sentence. ', 70000 ),
				65536,
				trim( str_repeat( 'Sentence. ', 65536 ) ),
			],
			// The input is "Aa . " followed by a line break and "Bb"; the escape sequence keeps
			// the otherwise invisible trailing space and newline explicit.
			'Preserve whitespace before end character' => [
				"Aa . \nBb",
				1,
				'Aa .',
			],
		];
	}

	/**
	 * Checks getFirstChars() against the data sets from provideGetFirstChars().
	 *
	 * @dataProvider provideGetFirstChars
	 * @param string $text Input text
	 * @param int $chars Number of characters requested
	 * @param string $expected Expected truncated text
	 */
	public function testGetFirstChars( $text, $chars, $expected ) {
		// NOTE(review): `false` presumably disables tidy (cf. testTidyIntegration, which
		// passes `true`) - confirm against the TextTruncator constructor.
		$truncator = new TextTruncator( false );
		$this->assertSame( $expected, $truncator->getFirstChars( $text, $chars ) );
	}

	/**
	 * Data sets for testGetFirstChars().
	 *
	 * Each data set is [ input text, number of characters requested, expected result ].
	 *
	 * @return array[]
	 */
	public function provideGetFirstChars() {
		$text = 'Lullzy lulz are lullzy!';
		// NOTE(review): the data sets below are labelled "HTML processing" and request up to
		// 17 characters, yet this fixture is 6 characters long and contains no markup. The
		// tags may have been stripped from this copy of the file - verify against the
		// upstream test before relying on these expectations.
		$html = 'foobar';
		$longText = str_repeat( 'тест ', 50000 );
		// 13108 repetitions of the 5-character unit 'тест ' are the fewest that span
		// 65536 characters (13107 * 5 = 65535).
		$longTextExpected = trim( str_repeat( 'тест ', 13108 ) );

		return [
			[ $text, -8, '' ],
			[ $text, 0, '' ],
			[ $text, 100, $text ],
			[ $text, 1, 'Lullzy' ],
			[ $text, 6, 'Lullzy' ],
			// Disabled data set, kept as found; the reason is not recorded in this file.
			// [ $text, 7, 'Lullzy' ],
			[ $text, 8, 'Lullzy lulz' ],
			// HTML processing
			[ $html, 1, 'foo' ],
			// let HTML sanitizer clean it up later
			[ $html, 4, 'foo' ],
			[ $html, 12, 'foobar' ],
			[ $html, 13, 'foobar' ],
			[ $html, 16, 'foobar' ],
			[ $html, 17, 'foobar' ],
			// T143178 - previously, characters were extracted using regexps which failed when
			// requesting 64K chars or more.
			[ $longText, 65536, $longTextExpected ],
		];
	}

	/**
	 * Exercises both truncation methods on a truncator constructed with `true`.
	 *
	 * NOTE(review): going by the test name, `true` presumably enables tidy - confirm against
	 * the TextTruncator constructor. The expected values consist of the plain text wrapped in
	 * two line breaks on each side; any wrapping markup may have been stripped from this copy
	 * of the file - verify against the upstream test.
	 */
	public function testTidyIntegration() {
		$truncator = new TextTruncator( true );

		$text = 'Aa. Bb.';
		$this->assertSame( "\n\nAa.\n\n", $truncator->getFirstSentences( $text, 1 ) );
		$this->assertSame( "\n\nAa\n\n", $truncator->getFirstChars( $text, 4 ) );
	}
}