remove( [ 'div', '.metadata' ] ); $text = trim( $fmt->getText() ); $this->assertSame( $expected, $text ); } /** * Data sets for extract formatting; each case is [ expected text, input text, plain-text flag ]. * The flag is true for the cases labelled "plain text mode" and false for those labelled "HTML mode". * NOTE(review): presumably consumed by the extract test whose tail precedes this method - confirm. * * @return array[] */ public function provideExtracts() { // phpcs:ignore Generic.Files.LineLength $dutch = 'Dutch ( Nederlands (help·info)) is a West Germanic language and the native language of most of the population of the Netherlands'; $tocText = 'Lead
Section text
'; return [ [ 'Dutch ( Nederlands ) is a West Germanic language and the native language of ' . 'most of the population of the Netherlands', $dutch, true, ], 'HTML cleanup in HTML mode' => [ "\u{00A0}A & B", " A & B\r\n", false ], 'HTML cleanup in plain text mode' => [ 'A & B', " A & B\r\n", true ], [ "qux", 'qux', false, ], [ "qux", 'qux', false, ], [ "quux", 'quux', false, ], [ // Verify that TOC is properly removed (HTML mode) "Lead\nSection text
", $tocText, false, ], [ // Verify that TOC is properly removed (plain text mode) "Lead\n\n\x01\x021\2\1Section\nSection text", $tocText, true, ], ]; } /** * Asserts that ExtractFormatter::getFirstSentences() returns exactly the expected text. * * @dataProvider provideGetFirstSentences * @param string $text Input text to take sentences from * @param int $sentences Number of sentences requested (the provider passes integers) * @param string $expected Expected return value */ public function testGetFirstSentences( $text, $sentences, $expected ) { $this->assertSame( $expected, ExtractFormatter::getFirstSentences( $text, $sentences ) ); } /** * Data sets for testGetFirstSentences; each case is [ input text, sentence count, expected result ]. * * @return array[] */ public function provideGetFirstSentences() { /* 'word ' repeated 1000000 times: a 5000000-byte input with no '.' or '!' in it, used by the T145231 case below */ $longLine = str_repeat( 'word ', 1000000 ); return [ [ 'Foo is a bar. Such a smart boy. But completely useless.', 2, 'Foo is a bar. Such a smart boy.', ], [ 'Foo is a bar. Such a smart boy. But completely useless.', 1, 'Foo is a bar.', ], [ 'Foo is a bar. Such a smart boy.', 2, 'Foo is a bar. Such a smart boy.', ], [ 'Foo is a bar.', 1, 'Foo is a bar.', ], [ 'Foo is a bar.', 2, 'Foo is a bar.', ], [ '', 1, '', ], // Exclamation points too!!! [ 'Foo is a bar! Such a smart boy! But completely useless!', 1, 'Foo is a bar!', ], // A tricky one [ "Acid phosphatase (EC 3.1.3.2) is a chemical you don't want to mess with. " . "Polyvinyl acetate, however, is another story.", 1, "Acid phosphatase (EC 3.1.3.2) is a chemical you don't want to mess with.", ], // No clear sentences [ "foo\nbar\nbaz", 2, 'foo', ], // Bug T118621 [ 'Foo was born in 1977. He enjoys listening to Siouxsie and the Banshees.', 1, 'Foo was born in 1977.', ], // Bug T115795 - Test no cropping after initials [ 'P.J. Harvey is a singer. She is awesome!', 1, 'P.J. Harvey is a singer.', ], // Bug T115817 - Non-breaking space is not a delimiter [ html_entity_decode( 'Pigeons (lat. Columbidae) are birds. ' . 'They primarily feed on seeds.' ), 1, html_entity_decode( 'Pigeons (lat. Columbidae) are birds.' ), ], // Bug T145231 - various problems with regexes [ $longLine, 3, trim( $longLine ), ], [ str_repeat( 'Sentence. ', 70000 ), 65536, trim( str_repeat( 'Sentence. 
', 65536 ) ), ], ]; } /** * Asserts that ExtractFormatter::getFirstChars() returns exactly the expected text. * * @dataProvider provideGetFirstChars * @param string $text Input text to take characters from * @param string $chars NOTE(review): declared as string; probably an integer character count - confirm against provideGetFirstChars * @param string $expected Expected return value */ public function testGetFirstChars( $text, $chars, $expected ) { $this->assertSame( $expected, ExtractFormatter::getFirstChars( $text, $chars ) ); } public function provideGetFirstChars() { $text = 'Lullzy lulz are lullzy!'; $html = 'foo