Merge "Fix separation of text into sentences."

This commit is contained in:
jenkins-bot 2016-04-01 20:45:08 +00:00 committed by Gerrit Code Review
commit 647c39734a
2 changed files with 7 additions and 1 deletions

View file

@@ -80,7 +80,7 @@ class ExtractFormatter extends HtmlFormatter {
public static function getFirstSentences( $text, $requestedSentenceCount ) { public static function getFirstSentences( $text, $requestedSentenceCount ) {
// Based on code from OpenSearchXml by Brion Vibber // Based on code from OpenSearchXml by Brion Vibber
$endchars = array( $endchars = array(
'[^\p{Lu}]\.\s', '\!\s', '\?\s', // regular ASCII '[^\p{Lu}]\.[ \n]', '\![ \n]', '\?[ \n]', // regular ASCII
'。', // full-width ideographic full-stop '。', // full-width ideographic full-stop
'．', '！', '？', // double-width roman forms '．', '！', '？', // double-width roman forms
'｡', // half-width ideographic full stop '｡', // half-width ideographic full stop

View file

@@ -115,6 +115,12 @@ class ExtractFormatterTest extends MediaWikiTestCase {
1, 1,
'P.J. Harvey is a singer.', 'P.J. Harvey is a singer.',
), ),
// Bug T115817 - Non-breaking space is not a delimiter
array(
html_entity_decode( 'Pigeons (lat.&nbsp;Columbidae) are birds. They primarily feed on seeds.' ),
1,
html_entity_decode( 'Pigeons (lat.&nbsp;Columbidae) are birds.' ),
),
); );
} }