mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/TextExtracts
synced 2024-11-27 17:40:16 +00:00
60e1c5ad83
This piece of code is only relevant in the case when: - the intro section is requested (either in plaintext or html); - the parse result for the full page is available in the parser cache; - the full extract is not available in the TextExtracts WAN cache; - the intro is also not available in the TextExtracts WAN cache. In this case getFirstSection() is called with the parser output, which is different from the convertText() output it is called with in other code paths, and still contains <h*> tags. A quick regex is used to extract the first section. This stops at any <h2>. A TOC also contains a <h2> (which will be removed later via $wgExtractsRemoveClasses). This one needs to be ignored in case the TOC is placed before the first section using e.g. the __TOC__ keyword. The patch changes the regex so it ignores an h2 with id="mw-toc-heading", but keeps working in plaintext mode when <h*> tags are not present (the code path when the intro section is requested, and the full extract is available in the TextExtracts WAN cache but the intro extract isn't). Bug: T269967 Change-Id: I0a495d06cf1725744e556e81f17047fb53f53521
248 lines
6.4 KiB
PHP
248 lines
6.4 KiB
PHP
<?php
|
|
|
|
namespace TextExtracts\Test;
|
|
|
|
use ILanguageConverter;
|
|
use MediaWiki\Languages\LanguageConverterFactory;
|
|
use MediaWikiCoversValidator;
|
|
use TextExtracts\ApiQueryExtracts;
|
|
use Wikimedia\TestingAccessWrapper;
|
|
|
|
/**
|
|
* @covers \TextExtracts\ApiQueryExtracts
|
|
* @group TextExtracts
|
|
*
|
|
* @license GPL-2.0-or-later
|
|
*/
|
|
class ApiQueryExtractsTest extends \MediaWikiTestCase {
|
|
use MediaWikiCoversValidator;
|
|
|
|
/**
 * Creates the API module under test, wired up exclusively with mocks and
 * in-memory stand-ins.
 *
 * The ApiQuery and ApiMain mocks expect getMain() / getContext() to be
 * called exactly once each.
 *
 * @return ApiQueryExtracts
 */
private function newInstance() {
	$settings = new \HashConfig( [
		'ParserCacheExpireTime' => \IExpiringStore::TTL_INDEFINITE,
	] );

	// Every message renders as its own key wrapped in parentheses, which is
	// what expectations such as "abc(ellipsis)" rely on.
	$fakeMessage = function ( $key, ...$params ) {
		$message = $this->createMock( \Message::class );
		$message->method( 'text' )->willReturn( "($key)" );
		return $message;
	};

	$contextSource = $this->createMock( \IContextSource::class );
	$contextSource->method( 'getConfig' )->willReturn( $settings );
	$contextSource->method( 'msg' )->willReturnCallback( $fakeMessage );

	$apiMain = $this->createMock( \ApiMain::class );
	$apiMain->expects( $this->once() )->method( 'getContext' )->willReturn( $contextSource );

	$apiQuery = $this->createMock( \ApiQuery::class );
	$apiQuery->expects( $this->once() )->method( 'getMain' )->willReturn( $apiMain );

	// Only the 'textextracts' config may be requested from the factory.
	$configFactory = $this->createMock( \ConfigFactory::class );
	$configFactory->method( 'makeConfig' )->with( 'textextracts' )->willReturn( $settings );

	$converter = $this->createMock( ILanguageConverter::class );
	$converterFactory = $this->createMock( LanguageConverterFactory::class );
	$converterFactory->method( 'getLanguageConverter' )->willReturn( $converter );

	// WAN cache backed by a plain in-process hash store.
	$wanCache = new \WANObjectCache( [ 'cache' => new \HashBagOStuff() ] );

	return new ApiQueryExtracts( $apiQuery, '', $configFactory, $wanCache, $converterFactory );
}
|
|
|
|
/**
 * Stores a text via setCache() and reads it back via getFromCache() after
 * the cache's process cache has been cleared.
 */
public function testMemCacheHelpers() {
	$pageLanguage = $this->createMock( \Language::class );

	$pageTitle = $this->createMock( \Title::class );
	$pageTitle->method( 'getPageLanguage' )->willReturn( $pageLanguage );

	$wikiPage = $this->createMock( \WikiPage::class );
	$wikiPage->method( 'getTitle' )->willReturn( $pageTitle );

	$cachedText = 'Text to cache';

	/** @var ApiQueryExtracts $module */
	$module = TestingAccessWrapper::newFromObject( $this->newInstance() );
	// Default param values for this API module
	$module->params = [ 'intro' => false, 'plaintext' => false ];

	$beforeStoring = $module->getFromCache( $wikiPage, false );
	$this->assertFalse( $beforeStoring, 'is not cached yet' );

	$module->setCache( $wikiPage, $cachedText );
	// Drop the process cache before reading the value back.
	$module->cache->clearProcessCache();

	$afterStoring = $module->getFromCache( $wikiPage, false );
	$this->assertSame( $cachedText, $afterStoring );
}
|
|
|
|
/**
 * Smoke-tests the self-describing parts of the module: cache mode, example
 * messages, help URLs and the limits declared for the 'chars' and 'limit'
 * parameters.
 */
public function testSelfDocumentation() {
	/** @var ApiQueryExtracts $instance */
	$instance = TestingAccessWrapper::newFromObject( $this->newInstance() );

	$this->assertIsString( $instance->getCacheMode( [] ) );
	$this->assertNotEmpty( $instance->getExamplesMessages() );
	$this->assertIsString( $instance->getHelpUrls() );

	$params = $instance->getAllowedParams();
	$this->assertIsArray( $params );

	// assertSame() takes the expected value first; keeping that order makes
	// PHPUnit label "expected" and "actual" correctly when a check fails.
	$this->assertSame( 1, $params['chars'][\ApiBase::PARAM_MIN] );
	$this->assertSame( 1200, $params['chars'][\ApiBase::PARAM_MAX] );

	$this->assertSame( 20, $params['limit'][\ApiBase::PARAM_DFLT] );
	$this->assertSame( 'limit', $params['limit'][\ApiBase::PARAM_TYPE] );
	$this->assertSame( 1, $params['limit'][\ApiBase::PARAM_MIN] );
	$this->assertSame( 20, $params['limit'][\ApiBase::PARAM_MAX] );
	$this->assertSame( 20, $params['limit'][\ApiBase::PARAM_MAX2] );
}
|
|
|
|
/**
 * @dataProvider provideFirstSectionsToExtract
 *
 * @param string $text Input handed to getFirstSection()
 * @param bool $isPlainText Second argument handed to getFirstSection()
 * @param string $expected Exact string getFirstSection() must return
 */
public function testGetFirstSection( $text, $isPlainText, $expected ) {
	/** @var ApiQueryExtracts $module */
	$module = TestingAccessWrapper::newFromObject( $this->newInstance() );

	$firstSection = $module->getFirstSection( $text, $isPlainText );

	$this->assertSame( $expected, $firstSection );
}
|
|
|
|
/**
 * Data sets for testGetFirstSection().
 *
 * Static because it does not use any instance state; PHPUnit 10 deprecates
 * non-static data providers, and static ones work on older versions too.
 *
 * @return array[] Named data sets of [ input text, plain text flag, expected first section ]
 */
public static function provideFirstSectionsToExtract() {
	return [
		'Plain text match' => [
			// Double-quoted: \1\2 are the control characters 0x01 0x02.
			"First\nsection \1\2... \1\2...",
			true,
			"First\nsection ",
		],
		'Plain text without a match' => [
			// Single-quoted on purpose: \1\2 stay literal backslash sequences,
			// so the text contains no control characters.
			'Example\1\2...',
			true,
			'Example\1\2...',
		],

		'HTML match' => [
			"First\nsection <h1>...<h2>...",
			false,
			"First\nsection ",
		],
		'HTML without a match' => [
			// <h11> is not expected to be treated as a heading.
			'Example <h11>...',
			false,
			'Example <h11>...',
		],
		'__TOC__ before intro (HTML)' => [
			// The TOC heading must not end the first section (T269967).
			'<h2 id="mw-toc-heading">Contents</h2>Intro<h2>Actual heading</h2>...',
			false,
			'<h2 id="mw-toc-heading">Contents</h2>Intro',
		],
		'__TOC__ before intro (plaintext)' => [
			"\1\2_\2\1<h2 id=\"mw-toc-heading\">Contents</h2>Intro\1\2_\2\1<h2>Actual heading</h2>...",
			true,
			"\1\2_\2\1<h2 id=\"mw-toc-heading\">Contents</h2>Intro",
		],
	];
}
|
|
|
|
/**
 * @dataProvider provideTextsToTruncate
 *
 * @param string $text Input handed to truncate()
 * @param array $params Module parameters overriding the defaults set below
 * @param string $expected Exact string truncate() must return
 */
public function testTruncate( $text, array $params, $expected ) {
	// Fallback values for every key the data set does not provide itself.
	$fallbacks = [ 'chars' => null, 'sentences' => null, 'plaintext' => true ];

	/** @var ApiQueryExtracts $module */
	$module = TestingAccessWrapper::newFromObject( $this->newInstance() );
	// Array union: keys present in $params win over the fallbacks.
	$module->params = $params + $fallbacks;

	$truncated = $module->truncate( $text );

	$this->assertSame( $expected, $truncated );
}
|
|
|
|
/**
 * Data sets for testTruncate().
 *
 * Static because it does not use any instance state; PHPUnit 10 deprecates
 * non-static data providers, and static ones work on older versions too.
 * Every data set is named so that a failure identifies the scenario
 * instead of a numeric index.
 *
 * @return array[] Named data sets of [ input text, module params, expected output ]
 */
public static function provideTextsToTruncate() {
	return [
		'Empty text without limits' => [ '', [], '' ],
		'Single word without limits' => [ 'abc', [], 'abc' ],
		'Single word, chars limit' => [
			'abc',
			[ 'chars' => 1 ],
			'abc',
		],
		'Single word, chars limit, plaintext off' => [
			'abc',
			[ 'chars' => 1, 'plaintext' => false ],
			'abc',
		],
		'Single word, sentences limit' => [
			'abc',
			[ 'sentences' => 1 ],
			'abc',
		],
		'Two sentences, chars limit of 1' => [
			'abc abc. xyz xyz.',
			[ 'chars' => 1 ],
			// "(ellipsis)" is how the mocked 'ellipsis' message renders.
			'abc(ellipsis)',
		],
		'Two sentences, sentences limit of 1' => [
			'abc abc. xyz xyz.',
			[ 'sentences' => 1 ],
			'abc abc.',
		],
		'Two sentences, chars limit above text length' => [
			'abc abc. xyz xyz.',
			[ 'chars' => 1000 ],
			'abc abc. xyz xyz.',
		],
		'Two sentences, chars limit above text length, plaintext off' => [
			'abc abc. xyz xyz.',
			[ 'chars' => 1000, 'plaintext' => false ],
			'abc abc. xyz xyz.',
		],
		'Two sentences, sentences limit above sentence count' => [
			'abc abc. xyz xyz.',
			[ 'sentences' => 10 ],
			'abc abc. xyz xyz.',
		],
	];
}
|
|
|
|
/**
 * @dataProvider provideSectionsToFormat
 *
 * @param string $text Input handed to doSections()
 * @param string $format Value used for the 'sectionformat' module parameter
 * @param string $expected Exact string doSections() must return
 */
public function testDoSections( $text, $format, $expected ) {
	/** @var ApiQueryExtracts $module */
	$module = TestingAccessWrapper::newFromObject( $this->newInstance() );
	$module->params = [ 'sectionformat' => $format ];

	$formatted = $module->doSections( $text );

	$this->assertSame( $expected, $formatted );
}
|
|
|
|
/**
 * Data sets for testDoSections().
 *
 * Static because it does not use any instance state; PHPUnit 10 deprecates
 * non-static data providers, and static ones work on older versions too.
 *
 * @return array[] Named data sets of [ input text, section format, expected output ]
 */
public static function provideSectionsToFormat() {
	$level = 3;
	// Heading marker used by the inputs: 0x01 0x02, the level, 0x02 0x01.
	$marker = "\1\2$level\2\1";

	return [
		'Raw' => [
			"$marker Headline\t\nNext line",
			'raw',
			"$marker Headline\t\nNext line",
		],
		'Wiki text' => [
			"$marker Headline\t\nNext line",
			'wiki',
			"\n=== Headline ===\nNext line",
		],
		'Plain text' => [
			"$marker Headline\t\nNext line",
			'plain',
			"\nHeadline\nNext line",
		],

		'Multiple matches' => [
			// "{$var}" instead of "${var}": the latter form of string
			// interpolation is deprecated as of PHP 8.2.
			"{$marker}First\n{$marker}Second",
			'wiki',
			"\n=== First ===\n\n=== Second ===",
		],
	];
}
|
|
|
|
}
|