2017-05-19 13:37:26 +00:00
|
|
|
<?php
|
2019-02-11 16:32:20 +00:00
|
|
|
|
2017-05-19 13:37:26 +00:00
|
|
|
namespace TextExtracts\Test;
|
|
|
|
|
2021-02-27 13:52:34 +00:00
|
|
|
use ILanguageConverter;
|
2024-01-04 21:22:49 +00:00
|
|
|
use MediaWiki\Config\ConfigFactory;
|
|
|
|
use MediaWiki\Config\HashConfig;
|
2024-06-10 19:59:11 +00:00
|
|
|
use MediaWiki\Context\IContextSource;
|
2021-02-27 13:52:34 +00:00
|
|
|
use MediaWiki\Languages\LanguageConverterFactory;
|
2024-06-10 19:59:11 +00:00
|
|
|
use MediaWiki\Message\Message;
|
2023-08-19 04:19:49 +00:00
|
|
|
use MediaWiki\Title\Title;
|
2018-01-23 22:54:12 +00:00
|
|
|
use MediaWikiCoversValidator;
|
2017-05-19 13:37:26 +00:00
|
|
|
use TextExtracts\ApiQueryExtracts;
|
2023-08-06 21:28:48 +00:00
|
|
|
use Wikimedia\LightweightObjectStore\ExpirationAwareness;
|
2019-02-11 16:32:20 +00:00
|
|
|
use Wikimedia\TestingAccessWrapper;
|
2017-05-19 13:37:26 +00:00
|
|
|
|
|
|
|
/**
|
2018-01-23 22:54:12 +00:00
|
|
|
* @covers \TextExtracts\ApiQueryExtracts
|
2017-05-19 13:37:26 +00:00
|
|
|
* @group TextExtracts
|
2019-04-24 16:26:53 +00:00
|
|
|
*
|
|
|
|
* @license GPL-2.0-or-later
|
2017-05-19 13:37:26 +00:00
|
|
|
*/
|
2021-10-13 10:28:17 +00:00
|
|
|
class ApiQueryExtractsTest extends \MediaWikiIntegrationTestCase {
	use MediaWikiCoversValidator;

	/**
	 * Creates the API module under test with its collaborators replaced by test doubles.
	 *
	 * The module receives:
	 * - a mocked ApiQuery whose getMain() returns a mocked ApiMain, whose getContext()
	 *   in turn returns a mocked context (each of the two getters is expected exactly once),
	 * - an empty module prefix,
	 * - a mocked ConfigFactory returning a HashConfig for 'textextracts',
	 * - a WANObjectCache backed by an in-memory HashBagOStuff,
	 * - a mocked LanguageConverterFactory whose converter always prefers the 'en' variant,
	 * - the WikiPageFactory taken from the test's service container.
	 *
	 * @return ApiQueryExtracts
	 */
	private function newInstance() {
		// One configuration object is shared by the config factory and the request context.
		$settings = new HashConfig( [
			'ParserCacheExpireTime' => ExpirationAwareness::TTL_INDEFINITE,
		] );

		$context = $this->createMock( IContextSource::class );
		$context->method( 'getConfig' )->willReturn( $settings );
		// msg() hands out a Message mock whose text() is the message key wrapped in parentheses.
		$context->method( 'msg' )->willReturnCallback( function ( $key, ...$params ) {
			$message = $this->createMock( Message::class );
			$message->method( 'text' )->willReturn( "($key)" );
			return $message;
		} );

		// Expect exactly one getContext() call on the main module ...
		$apiMain = $this->createMock( \ApiMain::class );
		$apiMain->expects( $this->once() )->method( 'getContext' )->willReturn( $context );

		// ... and exactly one getMain() call on the query module.
		$apiQuery = $this->createMock( \ApiQuery::class );
		$apiQuery->expects( $this->once() )->method( 'getMain' )->willReturn( $apiMain );

		$configFactory = $this->createMock( ConfigFactory::class );
		$configFactory->method( 'makeConfig' )
			->with( 'textextracts' )
			->willReturn( $settings );

		$converter = $this->createMock( ILanguageConverter::class );
		$converter->method( 'getPreferredVariant' )->willReturn( 'en' );
		$converterFactory = $this->createMock( LanguageConverterFactory::class );
		$converterFactory->method( 'getLanguageConverter' )->willReturn( $converter );

		$wanCache = new \WANObjectCache( [ 'cache' => new \HashBagOStuff() ] );
		$wikiPageFactory = $this->getServiceContainer()->getWikiPageFactory();

		return new ApiQueryExtracts(
			$apiQuery,
			'',
			$configFactory,
			$wanCache,
			$converterFactory,
			$wikiPageFactory
		);
	}

	/**
	 * getFromCache() must report a miss (false) before anything was stored, and return
	 * the exact text passed to setCache() afterwards.
	 */
	public function testMemCacheHelpers() {
		$pageLanguage = $this->createMock( \Language::class );
		$title = $this->createMock( Title::class );
		$title->method( 'getPageLanguage' )->willReturn( $pageLanguage );

		$page = $this->createMock( \WikiPage::class );
		$page->method( 'getTitle' )->willReturn( $title );
		$page->method( 'getId' )->willReturn( 123 );
		$page->method( 'getTouched' )->willReturn( '20010101000000' );

		$cachedText = 'Text to cache';

		/** @var ApiQueryExtracts $instance */
		$instance = TestingAccessWrapper::newFromObject( $this->newInstance() );
		// Default param values for this API module
		$instance->params = [ 'intro' => false, 'plaintext' => false ];

		$this->assertFalse( $instance->getFromCache( $page, false ), 'is not cached yet' );

		$instance->setCache( $page, $cachedText );
		// Presumably cleared so the read below is not answered from the in-process copy.
		$instance->cache->clearProcessCache();

		$this->assertSame( $cachedText, $instance->getFromCache( $page, false ) );
	}

	/**
	 * Smoke-tests the self-describing API methods and pins the documented bounds of the
	 * "chars" and "limit" parameters.
	 */
	public function testSelfDocumentation() {
		/** @var ApiQueryExtracts $instance */
		$instance = TestingAccessWrapper::newFromObject( $this->newInstance() );

		$this->assertIsString( $instance->getCacheMode( [] ) );
		$this->assertNotEmpty( $instance->getExamplesMessages() );
		$this->assertIsString( $instance->getHelpUrls() );

		$params = $instance->getAllowedParams();
		$this->assertIsArray( $params );

		$charsSettings = $params['chars'];
		$this->assertSame( 1, $charsSettings[\ApiBase::PARAM_MIN] );
		$this->assertSame( 1200, $charsSettings[\ApiBase::PARAM_MAX] );

		$limitSettings = $params['limit'];
		$this->assertSame( 20, $limitSettings[\ApiBase::PARAM_DFLT] );
		$this->assertSame( 'limit', $limitSettings[\ApiBase::PARAM_TYPE] );
		$this->assertSame( 1, $limitSettings[\ApiBase::PARAM_MIN] );
		$this->assertSame( 20, $limitSettings[\ApiBase::PARAM_MAX] );
		$this->assertSame( 20, $limitSettings[\ApiBase::PARAM_MAX2] );
	}

	/**
	 * @dataProvider provideFirstSectionsToExtract
	 */
	public function testGetFirstSection( $text, $isPlainText, $expected ) {
		/** @var ApiQueryExtracts $instance */
		$instance = TestingAccessWrapper::newFromObject( $this->newInstance() );

		$firstSection = $instance->getFirstSection( $text, $isPlainText );

		$this->assertSame( $expected, $firstSection );
	}

	/**
	 * Data sets for testGetFirstSection().
	 *
	 * @return array[] Named cases of [ input text, plain text flag, expected first section ]
	 */
	public static function provideFirstSectionsToExtract() {
		$cases = [];

		$cases['Plain text match'] = [
			"First\nsection \1\2... \1\2...",
			true,
			"First\nsection ",
		];

		// Single quotes keep the backslashes literal, so this text holds no control bytes.
		$literalBackslashes = 'Example\1\2...';
		$cases['Plain text without a match'] = [
			$literalBackslashes,
			true,
			$literalBackslashes,
		];

		$cases['HTML match'] = [
			"First\nsection <h1>...<h2>...",
			false,
			"First\nsection ",
		];

		// <h11> is not one of the h1–h6 heading tags.
		$notAHeading = 'Example <h11>...';
		$cases['HTML without a match'] = [
			$notAHeading,
			false,
			$notAHeading,
		];

		$cases['__TOC__ before intro (HTML)'] = [
			'<h2 id="mw-toc-heading">Contents</h2>Intro<h2>Actual heading</h2>...',
			false,
			'<h2 id="mw-toc-heading">Contents</h2>Intro',
		];

		$cases['__TOC__ before intro (plaintext)'] = [
			"\1\2_\2\1<h2 id=\"mw-toc-heading\">Contents</h2>Intro\1\2_\2\1<h2>Actual heading</h2>...",
			true,
			"\1\2_\2\1<h2 id=\"mw-toc-heading\">Contents</h2>Intro",
		];

		return $cases;
	}

	/**
	 * @dataProvider provideTextsToTruncate
	 */
	public function testTruncate( $text, array $params, $expected ) {
		// Values from the data set win; anything it leaves out falls back to these defaults.
		$defaults = [ 'chars' => null, 'sentences' => null, 'plaintext' => true ];

		/** @var ApiQueryExtracts $instance */
		$instance = TestingAccessWrapper::newFromObject( $this->newInstance() );
		$instance->params = $params + $defaults;

		$truncated = $instance->truncate( $text );

		$this->assertSame( $expected, $truncated );
	}

	/**
	 * Data sets for testTruncate().
	 *
	 * @return array[] Cases of [ input text, module params to override, expected output ]
	 */
	public static function provideTextsToTruncate() {
		$word = 'abc';
		$twoSentences = 'abc abc. xyz xyz.';

		return [
			// No limits given at all
			[ '', [], '' ],
			[ $word, [], $word ],

			// A single word comes back unchanged under every limit used here
			[ $word, [ 'chars' => 1 ], $word ],
			[ $word, [ 'chars' => 1, 'plaintext' => false ], $word ],
			[ $word, [ 'sentences' => 1 ], $word ],

			// Limits smaller than the text; "(ellipsis)" is the mocked message text
			[ $twoSentences, [ 'chars' => 1 ], 'abc(ellipsis)' ],
			[ $twoSentences, [ 'sentences' => 1 ], 'abc abc.' ],

			// Limits larger than the text
			[ $twoSentences, [ 'chars' => 1000 ], $twoSentences ],
			[ $twoSentences, [ 'chars' => 1000, 'plaintext' => false ], $twoSentences ],
			[ $twoSentences, [ 'sentences' => 10 ], $twoSentences ],
		];
	}

	/**
	 * @dataProvider provideSectionsToFormat
	 */
	public function testDoSections( $text, $format, $expected ) {
		/** @var ApiQueryExtracts $instance */
		$instance = TestingAccessWrapper::newFromObject( $this->newInstance() );
		$instance->params = [ 'sectionformat' => $format ];

		$formatted = $instance->doSections( $text );

		$this->assertSame( $expected, $formatted );
	}

	/**
	 * Data sets for testDoSections().
	 *
	 * @return array[] Named cases of [ input text, section format, expected output ]
	 */
	public static function provideSectionsToFormat() {
		$level = 3;
		// Section marker: the heading level enclosed in \1\2 ... \2\1 control bytes.
		$marker = "\1\2{$level}\2\1";
		$headlineAndBody = "{$marker} Headline\t\nNext line";

		return [
			'Raw' => [
				$headlineAndBody,
				'raw',
				$headlineAndBody,
			],
			'Wiki text' => [
				$headlineAndBody,
				'wiki',
				"\n=== Headline ===\nNext line",
			],
			'Plain text' => [
				$headlineAndBody,
				'plain',
				"\nHeadline\nNext line",
			],

			'Multiple matches' => [
				"{$marker}First\n{$marker}Second",
				'wiki',
				"\n=== First ===\n\n=== Second ===",
			],
		];
	}

}
|