mediawiki-extensions-PageIm.../tests/phpunit/Hooks/ParserFileProcessingHookHandlersTest.php
Tim Starling 768464d0f9 Identify lead images using a new parser hook instead of during LinksUpdate
I added ParserModifyImageHTML to core to make it easier for PageImages
to identify images in the lead section.

This also allows PageImages to stop writing to LinksUpdate properties.
It is one of only two extensions that do that.

Depends-On: I24528381e8d24ca8d138bceadb9397c83fd31356
Bug: T176520
Bug: T296895
Change-Id: I472f4a023969bfde6298eb56112c16d2ae842199
2022-01-04 14:19:32 +11:00

358 lines
10 KiB
PHP

<?php
namespace PageImages\Tests\Hooks;
use File;
use MediaWikiIntegrationTestCase;
use PageImages\Hooks\ParserFileProcessingHookHandlers;
use PageImages\PageImageCandidate;
use PageImages\PageImages;
use Parser;
use ParserOptions;
use RepoGroup;
use Title;
use Wikimedia\TestingAccessWrapper;
/**
* @covers \PageImages\Hooks\ParserFileProcessingHookHandlers
*
* @group PageImages
*
* @license WTFPL
* @author Thiemo Kreuz
*/
class ParserFileProcessingHookHandlersTest extends MediaWikiIntegrationTestCase {
public function setUp(): void {
parent::setUp();
// Force LinksUpdateHookHandler::getPageImageCandidates to look at all
// sections.
$this->setMwGlobals( 'wgPageImagesLeadSectionOnly', false );
}
/**
* @param array[] $images
*
* @return Parser
*/
private function getParser( array $images ) {
$parser = $this->getServiceContainer()->getParser();
$title = Title::newFromText( 'test' );
$options = ParserOptions::newFromAnon();
$parser->startExternalParse( $title, $options, Parser::OT_HTML );
$parser->getOutput()->setExtensionData( 'pageImages', $images );
return $parser;
}
private function getHtml( $indexes, $nonLeadIndex = INF ) {
$html = '';
$doneSectionBreak = false;
foreach ( $indexes as $index ) {
if ( $index >= $nonLeadIndex && !$doneSectionBreak ) {
$html .= '<mw:editsection page="Test" section="1"/>';
$doneSectionBreak = true;
}
$html .= "<!--MW-PAGEIMAGES-CANDIDATE-$index-->";
}
return $html;
}
/**
* Required to make RepoGroup::findFile in ParserFileProcessingHookHandlers::getScore return something.
* @return RepoGroup
*/
private function getRepoGroup() {
$file = $this->getMockBuilder( File::class )
->disableOriginalConstructor()
->getMock();
// ugly hack to avoid all the unmockable crap in FormatMetadata
$file->method( 'isDeleted' )
->willReturn( true );
$repoGroup = $this->getMockBuilder( RepoGroup::class )
->disableOriginalConstructor()
->getMock();
$repoGroup->method( 'findFile' )
->willReturn( $file );
return $repoGroup;
}
private function getHandler( $images ) {
return new class ( $images ) extends ParserFileProcessingHookHandlers {
private $images;
private $isFreeMap;
public function __construct( $images ) {
$this->images = $images;
foreach ( $images as $image ) {
$this->isFreeMap[$image['filename']] = $image['isFree'];
}
}
protected function isImageFree( $fileName ) {
return $this->isFreeMap[$fileName] ?? false;
}
protected function getScore( PageImageCandidate $image, $position ) {
return $this->images[$position]['score'];
}
};
}
/**
* @dataProvider provideDoParserAfterTidy
* @covers \PageImages\Hooks\ParserFileProcessingHookHandlers::doParserAfterTidy
*/
public function testDoParserAfterTidy(
array $images,
$expectedFreeFileName,
$expectedNonFreeFileName
) {
$parser = $this->getParser( $images );
$html = $this->getHtml( array_keys( $images ) );
$handler = $this->getHandler( $images );
$handler->doParserAfterTidy( $parser, $html );
$properties = $parser->getOutput()->getPageProperties();
if ( $expectedFreeFileName === null ) {
$this->assertArrayNotHasKey( PageImages::PROP_NAME_FREE, $properties );
} else {
$this->assertSame( $expectedFreeFileName,
$properties[PageImages::PROP_NAME_FREE] );
}
if ( $expectedNonFreeFileName === null ) {
$this->assertArrayNotHasKey( PageImages::PROP_NAME, $properties );
} else {
$this->assertSame( $expectedNonFreeFileName, $properties[PageImages::PROP_NAME] );
}
}
public function provideDoParserAfterTidy() {
return [
// both images are non-free
[
[
[ 'filename' => 'A.jpg', 'score' => 100, 'isFree' => false ],
[ 'filename' => 'B.jpg', 'score' => 90, 'isFree' => false ],
],
null,
'A.jpg'
],
// both images are free
[
[
[ 'filename' => 'A.jpg', 'score' => 100, 'isFree' => true ],
[ 'filename' => 'B.jpg', 'score' => 90, 'isFree' => true ],
],
'A.jpg',
null
],
// one free (with a higher score), one non-free image
[
[
[ 'filename' => 'A.jpg', 'score' => 100, 'isFree' => true ],
[ 'filename' => 'B.jpg', 'score' => 90, 'isFree' => false ],
],
'A.jpg',
null
],
// one non-free (with a higher score), one free image
[
[
[ 'filename' => 'A.jpg', 'score' => 100, 'isFree' => false ],
[ 'filename' => 'B.jpg', 'score' => 90, 'isFree' => true ],
],
'B.jpg',
'A.jpg'
]
];
}
/**
* @dataProvider provideDoParserAfterTidy_lead
* @covers \PageImages\Hooks\ParserFileProcessingHookHandlers::doParserAfterTidy
*/
public function testDoParserAfterTidy_lead( $leadOnly ) {
$this->setMwGlobals( 'wgPageImagesLeadSectionOnly', $leadOnly );
$candidates = [
[ 'filename' => 'A.jpg', 'score' => 100, 'isFree' => false ],
[ 'filename' => 'B.jpg', 'score' => 90, 'isFree' => true ],
];
$parser = $this->getParser( $candidates );
$html = $this->getHtml( array_keys( $candidates ), 1 );
$handler = $this->getHandler( $candidates );
$handler->doParserAfterTidy( $parser, $html );
if ( $leadOnly ) {
$this->assertFalse(
$parser->getOutput()->getPageProperty( PageImages::PROP_NAME_FREE ),
'Only lead images are returned.' );
} else {
$this->assertIsString(
$parser->getOutput()->getPageProperty( PageImages::PROP_NAME_FREE ),
'All images are returned'
);
}
}
public static function provideDoParserAfterTidy_lead() {
return [
[ false ],
[ true ]
];
}
/**
* @dataProvider provideGetScore
*/
public function testGetScore( $image, $scoreFromTable, $position, $expected ) {
$mock = TestingAccessWrapper::newFromObject(
$this->getMockBuilder( ParserFileProcessingHookHandlers::class )
->onlyMethods( [ 'scoreFromTable', 'fetchFileMetadata', 'getRatio', 'getDenylist' ] )
->getMock()
);
$mock->method( 'scoreFromTable' )
->willReturn( $scoreFromTable );
$mock->method( 'getRatio' )
->willReturn( 0 );
$mock->method( 'getDenylist' )
->willReturn( [ 'denylisted.jpg' => 1 ] );
$score = $mock->getScore( PageImageCandidate::newFromArray( $image ), $position );
$this->assertSame( $expected, $score );
}
public function provideGetScore() {
return [
[
[ 'filename' => 'A.jpg', 'handler' => [ 'width' => 100 ] ],
100,
0,
// width score + ratio score + position score
100 + 100 + 8
],
[
[ 'filename' => 'A.jpg', 'fullwidth' => 100 ],
50,
1,
// width score + ratio score + position score
106
],
[
[ 'filename' => 'A.jpg', 'fullwidth' => 100 ],
50,
2,
// width score + ratio score + position score
104
],
[
[ 'filename' => 'A.jpg', 'fullwidth' => 100 ],
50,
3,
// width score + ratio score + position score
103
],
[
[ 'filename' => 'denylisted.jpg', 'fullwidth' => 100 ],
50,
3,
// denylist score
- 1000
],
];
}
/**
* @dataProvider provideScoreFromTable
* @covers \PageImages\Hooks\ParserFileProcessingHookHandlers::scoreFromTable
*/
public function testScoreFromTable( array $scores, $value, $expected ) {
/** @var ParserFileProcessingHookHandlers $handlerWrapper */
$handlerWrapper = TestingAccessWrapper::newFromObject( new ParserFileProcessingHookHandlers );
$score = $handlerWrapper->scoreFromTable( $value, $scores );
$this->assertEquals( $expected, $score );
}
public function provideScoreFromTable() {
global $wgPageImagesScores;
return [
'no match' => [ [], 100, 0 ],
'float' => [ [ 0.5 ], 0, 0.5 ],
'always min when below range' => [ [ 200 => 2, 800 => 1 ], 0, 2 ],
'always max when above range' => [ [ 200 => 2, 800 => 1 ], 1000, 1 ],
'always min when below range (reversed)' => [ [ 800 => 1, 200 => 2 ], 0, 2 ],
'always max when above range (reversed)' => [ [ 800 => 1, 200 => 2 ], 1000, 1 ],
'min match' => [ [ 200 => 2, 400 => 3, 800 => 1 ], 200, 2 ],
'above min' => [ [ 200 => 2, 400 => 3, 800 => 1 ], 201, 3 ],
'second last match' => [ [ 200 => 2, 400 => 3, 800 => 1 ], 400, 3 ],
'above second last' => [ [ 200 => 2, 400 => 3, 800 => 1 ], 401, 1 ],
// These test cases use the default values from extension.json
[ $wgPageImagesScores['width'], 100, -100 ],
[ $wgPageImagesScores['width'], 119, -100 ],
[ $wgPageImagesScores['width'], 300, 10 ],
[ $wgPageImagesScores['width'], 400, 10 ],
[ $wgPageImagesScores['width'], 500, 5 ],
[ $wgPageImagesScores['width'], 600, 5 ],
[ $wgPageImagesScores['width'], 601, 0 ],
[ $wgPageImagesScores['width'], 999, 0 ],
[ $wgPageImagesScores['galleryImageWidth'], 99, -100 ],
[ $wgPageImagesScores['galleryImageWidth'], 100, 0 ],
[ $wgPageImagesScores['galleryImageWidth'], 500, 0 ],
[ $wgPageImagesScores['ratio'], 1, -100 ],
[ $wgPageImagesScores['ratio'], 3, -100 ],
[ $wgPageImagesScores['ratio'], 4, 0 ],
[ $wgPageImagesScores['ratio'], 5, 0 ],
[ $wgPageImagesScores['ratio'], 10, 5 ],
[ $wgPageImagesScores['ratio'], 20, 5 ],
[ $wgPageImagesScores['ratio'], 25, 0 ],
[ $wgPageImagesScores['ratio'], 30, 0 ],
[ $wgPageImagesScores['ratio'], 31, -100 ],
[ $wgPageImagesScores['ratio'], 40, -100 ],
'T212013' => [ $wgPageImagesScores['width'], 0, -100 ],
];
}
/**
* @dataProvider provideIsFreeImage
* @covers \PageImages\Hooks\ParserFileProcessingHookHandlers::isImageFree
*/
public function testIsFreeImage( $fileName, $metadata, $expected ) {
$this->overrideMwServices( null, [
'RepoGroup' => function () {
return $this->getRepoGroup();
}
] );
$mock = TestingAccessWrapper::newFromObject(
$this->getMockBuilder( ParserFileProcessingHookHandlers::class )
->onlyMethods( [ 'fetchFileMetadata' ] )
->getMock()
);
$mock->method( 'fetchFileMetadata' )
->willReturn( $metadata );
/** @var ParserFileProcessingHookHandlers $mock */
$this->assertSame( $expected, $mock->isImageFree( $fileName ) );
}
public function provideIsFreeImage() {
return [
[ 'A.jpg', [], true ],
[ 'A.jpg', [ 'NonFree' => [ 'value' => '0' ] ], true ],
[ 'A.jpg', [ 'NonFree' => [ 'value' => 0 ] ], true ],
[ 'A.jpg', [ 'NonFree' => [ 'value' => false ] ], true ],
[ 'A.jpg', [ 'NonFree' => [ 'value' => 'something' ] ], false ],
[ 'A.jpg', [ 'something' => [ 'value' => 'something' ] ], true ],
];
}
}