ApiQueryExtracts: Replace custom parsing logic with ParserOutputAccess

Change-Id: I853617651867044cbe2624857ba08753cce332a5
This commit is contained in:
Bartosz Dziewoński 2024-06-20 22:14:00 +02:00
parent 7f9f4ff4a3
commit 44a3c538e1

View file

@@ -3,7 +3,6 @@
namespace TextExtracts; namespace TextExtracts;
use ApiBase; use ApiBase;
use ApiMain;
use ApiQueryBase; use ApiQueryBase;
use ApiUsageException; use ApiUsageException;
use MediaWiki\Config\Config; use MediaWiki\Config\Config;
@@ -12,7 +11,6 @@ use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\Logger\LoggerFactory; use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MediaWikiServices; use MediaWiki\MediaWikiServices;
use MediaWiki\Page\WikiPageFactory; use MediaWiki\Page\WikiPageFactory;
use MediaWiki\Request\FauxRequest;
use MediaWiki\Title\Title; use MediaWiki\Title\Title;
use ParserOptions; use ParserOptions;
use WANObjectCache; use WANObjectCache;
@@ -252,74 +250,31 @@ class ApiQueryExtracts extends ApiQueryBase {
/** /**
* Returns page HTML * Returns page HTML
* @param WikiPage $page * @param WikiPage $page
* @return string|null * @return string
* @throws ApiUsageException * @throws ApiUsageException
*/ */
private function parse( WikiPage $page ) { private function parse( WikiPage $page ) {
$apiException = null; $parserOutputAccess = MediaWikiServices::getInstance()->getParserOutputAccess();
$parserOptions = ParserOptions::newFromAnon(); $status = $parserOutputAccess->getParserOutput(
$page->toPageRecord(),
// first try finding full page in parser cache ParserOptions::newFromAnon()
if ( $page->shouldCheckParserCache( $parserOptions, 0 ) ) { );
// TODO inject ParserCache if ( $status->isOK() ) {
$pout = MediaWikiServices::getInstance()->getParserCache()->get( $page, $parserOptions ); $pout = $status->getValue();
if ( $pout ) { $text = $pout->getText( [ 'unwrap' => true ] );
$text = $pout->getText( [ 'unwrap' => true ] ); if ( $this->params['intro'] ) {
if ( $this->params['intro'] ) { $text = $this->getFirstSection( $text, false );
$text = $this->getFirstSection( $text, false );
}
return $text;
} }
} return $text;
$request = [ } else {
'action' => 'parse',
'page' => $page->getTitle()->getPrefixedText(),
'prop' => 'text',
// Invokes special handling when using partial wikitext (T168743)
'sectionpreview' => 1,
'wrapoutputclass' => '',
];
if ( $this->params['intro'] ) {
$request['section'] = 0;
}
// in case of cache miss, render just the needed section
$api = new ApiMain( new FauxRequest( $request ) );
try {
$api->execute();
$data = $api->getResult()->getResultData( null, [
'BC' => [],
'Types' => [],
] );
} catch ( ApiUsageException $e ) {
$apiException = $e->__toString();
if ( $e->getStatusValue()->hasMessage( 'apierror-nosuchsection' ) ) {
// Looks like we tried to get the intro to a page without
// sections! Lets just grab what we can get.
unset( $request['section'] );
$api = new ApiMain( new FauxRequest( $request ) );
$api->execute();
$data = $api->getResult()->getResultData( null, [
'BC' => [],
'Types' => [],
] );
} else {
// Some other unexpected error - lets just report it to the user
// on the off chance that is the right thing.
throw $e;
}
}
if ( !array_key_exists( 'parse', $data ) ) {
LoggerFactory::getInstance( 'textextracts' )->warning( LoggerFactory::getInstance( 'textextracts' )->warning(
'API Parse request failed while generating text extract', [ 'Parse attempt failed while generating text extract', [
'title' => $page->getTitle()->getFullText(), 'title' => $page->getTitle()->getFullText(),
'url' => $this->getRequest()->getFullRequestURL(), 'url' => $this->getRequest()->getFullRequestURL(),
'exception' => $apiException, 'reason' => $status->getWikiText( false, false, 'en' )
'request' => $request
] ); ] );
return null; $this->dieStatus( $status );
} }
return $data['parse']['text']['*'];
} }
/** /**