ApiQueryExtracts: Replace custom parsing logic with ParserOutputAccess

Change-Id: I853617651867044cbe2624857ba08753cce332a5
2024-11-23 15:56:52 +00:00 · 2024-06-20 22:14:00 +02:00 · 2024-06-20 22:14:00 +02:00 · 44a3c538e1
parent 7f9f4ff4a3
commit 44a3c538e1
1 changed files with 16 additions and 61 deletions
--- a/includes/ApiQueryExtracts.php
+++ b/includes/ApiQueryExtracts.php
@ -3,7 +3,6 @@
 namespace TextExtracts;
 use ApiBase;
 use ApiMain;
 use ApiQueryBase;
 use ApiUsageException;
 use MediaWiki\Config\Config;
@ -12,7 +11,6 @@ use MediaWiki\Languages\LanguageConverterFactory;
 use MediaWiki\Logger\LoggerFactory;
 use MediaWiki\MediaWikiServices;
 use MediaWiki\Page\WikiPageFactory;
 use MediaWiki\Request\FauxRequest;
 use MediaWiki\Title\Title;
 use ParserOptions;
 use WANObjectCache;
@ -252,74 +250,31 @@ class ApiQueryExtracts extends ApiQueryBase {
 	/**
 	 * Returns the rendered HTML of a page, obtained through ParserOutputAccess.
 	 *
 	 * The page is rendered with anonymous-user parser options. When the
 	 * 'intro' request parameter is set, only the first section of the
 	 * rendered text is returned.
 	 *
 	 * @param WikiPage $page Page to render
 	 * @return string Unwrapped page HTML (or its first section)
 	 * @throws ApiUsageException If no parser output could be obtained
 	 */
 	private function parse( WikiPage $page ) {
 		$parserOutputAccess = MediaWikiServices::getInstance()->getParserOutputAccess();
 		$status = $parserOutputAccess->getParserOutput(
 			$page->toPageRecord(),
 			ParserOptions::newFromAnon()
 		);

 		if ( !$status->isOK() ) {
 			// Record why rendering failed before surfacing the error to the client.
 			LoggerFactory::getInstance( 'textextracts' )->warning(
 				'Parse attempt failed while generating text extract', [
 					'title' => $page->getTitle()->getFullText(),
 					'url' => $this->getRequest()->getFullRequestURL(),
 					'reason' => $status->getWikiText( false, false, 'en' )
 				] );
 			// Reports the status as an API error; per the @throws contract
 			// above, control is not expected to continue past this call.
 			$this->dieStatus( $status );
 		}

 		$pout = $status->getValue();
 		$text = $pout->getText( [ 'unwrap' => true ] );
 		if ( $this->params['intro'] ) {
 			$text = $this->getFirstSection( $text, false );
 		}
 		return $text;
 	}
 	/**