diff --git a/extension.json b/extension.json
index abfb0ab..3f3e55c 100644
--- a/extension.json
+++ b/extension.json
@@ -35,11 +35,15 @@
 		"TextExtracts\\": "includes/"
 	},
 	"Hooks": {
-		"ApiOpenSearchSuggest": "main"
+		"ApiOpenSearchSuggest": "main",
+		"SearchResultProvideDescription": "main"
 	},
 	"HookHandlers": {
 		"main": {
-			"class": "TextExtracts\\Hooks"
+			"class": "TextExtracts\\Hooks",
+			"services": [
+				"ConfigFactory"
+			]
 		}
 	},
 	"config": {
@@ -64,6 +68,9 @@
 		},
 		"ExtractsExtendOpenSearchXml": {
 			"value": false
+		},
+		"ExtractsExtendRestSearch": {
+			"value": false
 		}
 	},
 	"manifest_version": 2
diff --git a/includes/Hooks.php b/includes/Hooks.php
index e55b119..f181e59 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -2,41 +2,110 @@
 
 namespace TextExtracts;
 
-use ApiBase;
 use ApiMain;
 use ApiResult;
+use Generator;
 use MediaWiki\Api\Hook\ApiOpenSearchSuggestHook;
-use MediaWiki\MediaWikiServices;
+use MediaWiki\Config\Config;
+use MediaWiki\Config\ConfigFactory;
 use MediaWiki\Request\FauxRequest;
+use MediaWiki\Rest\Hook\SearchResultProvideDescriptionHook;
 
 /**
  * @license GPL-2.0-or-later
  */
-class Hooks implements ApiOpenSearchSuggestHook {
+class Hooks implements
+	ApiOpenSearchSuggestHook,
+	SearchResultProvideDescriptionHook
+{
+
+	private Config $config;
+
+	public function __construct(
+		ConfigFactory $configFactory
+	) {
+		$this->config = $configFactory->makeConfig( 'textextracts' );
+	}
 
 	/**
-	 * ApiOpenSearchSuggest hook handler
-	 * @param array &$results Array of search results
+	 * Trim an extract to a sensible length, ending on a sentence boundary.
+	 *
+	 * Adapted from Extension:OpenSearchXml, which adapted it from
+	 * Extension:ActiveAbstract.
+	 *
+	 * @param string $text Plain-text extract to shorten
+	 * @param int $length Target length; actual result will continue to the end of a sentence.
+	 * @return string Text up to the first sentence end at or after $length characters, else the first line of $text
 	 */
-	public function onApiOpenSearchSuggest( &$results ) {
-		$config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'textextracts' );
-		if ( !$config->get( 'ExtractsExtendOpenSearchXml' ) || $results === [] ) {
-			return;
+	private static function trimExtract( $text, $length ) {
+		static $regexes = []; // one compiled pattern per $length, since $length is part of the pattern
+		if ( !isset( $regexes[$length] ) ) {
+			$endchars = [
+				// regular ASCII: ". " (not directly after a digit), "! " and "? "
+				'([^\d])\.\s', '\!\s', '\?\s',
+				// full-width ideographic full-stop (U+3002)
+				'\x{3002}',
+				// double-width roman forms (U+FF0E, U+FF01, U+FF1F); escapes keep a bare "." or "?" out of the pattern
+				'\x{FF0E}', '\x{FF01}', '\x{FF1F}',
+				// half-width ideographic full stop (U+FF61)
+				'\x{FF61}',
+			];
+			$endgroup = implode( '|', $endchars );
+			$end = "(?:$endgroup)";
+			$sentence = ".{{$length},}?$end+";
+			$regexes[$length] = "/^($sentence)/u";
 		}
+		$matches = [];
+		if ( preg_match( $regexes[$length], $text, $matches ) ) {
+			return trim( $matches[1] );
+		} else {
+			// No sentence end found after $length characters: just return the first line
+			return trim( explode( "\n", $text )[0] );
+		}
+	}
 
-		foreach ( array_chunk( array_keys( $results ), ApiBase::LIMIT_SML1 ) as $pageIds ) {
+	/**
+	 * Retrieves extracts data for the given page IDs from the TextExtract API.
+	 * The page IDs are chunked into the max limit of exlimit of the TextExtract API
+	 *
+	 * @param array $pageIds An array of page IDs to retrieve extracts for
+	 * @return Generator Yields the result data from the API request
+	 * $data = [
+	 *    'pageId' => [
+	 *      'ns' => int of the namespace
+	 *      'title' => string of the title of the page
+	 *      'extract' => string of the text extracts of the page
+	 *    ]
+	 * ]
+	 */
+	private function getExtractsData( array $pageIds ) {
+		foreach ( array_chunk( $pageIds, 20 ) as $chunkedPageIds ) {
 			$api = new ApiMain( new FauxRequest(
 				[
 					'action' => 'query',
 					'prop' => 'extracts',
 					'explaintext' => true,
 					'exintro' => true,
-					'exlimit' => count( $pageIds ),
-					'pageids' => implode( '|', $pageIds ),
-				] )
-			);
+					'exlimit' => count( $chunkedPageIds ),
+					'pageids' => implode( '|', $chunkedPageIds ),
+				]
+			) );
 			$api->execute();
-			$data = $api->getResult()->getResultData( [ 'query', 'pages' ] );
+			yield $api->getResult()->getResultData( [ 'query', 'pages' ] );
+		}
+	}
+
+	/**
+	 * ApiOpenSearchSuggest hook handler
+	 * @param array &$results Array of search results
+	 */
+	public function onApiOpenSearchSuggest( &$results ) {
+		if ( !$this->config->get( 'ExtractsExtendOpenSearchXml' ) || $results === [] ) {
+			return;
+		}
+
+		$pageIds = array_keys( $results );
+		foreach ( $this->getExtractsData( $pageIds ) as $data ) {
 			foreach ( $pageIds as $id ) {
 				$contentKey = $data[$id]['extract'][ApiResult::META_CONTENT] ?? '*';
 				if ( isset( $data[$id]['extract'][$contentKey] ) ) {
@@ -46,4 +115,31 @@ class Hooks implements ApiOpenSearchSuggestHook {
 			}
 		}
 	}
+
+	/**
+	 * Used to update Search Results with descriptions for Search Engine.
+	 * @param array $pageIdentities Array (string=>SearchResultPageIdentity) where key is pageId
+	 * @param array &$descriptions Output array (string=>string|null)
+	 * where key is pageId and value is either a description for given page or null
+	 */
+	public function onSearchResultProvideDescription(
+		array $pageIdentities,
+		&$descriptions
+	): void {
+		if ( !$this->config->get( 'ExtractsExtendRestSearch' ) || $pageIdentities === [] ) {
+			return;
+		}
+
+		$pageIds = array_map( static function ( $identity ) {
+			return $identity->getId();
+		}, $pageIdentities );
+		foreach ( $this->getExtractsData( $pageIds ) as $data ) {
+			foreach ( $pageIds as $id ) {
+				$contentKey = $data[$id]['extract'][ApiResult::META_CONTENT] ?? '*';
+				if ( isset( $data[$id]['extract'][$contentKey] ) ) {
+					$descriptions[$id] = self::trimExtract( $data[$id]['extract'][$contentKey], 150 );
+				}
+			}
+		}
+	}
 }