Add extracts to REST search as description

Add the config variable $wgExtractsExtendRestSearch, which enables adding
extracts to REST search results as the description. It is disabled by default.

Change-Id: I4335f161856b3a4035333de6eb4f547745480d91
This commit is contained in:
alistair3149 2024-06-26 17:56:41 -04:00 committed by Alistair3149
parent c086438ee6
commit fe3982c204
2 changed files with 120 additions and 17 deletions

View file

@ -34,11 +34,15 @@
"TextExtracts\\": "includes/"
},
"Hooks": {
"ApiOpenSearchSuggest": "main"
"ApiOpenSearchSuggest": "main",
"SearchResultProvideDescription": "main"
},
"HookHandlers": {
"main": {
"class": "TextExtracts\\Hooks"
"class": "TextExtracts\\Hooks",
"services": [
"ConfigFactory"
]
}
},
"config": {
@ -63,6 +67,9 @@
},
"ExtractsExtendOpenSearchXml": {
"value": false
},
"ExtractsExtendRestSearch": {
"value": false
}
},
"manifest_version": 2

View file

@ -2,41 +2,110 @@
namespace TextExtracts;
use ApiBase;
use ApiMain;
use ApiResult;
use Generator;
use MediaWiki\Api\Hook\ApiOpenSearchSuggestHook;
use MediaWiki\MediaWikiServices;
use MediaWiki\Config\Config;
use MediaWiki\Config\ConfigFactory;
use MediaWiki\Request\FauxRequest;
use MediaWiki\Rest\Hook\SearchResultProvideDescriptionHook;
/**
* @license GPL-2.0-or-later
*/
class Hooks implements ApiOpenSearchSuggestHook {
class Hooks implements
ApiOpenSearchSuggestHook,
SearchResultProvideDescriptionHook
{
private Config $config;
/**
 * @param ConfigFactory $configFactory Factory used to build the 'textextracts' config
 *  instance that the hook handlers read their feature flags from
 */
public function __construct( ConfigFactory $configFactory ) {
	$this->config = $configFactory->makeConfig( 'textextracts' );
}
/**
* ApiOpenSearchSuggest hook handler
* @param array &$results Array of search results
* Trim an extract to a sensible length.
*
* Adapted from Extension:OpenSearchXml, which adapted it from
* Extension:ActiveAbstract.
*
* @param string $text
* @param int $length Target length; actual result will continue to the end of a sentence.
* @return string
*/
public function onApiOpenSearchSuggest( &$results ) {
$config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'textextracts' );
if ( !$config->get( 'ExtractsExtendOpenSearchXml' ) || $results === [] ) {
return;
private static function trimExtract( $text, $length ) {
	// One compiled pattern per requested length. A single shared pattern would silently
	// keep using whichever length happened to be passed on the very first call.
	static $regexByLength = [];

	// The length is interpolated into the pattern, so force it to a non-negative integer.
	$length = max( 0, (int)$length );

	if ( !isset( $regexByLength[$length] ) ) {
		// Sentence terminators. The non-ASCII ones are written as code point escapes so
		// they cannot be lost or normalised by an encoding round trip; an empty entry
		// here would let the terminator group match nothing and cut the text mid-sentence.
		$endchars = [
			// regular ASCII; the full stop must not directly follow a digit
			'([^\d])\.\s', '\!\s', '\?\s',
			// full-width ideographic full-stop (U+3002)
			"\u{3002}",
			// double-width roman forms: full stop, exclamation mark, question mark
			"\u{FF0E}", "\u{FF01}", "\u{FF1F}",
			// half-width ideographic full stop (U+FF61)
			"\u{FF61}",
		];
		$end = '(?:' . implode( '|', $endchars ) . ')';
		// At least $length characters (lazily), then one or more terminators.
		$sentence = '.{' . $length . ',}?' . $end . '+';
		$regexByLength[$length] = "/^($sentence)/u";
	}

	$matches = [];
	if ( preg_match( $regexByLength[$length], $text, $matches ) ) {
		return trim( $matches[1] );
	}

	// No sentence end found (or the match failed): just return the first line
	return trim( explode( "\n", $text )[0] );
}
foreach ( array_chunk( array_keys( $results ), ApiBase::LIMIT_SML1 ) as $pageIds ) {
/**
 * Retrieves extracts data for the given page IDs from the TextExtracts API.
 *
 * The page IDs are split into chunks of 20 (the max limit of exlimit of the
 * TextExtracts API). One internal `action=query&prop=extracts` request is made
 * per chunk, asking for the plain-text intro of each page in that chunk, and the
 * result of each request is yielded separately.
 *
 * @param array $pageIds An array of page IDs to retrieve extracts for
 * @return Generator Yields, once per chunk, the `query.pages` result data of the request:
 *   $data = [
 *     'pageId' => [
 *       'ns' => int of the namespace
 *       'title' => string of the title of the page
 *       'extract' => array holding the extract text; callers read the text from the
 *         key named by its ApiResult::META_CONTENT entry, falling back to '*'
 *     ]
 *   ]
 */
private function getExtractsData( array $pageIds ): Generator {
	foreach ( array_chunk( $pageIds, 20 ) as $chunkedPageIds ) {
		$api = new ApiMain( new FauxRequest(
			[
				'action' => 'query',
				'prop' => 'extracts',
				'explaintext' => true,
				'exintro' => true,
				// Both values must describe the current chunk only, not the full ID list
				'exlimit' => count( $chunkedPageIds ),
				'pageids' => implode( '|', $chunkedPageIds ),
			]
		) );
		$api->execute();

		yield $api->getResult()->getResultData( [ 'query', 'pages' ] );
	}
}
/**
* ApiOpenSearchSuggest hook handler
* @param array &$results Array of search results
*/
public function onApiOpenSearchSuggest( &$results ) {
if ( !$this->config->get( 'ExtractsExtendOpenSearchXml' ) || $results === [] ) {
return;
}
$pageIds = array_keys( $results );
foreach ( $this->getExtractsData( $pageIds ) as $data ) {
foreach ( $pageIds as $id ) {
$contentKey = $data[$id]['extract'][ApiResult::META_CONTENT] ?? '*';
if ( isset( $data[$id]['extract'][$contentKey] ) ) {
@ -46,4 +115,31 @@ class Hooks implements ApiOpenSearchSuggestHook {
}
}
}
/**
 * SearchResultProvideDescription hook handler.
 *
 * Sets the description of each search result to a trimmed text extract of the page.
 * Does nothing when ExtractsExtendRestSearch is disabled or there are no results.
 *
 * @param array $pageIdentities Array (string=>SearchResultPageIdentity) where key is pageId
 * @param array &$descriptions Output array (string=>string|null)
 *  where key is pageId and value is either a description for given page or null
 */
public function onSearchResultProvideDescription(
	array $pageIdentities,
	&$descriptions
): void {
	$enabled = $this->config->get( 'ExtractsExtendRestSearch' );
	if ( !$enabled || $pageIdentities === [] ) {
		return;
	}

	$ids = array_map(
		static fn ( $pageIdentity ) => $pageIdentity->getId(),
		$pageIdentities
	);

	foreach ( $this->getExtractsData( $ids ) as $pages ) {
		foreach ( $ids as $pageId ) {
			$extract = $pages[$pageId]['extract'] ?? null;
			// The extract text is stored under the key named by the META_CONTENT entry
			$textKey = $extract[ApiResult::META_CONTENT] ?? '*';
			if ( !isset( $extract[$textKey] ) ) {
				// This result set has no extract for this page
				continue;
			}

			$descriptions[$pageId] = self::trimExtract( $extract[$textKey], 150 );
		}
	}
}
}