mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/Description2
synced 2024-11-27 07:59:58 +00:00
Truncate descriptions to a certain length
With this change, generated descriptions are cut at 300 characters, without breaking words when possible, and with an ellipsis added in case the cut happened mid-sentence. The `Description2::getFirstChars` function was borrowed from TextExtracts with minor alterations (added comment for their regex, and removed `>` from word healing given it's unclear why it's been included). New configuration variable `$wgDescriptionMaxChars` (proposed default: 300, which seems like a sensible amount) controls this behaviour. This has been in testing on ark.wiki.gg (with the platform's approval, which I'm glad for) since early September. Without this change, we had a few pages with little sections having a huge part of their body text thrown into the `description` meta tag... Depends-On: I585f2c0046571310aad67f3ba148c4f22aaae49f Change-Id: I04b00f99085f07f773212ee3eca8470eece34e9e
This commit is contained in:
parent
bc54a5a120
commit
c146909532
|
@ -12,7 +12,8 @@
|
|||
"MediaWiki": ">= 1.35.0"
|
||||
},
|
||||
"config": {
|
||||
"EnableMetaDescriptionFunctions": false
|
||||
"EnableMetaDescriptionFunctions": false,
|
||||
"DescriptionMaxChars": 300
|
||||
},
|
||||
"ConfigRegistry": {
|
||||
"Description2": "GlobalVarConfig::newInstance"
|
||||
|
|
|
@ -48,4 +48,36 @@ class Description2 {
|
|||
self::setDescription( $parser, $desc );
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
 * Returns roughly the requested number of leading characters, preserving words.
 *
 * The text is cut after $requestedLength characters. If the cut lands inside a word, the
 * remainder of that word is appended, so the result may be longer than $requestedLength by
 * the length of the restored word tail. Surrounding whitespace is trimmed from a shortened
 * result; text that is returned whole is returned unmodified.
 *
 * Borrowed from TextExtracts.
 *
 * @param string $text Source plain text to extract from. HTML tags should be removed by the description provider.
 * @param int $requestedLength Maximum number of characters to keep before word healing
 * @return string Empty string if $requestedLength is not positive
 */
public static function getFirstChars( string $text, int $requestedLength ): string {
	if ( $requestedLength <= 0 ) {
		return '';
	}

	if ( mb_strlen( $text ) <= $requestedLength ) {
		// Nothing to cut.
		return $text;
	}

	$head = mb_substr( $text, 0, $requestedLength );
	$tail = mb_substr( $text, $requestedLength );

	// Word healing: the pattern greedily matches the run of word characters and/or forward
	// slashes (no punctuation, no whitespace) at the very start of the cut-off tail, i.e. the
	// rest of the word the cut went through. It matches an empty string when the cut fell on
	// a word boundary.
	$pattern = '/^[\w\/]*/su';
	preg_match( $pattern, $tail, $m );

	// preg_match() can fail (e.g. the /u flag rejects malformed UTF-8), in which case there
	// is no match to read; fall back to the hard cut instead of raising a warning.
	$truncatedText = $head . ( $m[0] ?? '' );

	if ( $truncatedText === $text ) {
		// Healing restored the whole text; return it untouched so callers can detect
		// that no truncation happened.
		return $text;
	}

	return trim( $truncatedText );
}
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@ interface DescriptionProvider {
|
|||
* Extracts description from the HTML representation of a page.
|
||||
*
|
||||
* @param string $text HTML to extract the description from.
|
||||
* @return ?string
|
||||
* @return ?string Plain text, or null if no description could be generated
|
||||
*/
|
||||
public function derive( string $text ): ?string;
|
||||
}
|
||||
|
|
|
@ -31,6 +31,9 @@ class Hooks implements
|
|||
/** @var DescriptionProvider */
|
||||
private DescriptionProvider $descriptionProvider;
|
||||
|
||||
/** @var int */
|
||||
private int $maxChars;
|
||||
|
||||
/**
|
||||
* @param ConfigFactory $configFactory
|
||||
*/
|
||||
|
@ -40,6 +43,7 @@ class Hooks implements
|
|||
) {
|
||||
$this->config = $configFactory->makeConfig( 'Description2' );
|
||||
$this->descriptionProvider = $descriptionProvider;
|
||||
$this->maxChars = $this->config->get( 'DescriptionMaxChars' );
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -69,11 +73,21 @@ class Hooks implements
|
|||
}
|
||||
|
||||
$desc = $this->descriptionProvider->derive( $text );
|
||||
|
||||
if ( $desc ) {
|
||||
Description2::setDescription( $parser, $desc );
|
||||
if ( !$desc ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if ( $this->maxChars > 0 ) {
|
||||
$truncated = Description2::getFirstChars( $desc, $this->maxChars );
|
||||
if ( $truncated !== $desc ) {
|
||||
$desc = $truncated;
|
||||
if ( !preg_match( '/\p{P}$/u', $truncated ) ) {
|
||||
$desc = $truncated . wfMessage( 'ellipsis' )->text();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Description2::setDescription( $parser, $desc );
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue