/**
 * Cuts a plain-text string after a requested number of characters without splitting a word.
 *
 * The text is cut at $requestedLength characters; if the cut lands inside a word (a run of word
 * characters and/or forward slashes), the rest of that word is appended back. The result may
 * therefore be longer than $requestedLength by the remainder of that one word.
 *
 * Borrowed from TextExtracts.
 *
 * @param string $text Source plain text to extract from. HTML tags should be removed by the
 *  description provider.
 * @param int $requestedLength Number of characters after which the text is cut. Values of zero
 *  or less yield an empty string.
 * @return string The original text when it is short enough (or when completing the last word
 *  reaches its end), otherwise the whitespace-trimmed extract.
 */
public static function getFirstChars( string $text, int $requestedLength ): string {
	if ( $requestedLength <= 0 ) {
		return '';
	}

	if ( mb_strlen( $text ) <= $requestedLength ) {
		return $text;
	}

	$head = mb_substr( $text, 0, $requestedLength );
	$tail = mb_substr( $text, $requestedLength );

	// Restore the end of the word that has been cut: the (greedy) pattern matches the run of word
	// characters (no punctuation) and/or forward slashes at the very start of the cut-off tail,
	// which is empty when the cut already fell on a word boundary.
	$m = [];
	preg_match( '/^[\w\/]*/su', $tail, $m );

	// preg_match() returns false and leaves $m empty when the subject is not valid UTF-8 (the
	// "u" modifier rejects it). Fall back to a hard cut instead of reading an undefined offset.
	$truncatedText = $head . ( $m[0] ?? '' );

	// Completing the last word consumed the whole text: nothing was dropped, so return the
	// input untouched (deliberately not trimmed, so callers can detect "no truncation" with ===).
	if ( $truncatedText === $text ) {
		return $text;
	}

	return trim( $truncatedText );
}
* * @param string $text HTML to extract the description from. - * @return ?string + * @return ?string Plain text, or null if no description could have been generated */ public function derive( string $text ): ?string; } diff --git a/includes/Hooks.php b/includes/Hooks.php index 477c0a6..25b7f6b 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -31,6 +31,9 @@ class Hooks implements /** @var DescriptionProvider */ private DescriptionProvider $descriptionProvider; + /** @var int */ + private int $maxChars; + /** * @param ConfigFactory $configFactory */ @@ -40,6 +43,7 @@ class Hooks implements ) { $this->config = $configFactory->makeConfig( 'Description2' ); $this->descriptionProvider = $descriptionProvider; + $this->maxChars = $this->config->get( 'DescriptionMaxChars' ); } /** @@ -69,11 +73,21 @@ class Hooks implements } $desc = $this->descriptionProvider->derive( $text ); - - if ( $desc ) { - Description2::setDescription( $parser, $desc ); + if ( !$desc ) { + return true; } + if ( $this->maxChars > 0 ) { + $truncated = Description2::getFirstChars( $desc, $this->maxChars ); + if ( $truncated !== $desc ) { + $desc = $truncated; + if ( !preg_match( '/\p{P}$/u', $truncated ) ) { + $desc = $truncated . wfMessage( 'ellipsis' )->text(); + } + } + } + + Description2::setDescription( $parser, $desc ); return true; }