mediawiki-extensions-Descri.../includes/Hooks.php
alex4401 c146909532
Truncate descriptions to a certain length
With this change, generated descriptions are cut at 300 characters, without breaking words when possible, and with an ellipsis added in case the cut happened mid-sentence.

The `Description2::getFirstChars` function was borrowed from TextExtracts with minor alterations (added comment for their regex, and removed `>` from word healing given it's unclear why it's been included).

New configuration variable `$wgDescriptionMaxChars` (proposed default: 300, which seems like a sensible amount) controls this behaviour.

This has been in testing on ark.wiki.gg (with the platform's approval, for which I'm grateful) since early September. Without this change, a few pages with very few sections had a large part of their body text dumped into the `description` meta tag.

Depends-On: I585f2c0046571310aad67f3ba148c4f22aaae49f
Change-Id: I04b00f99085f07f773212ee3eca8470eece34e9e
2024-02-24 08:44:53 +01:00

131 lines
3.4 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace MediaWiki\Extension\Description2;
use Config;
use ConfigFactory;
use OutputPage;
use Parser;
use ParserOutput;
/**
* Description2 Adds meaningful description <meta> tag to MW pages and into the parser output
*
* @file
* @ingroup Extensions
* @author Daniel Friesen (http://danf.ca/mw/)
* @copyright Copyright 2010 Daniel Friesen
* @license GPL-2.0-or-later
* @link https://www.mediawiki.org/wiki/Extension:Description2 Documentation
*/
class Hooks implements
	\MediaWiki\Hook\ParserAfterTidyHook,
	\MediaWiki\Hook\ParserFirstCallInitHook,
	\MediaWiki\Hook\OutputPageParserOutputHook
{
	/** @var Config The 'Description2' configuration instance. */
	private Config $config;

	/** @var DescriptionProvider Derives a description from the parsed page text. */
	private DescriptionProvider $descriptionProvider;

	/** @var int Value of DescriptionMaxChars; truncation only happens when it is greater than zero. */
	private int $maxChars;

	/**
	 * @param ConfigFactory $configFactory Used to build the 'Description2' config instance.
	 * @param DescriptionProvider $descriptionProvider Provider used to derive descriptions.
	 */
	public function __construct(
		ConfigFactory $configFactory,
		DescriptionProvider $descriptionProvider
	) {
		$this->config = $configFactory->makeConfig( 'Description2' );
		$this->descriptionProvider = $descriptionProvider;
		$this->maxChars = $this->config->get( 'DescriptionMaxChars' );
	}

	/**
	 * Derives a description from the tidied page text and records it through
	 * Description2::setDescription(), unless one has already been recorded.
	 *
	 * @link https://www.mediawiki.org/wiki/Manual:Hooks/ParserAfterTidy
	 * @param Parser $parser The parser.
	 * @param string &$text The page text.
	 * @return bool Always true.
	 */
	public function onParserAfterTidy( $parser, &$text ) {
		// Avoid running the algorithm on interface messages which may waste time
		if ( $parser->getOptions()->getInterfaceMessage() ) {
			return true;
		}

		// Avoid running the algorithm multiple times if we already have determined the
		// description. This may happen on file pages. The check is explicit rather than
		// truthiness-based so that a description of "0" is not treated as missing.
		$existing = $this->getDescriptionProperty( $parser->getOutput() );
		if ( $existing !== null && $existing !== '' ) {
			return true;
		}

		$desc = $this->descriptionProvider->derive( $text );
		if ( !$desc ) {
			return true;
		}

		Description2::setDescription( $parser, $this->truncateDescription( $parser, $desc ) );
		return true;
	}

	/**
	 * Registers the 'description2' parser function when
	 * EnableMetaDescriptionFunctions is enabled.
	 *
	 * @param Parser $parser The parser.
	 * @return bool Always true.
	 */
	public function onParserFirstCallInit( $parser ) {
		if ( !$this->config->get( 'EnableMetaDescriptionFunctions' ) ) {
			// Functions and tags are disabled
			return true;
		}

		$parser->setFunctionHook(
			'description2',
			[ Description2::class, 'parserFunctionCallback' ],
			Parser::SFH_OBJECT_ARGS
		);
		return true;
	}

	/**
	 * Exports the description from the parser output into the OutputPage as a
	 * 'description' meta element. Nothing is added when no description is set.
	 *
	 * @param OutputPage $out The output page to add the meta element to.
	 * @param ParserOutput $parserOutput The parser output to get the description from.
	 */
	public function onOutputPageParserOutput( $out, $parserOutput ): void {
		$description = $this->getDescriptionProperty( $parserOutput );
		if ( $description !== null ) {
			$out->addMeta( 'description', $description );
		}
	}

	/**
	 * Reads the 'description' page property in a core-version independent way.
	 *
	 * @param ParserOutput $parserOutput The parser output to read from.
	 * @return string|null The stored description, or null when none is set.
	 */
	private function getDescriptionProperty( ParserOutput $parserOutput ): ?string {
		if ( method_exists( $parserOutput, 'getPageProperty' ) ) {
			// MW 1.38+
			$value = $parserOutput->getPageProperty( 'description' );
		} else {
			$value = $parserOutput->getProperty( 'description' );
		}

		// The legacy accessor reports "not set" as false; some getPageProperty() releases
		// presumably did the same (TODO confirm). Normalise both so callers only ever
		// have to test for null and false is never passed on as a description.
		if ( $value === null || $value === false ) {
			return null;
		}

		return (string)$value;
	}

	/**
	 * Shortens a description via Description2::getFirstChars() when a positive
	 * DescriptionMaxChars is configured, appending the 'ellipsis' message if the
	 * shortened text does not already end in punctuation.
	 *
	 * @param Parser $parser The parser whose target language is used for the ellipsis.
	 * @param string $desc The full description.
	 * @return string The description, shortened if necessary.
	 */
	private function truncateDescription( Parser $parser, $desc ) {
		if ( $this->maxChars <= 0 ) {
			return $desc;
		}

		$truncated = Description2::getFirstChars( $desc, $this->maxChars );
		if ( $truncated === $desc ) {
			// Nothing was cut off.
			return $desc;
		}

		if ( preg_match( '/\p{P}$/u', $truncated ) === 1 ) {
			// Already ends in punctuation, so no ellipsis is needed.
			return $truncated;
		}

		// The description ends up in the (shared, cached) parser output, so the ellipsis
		// must not depend on the interface language of whichever user triggered the parse.
		return $truncated
			. wfMessage( 'ellipsis' )->inLanguage( $parser->getTargetLanguage() )->text();
	}
}