Extract description algorithm into a new class
Separate the description extraction algorithm from the integration code. This results in a slightly cleaner code structure (at least in my opinion) and enables adding alternate algorithms without devolving into spaghetti. The DescriptionProvider (the name of the new base interface) is exposed as a service through dependency injection to avoid factories, so the implementation can be swapped at service instantiation time. Depends-On: I73c61ce045dcf31ac1ca5888f1548de8fd8b56ff Change-Id: I97fd065c9554837747021ba9fff26005e33270f4
This commit is contained in:
parent
43eb47183e
commit
b73fe26c29
|
@ -17,6 +17,9 @@
|
||||||
"ConfigRegistry": {
|
"ConfigRegistry": {
|
||||||
"Description2": "GlobalVarConfig::newInstance"
|
"Description2": "GlobalVarConfig::newInstance"
|
||||||
},
|
},
|
||||||
|
"ServiceWiringFiles": [
|
||||||
|
"includes/ServiceWiring.php"
|
||||||
|
],
|
||||||
"AutoloadNamespaces": {
|
"AutoloadNamespaces": {
|
||||||
"MediaWiki\\Extension\\Description2\\": "includes/"
|
"MediaWiki\\Extension\\Description2\\": "includes/"
|
||||||
},
|
},
|
||||||
|
@ -32,7 +35,8 @@
|
||||||
"Description2": {
|
"Description2": {
|
||||||
"class": "MediaWiki\\Extension\\Description2\\Hooks",
|
"class": "MediaWiki\\Extension\\Description2\\Hooks",
|
||||||
"services": [
|
"services": [
|
||||||
"ConfigFactory"
|
"ConfigFactory",
|
||||||
|
"Description2.DescriptionProvider"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
|
@ -17,7 +17,6 @@ use PPFrame;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class Description2 {
|
class Description2 {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param Parser $parser The parser.
|
* @param Parser $parser The parser.
|
||||||
* @param string $desc The description text.
|
* @param string $desc The description text.
|
||||||
|
|
13
includes/DescriptionProvider.php
Normal file
13
includes/DescriptionProvider.php
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace MediaWiki\Extension\Description2;
|
||||||
|
|
||||||
|
/**
 * Contract for strategies that turn rendered page HTML into a short
 * plain description.
 */
interface DescriptionProvider {

	/**
	 * Derives a description from the HTML representation of a page.
	 *
	 * @param string $text HTML to extract the description from.
	 * @return ?string The derived description; the nullable return type
	 *  allows implementations to report that nothing could be derived.
	 */
	public function derive( string $text ): ?string;

}
|
|
@ -25,15 +25,21 @@ class Hooks implements
|
||||||
\MediaWiki\Hook\OutputPageParserOutputHook
|
\MediaWiki\Hook\OutputPageParserOutputHook
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/** @var Config */
|
||||||
private Config $config;
|
private Config $config;
|
||||||
|
|
||||||
|
/** @var DescriptionProvider */
|
||||||
|
private DescriptionProvider $descriptionProvider;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param ConfigFactory $configFactory
|
* @param ConfigFactory $configFactory
|
||||||
*/
|
*/
|
||||||
public function __construct(
|
public function __construct(
|
||||||
ConfigFactory $configFactory
|
ConfigFactory $configFactory,
|
||||||
|
DescriptionProvider $descriptionProvider
|
||||||
) {
|
) {
|
||||||
$this->config = $configFactory->makeConfig( 'Description2' );
|
$this->config = $configFactory->makeConfig( 'Description2' );
|
||||||
|
$this->descriptionProvider = $descriptionProvider;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -43,22 +49,7 @@ class Hooks implements
|
||||||
* @return bool
|
* @return bool
|
||||||
*/
|
*/
|
||||||
public function onParserAfterTidy( $parser, &$text ) {
|
public function onParserAfterTidy( $parser, &$text ) {
|
||||||
$desc = '';
|
$desc = $this->descriptionProvider->derive( $text );
|
||||||
|
|
||||||
$pattern = '%<table\b[^>]*+>(?:(?R)|[^<]*+(?:(?!</?table\b)<[^<]*+)*+)*+</table>%i';
|
|
||||||
$myText = preg_replace( $pattern, '', $text );
|
|
||||||
|
|
||||||
$paragraphs = [];
|
|
||||||
if ( preg_match_all( '#<p>.*?</p>#is', $myText, $paragraphs ) ) {
|
|
||||||
foreach ( $paragraphs[0] as $paragraph ) {
|
|
||||||
$paragraph = trim( strip_tags( $paragraph ) );
|
|
||||||
if ( !$paragraph ) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$desc = $paragraph;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( $desc ) {
|
if ( $desc ) {
|
||||||
Description2::setDescription( $parser, $desc );
|
Description2::setDescription( $parser, $desc );
|
||||||
|
|
13
includes/ServiceWiring.php
Normal file
13
includes/ServiceWiring.php
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
use MediaWiki\Extension\Description2\DescriptionProvider;
|
||||||
|
use MediaWiki\Extension\Description2\SimpleDescriptionProvider;
|
||||||
|
use MediaWiki\MediaWikiServices;
|
||||||
|
|
||||||
|
// Service wiring: maps each service name to the callback that instantiates it.
return [
	// Default provider implementation; replace the instantiated class here
	// to swap the description algorithm.
	'Description2.DescriptionProvider' =>
		static fn ( MediaWikiServices $services ): DescriptionProvider => new SimpleDescriptionProvider(),
];
|
36
includes/SimpleDescriptionProvider.php
Normal file
36
includes/SimpleDescriptionProvider.php
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace MediaWiki\Extension\Description2;
|
||||||
|
|
||||||
|
class SimpleDescriptionProvider implements DescriptionProvider {

	/**
	 * Extracts description from the HTML representation of a page.
	 *
	 * The algorithm:
	 * 1. Removes all <table> elements and their contents.
	 * 2. Selects all <p> elements.
	 * 3. Iterates over those paragraphs, strips out all HTML tags and trims white-space around.
	 * 4. Then the first non-empty paragraph is picked as the result.
	 *
	 * @param string $text HTML to extract the description from.
	 * @return ?string Plain text of the first non-empty paragraph, or null when
	 *  no such paragraph exists or the table-stripping regular expression fails.
	 */
	public function derive( string $text ): ?string {
		// (?R) recurses into the whole pattern so nested <table> elements are
		// removed together with their outermost table.
		$pattern = '%<table\b[^>]*+>(?:(?R)|[^<]*+(?:(?!</?table\b)<[^<]*+)*+)*+</table>%i';
		$myText = preg_replace( $pattern, '', $text );
		if ( $myText === null ) {
			// preg_replace() returns null on a PCRE error. Stop here instead of
			// passing null on to preg_match_all(), which expects a string.
			return null;
		}

		$paragraphs = [];
		// Only bare <p> tags (without attributes) are matched by this pattern.
		if ( preg_match_all( '#<p>.*?</p>#is', $myText, $paragraphs ) ) {
			foreach ( $paragraphs[0] as $paragraph ) {
				$paragraph = trim( strip_tags( $paragraph ) );
				// Falsy check: skips empty paragraphs, and also a paragraph
				// whose entire text is "0".
				if ( !$paragraph ) {
					continue;
				}
				return $paragraph;
			}
		}

		return null;
	}

}
|
Loading…
Reference in a new issue