From b73fe26c299ddce687039d004ae0ac69b8f6547c Mon Sep 17 00:00:00 2001 From: alex4401 Date: Mon, 4 Dec 2023 20:54:13 +0100 Subject: [PATCH] Extract description algorithm into a new class Separating the extract algorithm from integration code. This results in a slightly cleaner code structure (at least in my opinion) and enables adding alternate algorithms without devolving into spaghetti. The DescriptionProvider (name of the new base interface) is exposed as a service through dependency injection to avoid factories. The implementation can be swapped at service instantiation time. Depends-On: I73c61ce045dcf31ac1ca5888f1548de8fd8b56ff Change-Id: I97fd065c9554837747021ba9fff26005e33270f4 --- extension.json | 6 ++++- includes/Description2.php | 1 - includes/DescriptionProvider.php | 13 ++++++++++ includes/Hooks.php | 25 ++++++------------ includes/ServiceWiring.php | 13 ++++++++++ includes/SimpleDescriptionProvider.php | 36 ++++++++++++++++++++++++++ 6 files changed, 75 insertions(+), 19 deletions(-) create mode 100644 includes/DescriptionProvider.php create mode 100644 includes/ServiceWiring.php create mode 100644 includes/SimpleDescriptionProvider.php diff --git a/extension.json b/extension.json index ec21ab8..fbe460e 100644 --- a/extension.json +++ b/extension.json @@ -17,6 +17,9 @@ "ConfigRegistry": { "Description2": "GlobalVarConfig::newInstance" }, + "ServiceWiringFiles": [ + "includes/ServiceWiring.php" + ], "AutoloadNamespaces": { "MediaWiki\\Extension\\Description2\\": "includes/" }, @@ -32,7 +35,8 @@ "Description2": { "class": "MediaWiki\\Extension\\Description2\\Hooks", "services": [ - "ConfigFactory" + "ConfigFactory", + "Description2.DescriptionProvider" ] } }, diff --git a/includes/Description2.php b/includes/Description2.php index da9e404..62e1004 100644 --- a/includes/Description2.php +++ b/includes/Description2.php @@ -17,7 +17,6 @@ use PPFrame; */ class Description2 { - /** * @param Parser $parser The parser. 
* @param string $desc The description text. diff --git a/includes/DescriptionProvider.php b/includes/DescriptionProvider.php new file mode 100644 index 0000000..2ed2ff2 --- /dev/null +++ b/includes/DescriptionProvider.php @@ -0,0 +1,13 @@ +config = $configFactory->makeConfig( 'Description2' ); + $this->descriptionProvider = $descriptionProvider; } /** @@ -43,22 +49,7 @@ class Hooks implements * @return bool */ public function onParserAfterTidy( $parser, &$text ) { - $desc = ''; - - $pattern = '%]*+>(?:(?R)|[^<]*+(?:(?!%i'; - $myText = preg_replace( $pattern, '', $text ); - - $paragraphs = []; - if ( preg_match_all( '#

.*?

#is', $myText, $paragraphs ) ) { - foreach ( $paragraphs[0] as $paragraph ) { - $paragraph = trim( strip_tags( $paragraph ) ); - if ( !$paragraph ) { - continue; - } - $desc = $paragraph; - break; - } - } + $desc = $this->descriptionProvider->derive( $text ); if ( $desc ) { Description2::setDescription( $parser, $desc ); diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php new file mode 100644 index 0000000..3358d00 --- /dev/null +++ b/includes/ServiceWiring.php @@ -0,0 +1,13 @@ + static function ( + MediaWikiServices $services + ): DescriptionProvider { + return new SimpleDescriptionProvider(); + }, +]; diff --git a/includes/SimpleDescriptionProvider.php b/includes/SimpleDescriptionProvider.php new file mode 100644 index 0000000..ea3f904 --- /dev/null +++ b/includes/SimpleDescriptionProvider.php @@ -0,0 +1,36 @@ + elements and their contents. + * 2. Selects all

elements. + * 3. Iterates over those paragraphs, strips out all HTML tags and trims white-space around. + * 4. Then the first non-empty paragraph is picked as the result. + * + * @param string $text + * @return string|null + */ + public function derive( string $text ): ?string { + $pattern = '%]*+>(?:(?R)|[^<]*+(?:(?!%i'; + $myText = preg_replace( $pattern, '', $text ); + + $paragraphs = []; + if ( preg_match_all( '#

.*?

#is', $myText, $paragraphs ) ) { + foreach ( $paragraphs[0] as $paragraph ) { + $paragraph = trim( strip_tags( $paragraph ) ); + if ( !$paragraph ) { + continue; + } + return $paragraph; + } + } + + return null; + } +}