mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/Description2
synced 2024-11-23 22:33:34 +00:00
Remove style tags in description
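Some pages' descriptions may include CSS code emitted by [[Extension:TemplateStyles]], so <style> elements are now stripped along with <table> elements before the description is extracted. Change-Id: I352ac2338eb5977305308546523ec6c55f7cb599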
This commit is contained in:
parent
bc54a5a120
commit
055927a901
|
@@ -8,7 +8,7 @@ class SimpleDescriptionProvider implements DescriptionProvider {
|
|||
* Extracts description from the HTML representation of a page.
|
||||
*
|
||||
* The algorithm:
|
||||
* 1. Removes all <table> elements and their contents.
|
||||
* 1. Removes all <style> and <table> elements and their contents.
|
||||
* 2. Selects all <p> elements.
|
||||
* 3. Iterates over those paragraphs, strips out all HTML tags, and trims surrounding whitespace.
|
||||
* 4. Then the first non-empty paragraph is picked as the result.
|
||||
|
@@ -17,8 +17,12 @@ class SimpleDescriptionProvider implements DescriptionProvider {
|
|||
* @return string|null
|
||||
*/
|
||||
public function derive( string $text ): ?string {
|
||||
$pattern = '%<table\b[^>]*+>(?:(?R)|[^<]*+(?:(?!</?table\b)<[^<]*+)*+)*+</table>%i';
|
||||
$myText = preg_replace( $pattern, '', $text );
|
||||
$myText = $text;
|
||||
$stripTags = [ 'style', 'table' ];
|
||||
foreach ( $stripTags as $tag ) {
|
||||
$pattern = "%<$tag\b[^>]*+>(?:(?R)|[^<]*+(?:(?!</?$tag\b)<[^<]*+)*+)*+</$tag>%i";
|
||||
$myText = preg_replace( $pattern, '', $myText );
|
||||
}
|
||||
|
||||
$paragraphs = [];
|
||||
if ( preg_match_all( '#<p>.*?</p>#is', $myText, $paragraphs ) ) {
|
||||
|
|
Loading…
Reference in a new issue