diff --git a/extension.json b/extension.json index 31bac83..afc8559 100644 --- a/extension.json +++ b/extension.json @@ -56,7 +56,9 @@ "input", "style", "ul.gallery", - ".mw-editsection", + "mw\\:editsection", + "editsection", + "meta", "sup.reference", "ol.references", ".error", diff --git a/includes/ApiQueryExtracts.php b/includes/ApiQueryExtracts.php index b647fba..1231f9d 100644 --- a/includes/ApiQueryExtracts.php +++ b/includes/ApiQueryExtracts.php @@ -27,7 +27,7 @@ class ApiQueryExtracts extends ApiQueryBase { /** * Bump when memcache needs clearing */ - private const CACHE_VERSION = 2; + private const CACHE_VERSION = 3; private const PREFIX = 'ex'; @@ -221,10 +221,9 @@ class ApiQueryExtracts extends ApiQueryBase { */ private function getFirstSection( $text, $plainText ) { if ( $plainText ) { - $regexp = '/^.*?(?=' . ExtractFormatter::SECTION_MARKER_START . - '(?!.' . ExtractFormatter::SECTION_MARKER_END . '

isOK() ) { $pout = $status->getValue(); - $text = $pout->getText( [ 'unwrap' => true ] ); + $text = $pout->getRawText(); if ( $this->params['intro'] ) { $text = $this->getFirstSection( $text, false ); } diff --git a/includes/ExtractFormatter.php b/includes/ExtractFormatter.php index 2612573..59d4c0d 100644 --- a/includes/ExtractFormatter.php +++ b/includes/ExtractFormatter.php @@ -4,7 +4,6 @@ namespace MediaWiki\Extension\TextExtracts; use DOMElement; use HtmlFormatter\HtmlFormatter; -use Wikimedia\Parsoid\Utils\DOMCompat; /** * Provides text-only or limited-HTML extracts of page HTML @@ -81,21 +80,9 @@ class ExtractFormatter extends HtmlFormatter { * @return array Array of removed DOMElements */ public function filterContent(): array { - $doc = $this->getDoc(); - - // Headings in a DIV wrapper may get removed by $wgExtractsRemoveClasses, - // move it outside the header to rescue it (T363445) - // https://www.mediawiki.org/wiki/Heading_HTML_changes - $headings = DOMCompat::querySelectorAll( $doc->documentElement, 'h1, h2, h3, h4, h5, h6' ); - foreach ( $headings as $heading ) { - // @phan-suppress-next-line PhanTypeMismatchArgumentSuperType - if ( DOMCompat::getClassList( $heading->parentNode )->contains( 'mw-heading' ) ) { - $heading->parentNode->parentNode->insertBefore( $heading, $heading->parentNode ); - } - } - $removed = parent::filterContent(); + $doc = $this->getDoc(); $spans = $doc->getElementsByTagName( 'span' ); /** @var DOMElement $span */ diff --git a/tests/phpunit/ApiQueryExtractsTest.php b/tests/phpunit/ApiQueryExtractsTest.php index 1f11fb8..7cf8fc9 100644 --- a/tests/phpunit/ApiQueryExtractsTest.php +++ b/tests/phpunit/ApiQueryExtractsTest.php @@ -160,16 +160,6 @@ class ApiQueryExtractsTest extends \MediaWikiIntegrationTestCase { false, 'Example ...', ], - '__TOC__ before intro (HTML)' => [ - '

Contents

Intro

Actual heading

...', - false, - '

Contents

Intro', - ], - '__TOC__ before intro (plaintext)' => [ - "\1\2_\2\1

Contents

Intro\1\2_\2\1

Actual heading

...", - true, - "\1\2_\2\1

Contents

Intro", - ], ]; } diff --git a/tests/phpunit/ExtractFormatterTest.php b/tests/phpunit/ExtractFormatterTest.php index 35bc7d8..fcdfe83 100644 --- a/tests/phpunit/ExtractFormatterTest.php +++ b/tests/phpunit/ExtractFormatterTest.php @@ -67,17 +67,6 @@ class ExtractFormatterTest extends MediaWikiIntegrationTestCase { 'quux', false, ], - [ - // New heading markup from T13555 (T363445) - "

hello

heading

bye

", - '

hello

' . - '
' . - '

heading

' . - 'xxx' . - '
' . - '

bye

', - false, - ], [ // Verify that TOC is properly removed (HTML mode) "Lead\n

Section

\n

Section text

",