plainText = $plainText; $this->setRemoveMedia( true ); if ( $plainText ) { $this->flattenAllTags(); } else { $this->flatten( [ 'a' ] ); } } /** * Performs final transformations (such as newline replacement for plaintext * option) and returns resulting HTML. * * @param DOMElement|string|null $element ID of element to get HTML from. * Ignored * @return string Processed HTML */ public function getText( $element = null ): string { $this->filterContent(); $text = parent::getText(); if ( $this->plainText ) { $text = html_entity_decode( $text ); // replace nbsp with space $text = str_replace( "\u{00A0}", ' ', $text ); // for Windows $text = str_replace( "\r", "\n", $text ); // normalise newlines $text = preg_replace( "/\n{3,}/", "\n\n", $text ); } return trim( $text ); } /** * @param string $html HTML string to process * @return string Processed HTML */ public function onHtmlReady( string $html ): string { if ( $this->plainText ) { $html = preg_replace( '/\s*(getDoc(); // Headings in a DIV wrapper may get removed by $wgExtractsRemoveClasses, // move it outside the header to rescue it (T363445) // https://www.mediawiki.org/wiki/Heading_HTML_changes $headings = DOMCompat::querySelectorAll( $doc->documentElement, 'h1, h2, h3, h4, h5, h6' ); foreach ( $headings as $heading ) { // @phan-suppress-next-line PhanTypeMismatchArgumentSuperType if ( DOMCompat::getClassList( $heading->parentNode )->contains( 'mw-heading' ) ) { $heading->parentNode->parentNode->insertBefore( $heading, $heading->parentNode ); } } $removed = parent::filterContent(); $spans = $doc->getElementsByTagName( 'span' ); /** @var DOMElement $span */ foreach ( $spans as $span ) { $span->removeAttribute( 'class' ); $span->removeAttribute( 'style' ); } return $removed; } }