.
 *
 * @file
 * @ingroup Skins
 */

declare( strict_types=1 );

namespace Citizen\Partials;

use DOMDocument;
use DOMElement;
use DOMXpath;
use Exception;
use Html;
use OutputPage;
use ValueError;

/**
 * Rebuilds the page body HTML: wraps the content between top-level headings
 * in collapsible section elements and tags lower-ranked headings as
 * subheadings.
 */
final class BodyContent extends Partial {
	/**
	 * The code below is largely based on the extension MobileFrontend
	 * All credits go to the author and contributors of the project
	 */

	/**
	 * Class name for collapsible section wrappers
	 */
	public const STYLE_COLLAPSIBLE_SECTION_CLASS = 'section-collapsible';

	/**
	 * List of tags that could be considered as section headers,
	 * ordered from highest to lowest rank.
	 * @var array
	 */
	private $topHeadingTags = [ "h1", "h2", "h3", "h4", "h5", "h6" ];

	/**
	 * Rebuild the body content
	 *
	 * The output HTML and the print footer are concatenated. For content
	 * pages other than the main page, the result is parsed into a DOM,
	 * split into collapsible sections, and serialized again.
	 *
	 * @param OutputPage $out OutputPage
	 * @return string html
	 */
	public function buildBodyContent( $out ) {
		$htmlBodyContent = sprintf(
			"%s\n%s",
			$out->getHTML(),
			Html::rawElement( 'div', [ 'class' => 'printfooter' ], $this->skin->printSource() )
		);

		$title = $out->getTitle();

		// Return the page if title is null
		if ( $title === null ) {
			return $htmlBodyContent;
		}

		// Check if it is not the main page
		if ( !$title->isMainPage() && $title->isContentPage() ) {
			$doc = $this->parseXhtml( $htmlBodyContent );

			// Make top level sections
			$this->makeSections( $doc, $this->getTopHeadings( $doc ) );

			// Mark subheadings
			$this->markSubHeadings( $this->getSubHeadings( $doc ) );

			// saveHTML() returns false on failure; keep the untransformed
			// HTML in that case instead of passing false along.
			$serialized = $doc->saveHTML();
			if ( is_string( $serialized ) ) {
				$htmlBodyContent = $this->decodeNonMarkupEntities( $serialized );
			}
		}

		return $this->skin->wrapHTMLPublic( $title, $htmlBodyContent );
	}

	/**
	 * Decodes HTML entities back to UTF-8 characters, except for entities
	 * that stand for markup-significant characters (<, >, &, " and ').
	 *
	 * Decoding those over a whole serialized document would turn escaped
	 * text such as "&lt;script&gt;" into live markup and would break
	 * attribute values containing quotes, so they are left untouched.
	 *
	 * @param string $html Serialized HTML
	 * @return string HTML with non-markup entities decoded
	 */
	private function decodeNonMarkupEntities( string $html ): string {
		$decoded = preg_replace_callback(
			'/&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);/',
			static function ( array $match ): string {
				$char = html_entity_decode( $match[0], ENT_QUOTES, 'UTF-8' );

				return in_array( $char, [ '<', '>', '&', '"', "'" ], true ) ? $match[0] : $char;
			},
			$html
		);

		// preg_replace_callback() returns null on a PCRE error; the
		// entity-encoded input is still valid HTML, so fall back to it.
		return $decoded ?? $html;
	}

	/**
	 * Actually splits the body of the document into sections
	 *
	 * @param DOMDocument $doc representing the HTML of the current article. In the HTML the sections
	 *  should not be wrapped.
	 * @param DOMElement[] $headings The headings returned by getTopHeadings()
	 * @return DOMDocument
	 */
	private function makeSections( DOMDocument $doc, array $headings ) {
		$xpath = new DOMXpath( $doc );
		$containers = $xpath->query( 'body/div[@class="mw-parser-output"][1]' );

		// Return if the query failed or no parser output is found
		if ( $containers === false || !$containers->length || $containers->item( 0 ) === null ) {
			return $doc;
		}

		$container = $containers->item( 0 );
		$containerChild = $container->firstChild;
		$firstHeading = reset( $headings );
		// false when there are no headings, so no node name can match below
		$firstHeadingName = $firstHeading->nodeName ?? false;
		$sectionNumber = 0;
		$sectionBody = $this->createSectionBodyElement( $doc, $sectionNumber );

		while ( $containerChild ) {
			$node = $containerChild;
			// Advance first: $node is about to be moved out of the container
			$containerChild = $containerChild->nextSibling;

			// If we've found a top level heading, insert the previous section if
			// necessary and clear the container div.
			// Note well the use of DOMNode#nodeName here. Only DOMElement defines
			// DOMElement#tagName. So, if there's trailing text - represented by
			// DOMText - then accessing #tagName will trigger an error.
			if ( $node->nodeName === $firstHeadingName ) {
				// The heading we are transforming is always 1 section ahead of the
				// section we are currently processing
				/** @phan-suppress-next-line PhanTypeMismatchArgument DOMNode vs. DOMElement */
				$this->prepareHeading( $doc, $node, $sectionNumber + 1 );

				// Insert the previous section body and reset it for the new section
				$container->insertBefore( $sectionBody, $node );
				++$sectionNumber;
				$sectionBody = $this->createSectionBodyElement( $doc, $sectionNumber );
				continue;
			}

			// If it is not a top level heading, keep appending the nodes to the
			// section body container.
			$sectionBody->appendChild( $node );
		}

		// Append the last section body.
		$container->appendChild( $sectionBody );

		return $doc;
	}

	/**
	 * Prepare section headings, add required classes
	 *
	 * Appends the section-heading class to the heading and prepends an
	 * empty toggle indicator element to it.
	 *
	 * @param DOMDocument $doc
	 * @param DOMElement $heading
	 * @param int $sectionNumber Currently not used by this method
	 */
	private function prepareHeading( DOMDocument $doc, DOMElement $heading, $sectionNumber ) {
		$className = $heading->hasAttribute( 'class' ) ? $heading->getAttribute( 'class' ) . ' ' : '';
		$heading->setAttribute( 'class', $className . 'section-heading' );

		// prepend indicator - this avoids a reflow by creating a placeholder for a toggling indicator
		$indicator = $doc->createElement( 'div' );
		$indicator->setAttribute( 'class', 'section-toggle' );
		$indicator->setAttribute( 'role', 'button' );
		$heading->insertBefore( $indicator, $heading->firstChild );
	}

	/**
	 * Creates a Section body element
	 *
	 * @param DOMDocument $doc
	 * @param int $sectionNumber Used as the suffix of the element id
	 *
	 * @return DOMElement
	 */
	private function createSectionBodyElement( DOMDocument $doc, $sectionNumber ) {
		$sectionBody = $doc->createElement( 'section' );
		$sectionBody->setAttribute( 'class', self::STYLE_COLLAPSIBLE_SECTION_CLASS );
		$sectionBody->setAttribute( 'id', 'section-collapsible-' . $sectionNumber );

		return $sectionBody;
	}

	/**
	 * Collects all elements with the given tag name, skipping any whose
	 * parent element has the class attribute "toctitle".
	 *
	 * @param DOMDocument $doc
	 * @param string $tagName
	 * @return DOMElement[]
	 */
	private function collectHeadings( DOMDocument $doc, string $tagName ): array {
		$headings = [];

		foreach ( $doc->getElementsByTagName( $tagName ) as $el ) {
			$parent = $el->parentNode;
			// Only elements have attributes; a non-element parent cannot be the TOC title
			$isTocTitle = $parent instanceof DOMElement
				&& $parent->getAttribute( 'class' ) === 'toctitle';

			if ( !$isTocTitle ) {
				$headings[] = $el;
			}
		}

		return $headings;
	}

	/**
	 * Gets top headings in the document.
	 *
	 * @param DOMDocument $doc
	 * @return array The headings of the highest rank present in the
	 *  document, or an empty array if there are none
	 */
	private function getTopHeadings( DOMDocument $doc ): array {
		foreach ( $this->topHeadingTags as $tagName ) {
			$headings = $this->collectHeadings( $doc, $tagName );

			if ( $headings ) {
				return $headings;
			}
		}

		return [];
	}

	/**
	 * Marks the subheadings for the appropriate styles by adding
	 * the section-subheading class to each of them, if it
	 * hasn't already been added.
	 *
	 * @param DOMElement[] $headings Heading elements
	 */
	protected function markSubHeadings( array $headings ) {
		foreach ( $headings as $heading ) {
			$class = $heading->getAttribute( 'class' );

			if ( strpos( $class, 'section-subheading' ) === false ) {
				// ltrim() drops the leading space when there was no class before
				$heading->setAttribute( 'class', ltrim( $class . ' section-subheading' ) );
			}
		}
	}

	/**
	 * Gets all subheadings in the document in rank order.
	 *
	 * The highest heading rank present in the document is treated as the
	 * top level and skipped; headings of every lower rank are returned.
	 *
	 * @param DOMDocument $doc
	 * @return DOMElement[]
	 */
	private function getSubHeadings( DOMDocument $doc ): array {
		$found = false;
		$subheadings = [];

		foreach ( $this->topHeadingTags as $tagName ) {
			$elements = $this->collectHeadings( $doc, $tagName );

			if ( !$elements ) {
				continue;
			}

			if ( !$found ) {
				// First rank with matches is the top level, not a subheading
				$found = true;
			} else {
				$subheadings = array_merge( $subheadings, $elements );
			}
		}

		return $subheadings;
	}

	/**
	 * Parses a html string into a DOMDocument
	 *
	 * @param string $htmlContent
	 * @param string $charset
	 *
	 * @return DOMDocument An empty document when $htmlContent is blank
	 */
	private function parseXhtml( string $htmlContent, string $charset = 'UTF-8' ): DOMDocument {
		$htmlContent = $this->convertToHtmlEntities( $htmlContent, $charset );

		// libxml_disable_entity_loader() is deprecated as of PHP 8.0,
		// so only toggle it on older versions.
		$toggleEntityLoader = \PHP_VERSION_ID < 80000;

		$internalErrors = libxml_use_internal_errors( true );
		$disableEntities = $toggleEntityLoader ? libxml_disable_entity_loader( true ) : null;

		try {
			$dom = new DOMDocument( '1.0', $charset );
			$dom->validateOnParse = true;

			if ( trim( $htmlContent ) !== '' ) {
				// @phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
				@$dom->loadHTML( $htmlContent );
			}
		} finally {
			// Always put the global libxml state back the way it was
			libxml_use_internal_errors( $internalErrors );
			if ( $toggleEntityLoader ) {
				libxml_disable_entity_loader( $disableEntities );
			}
		}

		return $dom;
	}

	/**
	 * Converts charset to HTML-entities to ensure valid parsing.
	 *
	 * Tries a direct conversion first. If that fails, falls back to
	 * converting to UTF-8 with iconv and then to HTML entities. When every
	 * step fails, the input is returned unchanged.
	 *
	 * @param string $htmlContent
	 * @param string $charset
	 * @return string
	 */
	private function convertToHtmlEntities( string $htmlContent, string $charset = 'UTF-8' ): string {
		// Silence warnings and notices raised by the conversion functions
		set_error_handler( static function () {
			// Null
		} );

		try {
			try {
				$converted = mb_convert_encoding( $htmlContent, 'HTML-ENTITIES', $charset );
				// Older PHP versions signal failure by returning false
				if ( is_string( $converted ) ) {
					return $converted;
				}
			} catch ( Exception | ValueError $e ) {
				// Fall through to the iconv based fallback
			}

			$fallback = $htmlContent;

			try {
				$utf8 = iconv( $charset, 'UTF-8', $htmlContent );

				if ( is_string( $utf8 ) ) {
					// Keep the UTF-8 result even if the entity conversion fails
					$fallback = $utf8;
					$converted = mb_convert_encoding( $utf8, 'HTML-ENTITIES', 'UTF-8' );

					if ( is_string( $converted ) ) {
						$fallback = $converted;
					}
				}
			} catch ( Exception | ValueError $e ) {
				// Best effort: return whatever was converted so far
			}

			return $fallback;
		} finally {
			restore_error_handler();
		}
	}
}