preserveWhiteSpace = false; return $doc; } /** * Read a file and return its contents as text. * * @param string $relativePath Path resolved against the parent of the directory containing this file * @return string File contents as returned by file_get_contents() */ protected static function getText( string $relativePath ) : string { return file_get_contents( __DIR__ . '/../' . $relativePath ); } /** * Write text to a file, replacing whatever it held before. * * @param string $relativePath Path resolved against the parent of the directory containing this file * @param string $text Text to write */ protected static function overwriteTextFile( string $relativePath, string $text ) : void { file_put_contents( __DIR__ . '/../' . $relativePath, $text ); } /** * Read a file and decode it as JSON. * * Unlike getText(), the path is resolved against the directory containing this file itself, not its parent. * * @param string $relativePath Path resolved against the directory containing this file * @param bool $assoc Passed to json_decode(); NOTE(review): with false, JSON objects decode to stdClass, which would not satisfy the declared array return type - confirm callers only pass true * @return array Decoded data */ protected static function getJson( string $relativePath, bool $assoc = true ) : array { $json = json_decode( file_get_contents( __DIR__ . '/' . $relativePath ), $assoc ); return $json; } /** * Encode data as pretty-printed JSON and write it to a file, replacing whatever it held before. * * Slashes and non-ASCII characters are left unescaped, each leading 4-space indent step is turned into a tab, and a trailing newline is appended. * * @param string $relativePath Path resolved against the directory containing this file * @param array $data Data to encode */ protected static function overwriteJsonFile( string $relativePath, array $data ) : void { $json = json_encode( $data, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE ); // Tabs instead of 4 spaces $json = preg_replace( '/(?:\G|^) {4}/m', "\t", $json ); file_put_contents( __DIR__ . '/' . $relativePath, $json . "\n" ); } /** * Read an HTML file; for full Parsoid documents everything but the body content is meant to be removed. * * NOTE(review): the body-extraction statement in this method looks damaged in this copy (its if condition is never closed and the markup literals seem to be missing) - restore it from version control before relying on it. * * @param string $relativePath Path resolved against the parent of the directory containing this file * @return string */ protected static function getHtml( string $relativePath ) : string { $html = file_get_contents( __DIR__ . '/../' . $relativePath ); // Remove all but the body tags from full Parsoid docs if ( strpos( $html, ']*>)(.*)()`s', $html, $match ); $html = "
$match[2]
"; } return $html; } /** * Write the body of a document to an HTML file, replacing whatever it held before. * * The file at $origRelPath is read first; for full Parsoid documents only its body tag is meant to be replaced, otherwise the inner HTML of the body element of $doc is written as-is. * * NOTE(review): the condition and the serialisation of the body content look damaged in this copy (markup literals and part of a call seem to be missing) - restore them from version control before relying on this method. * * @param string $relPath Destination path, resolved against the parent of the directory containing this file * @param DOMDocument $doc Document whose body element supplies the new content * @param string $origRelPath Path of the original file, resolved the same way as $relPath */ protected static function overwriteHtmlFile( string $relPath, DOMDocument $doc, string $origRelPath ) : void { // Do not use $doc->saveHtml(), it outputs an awful soup of HTML entities for documents with // non-ASCII characters $html = file_get_contents( __DIR__ . '/../' . $origRelPath ); // Replace the body tag only in full Parsoid docs if ( strpos( $html, 'getElementsByTagName( 'body' )->item( 0 )->firstChild ); $html = preg_replace( '`(]*>)(.*)()`s', // Quote \ and $ in the replacement text '$1' . strtr( $innerHtml, [ '\\' => '\\\\', '$' => '\\$' ] ) . '$3', $html ); } else { $html = DOMCompat::getInnerHTML( $doc->getElementsByTagName( 'body' )->item( 0 ) ); } file_put_contents( __DIR__ . '/../' . $relPath, $html ); } /** * Create a comment parser wired to the content language and main config of the global MediaWikiServices instance. * * @param DOMElement $rootNode Passed through to the CommentParser constructor * @param array $data Passed through to the CommentParser constructor * @return CommentParser */ public static function createParser( DOMElement $rootNode, array $data ) : CommentParser { $services = MediaWikiServices::getInstance(); return new CommentParser( $rootNode, $services->getContentLanguage(), $services->getMainConfig(), $data ); } }