PortableInfobox/includes/services/Helpers/PortableInfoboxParsingHelper.php

139 lines
3.9 KiB
PHP
Raw Normal View History

<?php
namespace PortableInfobox\Helpers;
2022-03-11 20:35:51 +00:00
use Exception;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MediaWikiServices;
2022-03-11 20:35:51 +00:00
use ParserOptions;
use PortableInfoboxDataService;
use PortableInfoboxParserTagController;
use RequestContext;
use Title;
/**
 * Extracts <infobox> markup from page/template wikitext and runs it through the
 * MediaWiki parser so that the resulting infobox data (stored by the parser tag
 * controller as a page property) can be read back as a PHP array.
 */
class PortableInfoboxParsingHelper {

	/** @var PortableInfoboxParserTagController shared tag controller used to prepare each infobox */
	protected $parserTagController;

	/** @var mixed logger obtained from LoggerFactory for the 'PortableInfobox' channel */
	protected $logger;

	public function __construct() {
		$this->parserTagController = PortableInfoboxParserTagController::getInstance();
		$this->logger = LoggerFactory::getInstance( 'PortableInfobox' );
	}

	/**
	 * Try to find out if infobox got "hidden" inside includeonly tag. Parse it if that's the case.
	 *
	 * @param Title $title
	 *
	 * @return mixed false when no infoboxes found, Array with infoboxes on success
	 */
	public function parseIncludeonlyInfoboxes( Title $title ) {
		// for templates we need to check for include tags
		$templateText = $this->fetchArticleContent( $title );
		if ( !$templateText ) {
			return false;
		}

		$parser = MediaWikiServices::getInstance()->getParser();
		$parser->setPage( $title );
		$parserOptions = ParserOptions::newFromAnon();
		$parser->setOptions( $parserOptions );
		$frame = $parser->getPreprocessor()->newFrame();

		$includeonlyText = $parser->getPreloadText( $templateText, $title, $parserOptions );
		$infoboxes = $this->getInfoboxes( $this->removeNowikiPre( $includeonlyText ) );
		if ( !$infoboxes ) {
			return false;
		}

		foreach ( $infoboxes as $infobox ) {
			try {
				$this->parserTagController->prepareInfobox( $infobox, $parser, $frame );
			} catch ( Exception $e ) {
				// Best effort: one malformed infobox must not prevent the others from
				// being processed. Keep the exception as context so it can be diagnosed.
				$this->logger->info( 'Invalid infobox syntax', [ 'exception' => $e ] );
			}
		}

		// If every infobox failed to prepare, the property is never set; honour the
		// documented contract and report "nothing found" as false rather than null.
		return $this->decodeInfoboxesProperty( $parser->getOutput() ) ?? false;
	}

	/**
	 * Runs a full parse of the page and returns the infobox data recorded in the parser output.
	 *
	 * @param Title $title
	 *
	 * @return mixed decoded infobox data, or null when the parse recorded none
	 */
	public function reparseArticle( Title $title ) {
		$parser = MediaWikiServices::getInstance()->getParser();
		$user = RequestContext::getMain()->getUser();

		$parserOptions = new ParserOptions( $user );
		$parser->parse( $this->fetchArticleContent( $title ), $title, $parserOptions );

		return $this->decodeInfoboxesProperty( $parser->getOutput() );
	}

	/**
	 * Reads the infoboxes page property from a parser output and JSON-decodes it.
	 *
	 * @param \ParserOutput $parserOutput
	 *
	 * @return mixed decoded value (associative arrays), or null when the property is unset/empty
	 */
	private function decodeInfoboxesProperty( \ParserOutput $parserOutput ) {
		$raw = self::parserOutputGetPageProperty(
			$parserOutput,
			PortableInfoboxDataService::INFOBOXES_PROPERTY_NAME
		);

		// An unset property is not a JSON document; do not hand null/false to
		// json_decode() (passing null is deprecated as of PHP 8.1).
		if ( $raw === null || $raw === false || $raw === '' ) {
			return null;
		}

		return json_decode( $raw, true );
	}

	/**
	 * Version-tolerant page property accessor: uses getPageProperty() when the
	 * ParserOutput class provides it, otherwise falls back to getProperty().
	 *
	 * @param \ParserOutput $parserOutput
	 * @param string $name
	 *
	 * @return mixed
	 */
	private static function parserOutputGetPageProperty( \ParserOutput $parserOutput, string $name ) {
		if ( method_exists( \ParserOutput::class, 'getPageProperty' ) ) {
			// @phan-suppress-next-line PhanUndeclaredMethod since 1.38
			return $parserOutput->getPageProperty( $name );
		} else {
			// @phan-suppress-next-line PhanDeprecatedFunction deprecated since 1.38
			return $parserOutput->getProperty( $name );
		}
	}

	/**
	 * Returns the text of the page, or an empty string when the page does not
	 * exist, has no accessible content, or its content is not text based.
	 *
	 * @param Title $title
	 *
	 * @return string
	 */
	protected function fetchArticleContent( Title $title ) {
		if ( !$title->exists() ) {
			return '';
		}

		$content = MediaWikiServices::getInstance()
			->getWikiPageFactory()
			->newFromTitle( $title )
			->getContent();

		// getContent() may yield no content object at all; only text based
		// content can carry infobox markup, so anything else is treated as empty.
		if ( !( $content instanceof \TextContent ) ) {
			return '';
		}

		$text = $content->getText();

		return $text ?: '';
	}

	/**
	 * @param Title $title
	 * @return string[] array of strings (infobox markups)
	 */
	public function getMarkup( Title $title ) {
		$content = $this->fetchArticleContent( $title );
		return $this->getInfoboxes( $content );
	}

	/**
	 * For given template text returns it without text in <nowiki> and <pre> tags
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	protected function removeNowikiPre( $text ) {
		// `.*` (not `.+`): an empty pair such as <nowiki></nowiki> must match on its
		// own, otherwise the lazy match runs on to the NEXT closing tag and deletes
		// everything in between, including real infoboxes.
		$stripped = preg_replace( '/<(nowiki|pre)>.*<\/\g1>/sU', '', $text );

		// preg_replace() returns null on a PCRE error; keep the string contract and
		// hand back nothing rather than text that was never filtered.
		return $stripped ?? '';
	}

	/**
	 * From the template without <includeonly> tags, creates an array of
	 * strings containing only infoboxes. All template content which is not an infobox is removed.
	 *
	 * @param string $text Content of template which uses the <includeonly> tags
	 *
	 * @return array of stripped infoboxes ready to parse
	 */
	protected function getInfoboxes( $text ) {
		// Matches either a self-closing <infobox ... /> or a full <infobox>...</infobox> pair.
		preg_match_all( '/<infobox(?:[^>]*\/>|.+<\/infobox>)/sU', $text, $result );

		return $result[0] ?? [];
	}
}