mediawiki-extensions-Templa.../includes/TemplateDataBlob.php
Thiemo Kreuz 001494f443 Move last remaining HTML formatting code out of blob class
Effectively a no-op. This patch doesn't change what the code does.
Tests are in place to prove this.

As before, the tests are intentionally not moved but left in place.
This is for later patches to clean up.

Change-Id: If130e0d006a36d8c755288f8a4e4e9a4c42a6295
2022-02-03 09:33:03 +01:00

264 lines
7.2 KiB
PHP

<?php
/**
* @file
* @ingroup Extensions
*/
namespace MediaWiki\Extension\TemplateData;
use MediaWiki\MediaWikiServices;
use Status;
use stdClass;
use Wikimedia\Rdbms\IDatabase;
/**
* Represents the information about a template,
* coming from the JSON blob in the <templatedata> tags
* on wiki pages.
*/
class TemplateDataBlob {

	/**
	 * @var mixed Decoded JSON as handed to the constructor. newFromJSON() replaces it with a
	 *  minimal valid structure when validation fails.
	 */
	private $data;

	/**
	 * @var string|null In-object cache for getJSON()
	 */
	private $json = null;

	/**
	 * @var Status Outcome of parse(), assigned by newFromJSON()
	 */
	private $status;

	/**
	 * Parse and validate passed JSON and create a blob handling
	 * instance.
	 * Accepts and handles user-provided data.
	 *
	 * The returned object always carries a Status (see getStatus()). When that status is not
	 * OK, the object's data has been replaced by a minimal valid blob.
	 *
	 * @param IDatabase $db Used only to pick the blob class based on the database type
	 * @param string $json
	 * @return TemplateDataBlob
	 */
	public static function newFromJSON( IDatabase $db, string $json ): TemplateDataBlob {
		$decoded = json_decode( $json );

		// The compressed variant is only used when the database type is 'mysql'.
		if ( $db->getType() === 'mysql' ) {
			$tdb = new TemplateDataCompressedBlob( $decoded );
		} else {
			$tdb = new TemplateDataBlob( $decoded );
		}

		$status = $tdb->parse();

		if ( !$status->isOK() ) {
			// Reset in-object caches
			$tdb->json = null;
			// This class does not declare a 'jsonDB' property. Assigning it unconditionally
			// would create a dynamic property on a plain TemplateDataBlob (deprecated since
			// PHP 8.2), so only reset it on objects that actually have it.
			if ( property_exists( $tdb, 'jsonDB' ) ) {
				$tdb->jsonDB = null;
			}

			// If data is invalid, replace with the minimal valid blob.
			// This is to make sure that, if something forgets to check the status first,
			// we don't end up with invalid data in the database.
			$tdb->data = (object)[
				'description' => null,
				'params' => (object)[],
				'format' => null,
				'sets' => [],
				'maps' => (object)[],
			];
		}

		$tdb->status = $status;
		return $tdb;
	}

	/**
	 * Parse and validate passed JSON (possibly gzip-compressed) and create a blob handling
	 * instance.
	 *
	 * @param IDatabase $db
	 * @param string $json Raw or gzip-compressed JSON
	 * @return TemplateDataBlob
	 */
	public static function newFromDatabase( IDatabase $db, string $json ): TemplateDataBlob {
		// Handle GZIP compression. \037\213 is the header for GZIP files.
		if ( substr( $json, 0, 2 ) === "\037\213" ) {
			$inflated = gzdecode( $json );
			// gzdecode() returns false when the payload cannot be decoded. Make the fallback to
			// an empty string explicit instead of relying on an implicit bool-to-string cast.
			$json = $inflated === false ? '' : $inflated;
		}

		return self::newFromJSON( $db, $json );
	}

	/**
	 * Parse the data, normalise it and validate it.
	 *
	 * See Specification.md for the expected format of the JSON object.
	 * @return Status
	 */
	protected function parse(): Status {
		$validator = new TemplateDataValidator();
		return $validator->validate( $this->data );
	}

	/**
	 * Get a single localized string from an InterfaceText object.
	 *
	 * Uses the preferred language passed to this function, or one of its fallbacks,
	 * or the site content language, or its fallbacks.
	 *
	 * @param stdClass $text An InterfaceText object
	 * @param string $langCode Preferred language
	 * @return null|string Text value from the InterfaceText object or null if no suitable
	 *  match was found
	 */
	protected static function getInterfaceTextInLanguage( stdClass $text, string $langCode ): ?string {
		if ( isset( $text->$langCode ) ) {
			return $text->$langCode;
		}

		[ $userlangs, $sitelangs ] = MediaWikiServices::getInstance()->getLanguageFallback()
			->getAllIncludingSiteLanguage( $langCode );

		// Try the user language fallbacks first, then the site language ones.
		foreach ( [ $userlangs, $sitelangs ] as $fallbackChain ) {
			foreach ( $fallbackChain as $lang ) {
				if ( isset( $text->$lang ) ) {
					return $text->$lang;
				}
			}
		}

		// If none of the languages are found fallback to null. Alternatively we could fallback to
		// reset( $text ) which will return whatever key there is, but we shouldn't give the user a
		// "random" language with no context (e.g. could be RTL/Hebrew for an LTR/Japanese user).
		return null;
	}

	/**
	 * @return Status The validation status assigned by newFromJSON()
	 */
	public function getStatus(): Status {
		return $this->status;
	}

	/**
	 * @return mixed A deep copy of the data
	 */
	public function getData() {
		// Return deep clone so callers can't modify data. Needed for getDataInLanguage().
		// Modification must clear 'json' and 'jsonDB' in-object cache.
		return unserialize( serialize( $this->data ) );
	}

	/**
	 * Get data with all InterfaceText objects resolved to a single string to the
	 * appropriate language.
	 *
	 * @param string $langCode Preferred language
	 * @return stdClass
	 */
	public function getDataInLanguage( string $langCode ): stdClass {
		// Works on the deep copy from getData(), so the blob's own data stays untouched.
		$data = $this->getData();

		// Root.description
		if ( $data->description !== null ) {
			$data->description = self::getInterfaceTextInLanguage( $data->description, $langCode );
		}

		// Param.label, Param.description, Param.default and Param.example are all optional
		// InterfaceTexts that are resolved the same way.
		$paramTextFields = [ 'label', 'description', 'default', 'example' ];
		foreach ( $data->params as $param ) {
			foreach ( $paramTextFields as $field ) {
				if ( $param->$field !== null ) {
					$param->$field = self::getInterfaceTextInLanguage( $param->$field, $langCode );
				}
			}
		}

		foreach ( $data->sets as $setObj ) {
			$label = self::getInterfaceTextInLanguage( $setObj->label, $langCode );
			if ( $label === null ) {
				// Contrary to other InterfaceTexts, set label is not optional. If we're here it
				// means the template data from the wiki doesn't contain either the user language,
				// site language or any of its fallbacks. Wikis should fix data that is in this
				// condition (TODO: Disallow during saving?). For now, fallback to whatever we can
				// get that does exist in the text object.
				$arr = (array)$setObj->label;
				$label = reset( $arr );
			}
			$setObj->label = $label;
		}

		return $data;
	}

	/**
	 * @return string JSON
	 */
	public function getJSON(): string {
		if ( $this->json === null ) {
			// Cache for repeat calls
			$this->json = json_encode( $this->data );
		}
		return $this->json;
	}

	/**
	 * @return string JSON; in this class identical to getJSON()
	 */
	public function getJSONForDatabase(): string {
		return $this->getJSON();
	}

	/**
	 * Get parameter descriptions from raw wikitext (used for templates that have no templatedata).
	 *
	 * Names that only differ in case or in their use of "-", "_" and spaces are considered the
	 * same parameter; only the first spelling found is reported.
	 *
	 * @param string $wikitext The text to extract parameters from.
	 * @return array[] Map of trimmed parameter name to an empty array, i.e. the same format as
	 *  the templatedata 'params' key.
	 */
	public static function getRawParams( string $wikitext ): array {
		// Ignore wikitext within nowiki tags and comments
		$wikitext = preg_replace( '/<!--.*?-->/s', '', $wikitext );
		$wikitext = preg_replace( '/<nowiki\s*>.*?<\/nowiki\s*>/s', '', $wikitext );

		// This regex matches the one in ext.TemplateDataGenerator.sourceHandler.js
		preg_match_all( '/{{3,}([^\n#={|}]*?)([<|]|}{3,})/m', $wikitext, $rawParams );

		$params = [];
		// Normalized names that were already found, stored as array keys. Key lookups never
		// compare numeric-looking strings by value, so e.g. "1e3" and "1000" (or "01" and "1")
		// are kept apart, which a loose in_array() would not do.
		$seen = [];
		foreach ( $rawParams[1] ?? [] as $rawParam ) {
			// This normalization process is repeated in JS in ext.TemplateDataGenerator.sourceHandler.js
			$normalizedParam = strtolower( trim( preg_replace( '/[-_ ]+/', ' ', $rawParam ) ) );
			// Compare against '' explicitly: a parameter named "0" is falsy in PHP but is still
			// a real parameter name.
			if ( $normalizedParam === '' || isset( $seen[$normalizedParam] ) ) {
				// Either there is no name, or this or a similarly-named parameter has already
				// been found.
				continue;
			}
			$seen[$normalizedParam] = true;
			$params[ trim( $rawParam ) ] = [];
		}

		return $params;
	}

	/**
	 * @param mixed $data Decoded JSON; validated later by parse()
	 */
	protected function __construct( $data ) {
		$this->data = $data;
	}

}