mediawiki-extensions-Scribunto/includes/engines/LuaCommon/TextLibrary.php
Kunal Mehta a839ba855d Add mw.loadJsonData()
Backporting this so the LTS release has forwards compatibility with
Wikipedia templates.

mw.loadData() allows for optimizing the loading of Lua tables by requiring
only one parse and lookup. However it's often easier for people to
write/maintain bulk data in JSON rather than Lua tables.

mw.loadJsonData() has roughly the same characteristics as mw.loadData()
and it can be used on JSON content model pages in any namespace.

As noted on the linked bug report, it's possible to already implement
this by writing a wrapper Lua module that loads and parses the JSON
content. But that requires a dummy module for each JSON page, which is
just annoying and inconvenient.

Test cases are copied from the mw.loadData() ones, with a few omissions
for syntax not supported in JSON (e.g. NaN, infinity, etc.).

Bug: T217500
Change-Id: I1b35ad27a37b94064707bb8c9b7108c7078ed4d1
(cherry picked from commit 1000d322e5)
2022-11-07 07:34:42 +00:00

180 lines
4.9 KiB
PHP

<?php
/**
 * PHP-side callbacks for the Lua mw.text library: strip-marker handling,
 * the HTML entity table, and JSON encoding/decoding.
 */
class Scribunto_LuaTextLibrary extends Scribunto_LuaLibraryBase {
	// Matches Lua mw.text constants
	private const JSON_PRESERVE_KEYS = 1;
	private const JSON_TRY_FIXING = 2;
	private const JSON_PRETTY = 4;

	/**
	 * Register the 'mw.text.lua' interface with the engine, passing it the
	 * PHP callbacks and the setup options (content-language separators and
	 * the nowiki protocol patterns built from $wgUrlProtocols).
	 *
	 * @return mixed Whatever registerInterface() returns
	 *   (presumably the Lua package table; confirm against the engine)
	 */
	public function register() {
		global $wgUrlProtocols;

		$lib = [
			'unstrip' => [ $this, 'textUnstrip' ],
			'unstripNoWiki' => [ $this, 'textUnstripNoWiki' ],
			'killMarkers' => [ $this, 'killMarkers' ],
			'getEntityTable' => [ $this, 'getEntityTable' ],
			'jsonEncode' => [ $this, 'jsonEncode' ],
			'jsonDecode' => [ $this, 'jsonDecode' ],
		];
		$opts = [
			'comma' => wfMessage( 'comma-separator' )->inContentLanguage()->text(),
			'and' => wfMessage( 'and' )->inContentLanguage()->text() .
				wfMessage( 'word-separator' )->inContentLanguage()->text(),
			'ellipsis' => wfMessage( 'ellipsis' )->inContentLanguage()->text(),
			'nowiki_protocols' => [],
		];

		foreach ( $wgUrlProtocols as $prot ) {
			// Only protocols ending in ':' (e.g. "mailto:") get a pattern here;
			// entries such as "http://" are skipped by this check.
			if ( substr( $prot, -1 ) === ':' ) {
				// To convert the protocol into a case-insensitive Lua pattern,
				// we need to replace letters with a character class like [Xx]
				// and insert a '%' before various punctuation.
				$prot = preg_replace_callback( '/([a-zA-Z])|([()^$%.\[\]*+?-])/', static function ( $m ) {
					if ( $m[1] ) {
						return '[' . strtoupper( $m[1] ) . strtolower( $m[1] ) . ']';
					} else {
						return '%' . $m[2];
					}
				}, substr( $prot, 0, -1 ) );
				// Key is the Lua pattern, value is the replacement with the
				// colon written as the numeric entity &#58;
				$opts['nowiki_protocols']["($prot):"] = '%1&#58;';
			}
		}

		return $this->getEngine()->registerInterface( 'mw.text.lua', $lib, $opts );
	}

	/**
	 * Handler for textUnstrip
	 *
	 * Expands nowiki strip markers in the string, then removes any strip
	 * markers that remain.
	 *
	 * @internal
	 * @param string $s
	 * @return string[]
	 */
	public function textUnstrip( $s ) {
		$this->checkType( 'unstrip', 1, $s, 'string' );
		$stripState = $this->getParser()->getStripState();
		return [ $stripState->killMarkers( $stripState->unstripNoWiki( $s ) ) ];
	}

	/**
	 * Handler for textUnstripNoWiki
	 *
	 * Passes the string through the parser strip state's unstripNoWiki().
	 *
	 * @internal
	 * @param string $s
	 * @return string[]
	 */
	public function textUnstripNoWiki( $s ) {
		$this->checkType( 'unstripNoWiki', 1, $s, 'string' );
		return [ $this->getParser()->getStripState()->unstripNoWiki( $s ) ];
	}

	/**
	 * Handler for killMarkers
	 *
	 * Passes the string through the parser strip state's killMarkers().
	 *
	 * @internal
	 * @param string $s
	 * @return string[]
	 */
	public function killMarkers( $s ) {
		$this->checkType( 'killMarkers', 1, $s, 'string' );
		return [ $this->getParser()->getStripState()->killMarkers( $s ) ];
	}

	/**
	 * Handler for getEntityTable
	 *
	 * Returns PHP's HTML5 entity translation table, flipped so that the
	 * entity string (e.g. "&amp;") is the key and the character is the value.
	 *
	 * @internal
	 * @return array[]
	 */
	public function getEntityTable() {
		$table = array_flip(
			get_html_translation_table( HTML_ENTITIES, ENT_QUOTES | ENT_HTML5, "UTF-8" )
		);
		return [ $table ];
	}

	/**
	 * Handler for jsonEncode
	 *
	 * Unless JSON_PRESERVE_KEYS is set, array values are first passed through
	 * reindexArrays() in encoding mode. JSON_PRETTY selects pretty-printed
	 * output.
	 *
	 * @internal
	 * @param mixed $value
	 * @param string|int $flags Bitmask of the JSON_* constants of this class
	 * @return string[]
	 * @throws Scribunto_LuaError If the value cannot be encoded
	 */
	public function jsonEncode( $value, $flags ) {
		$this->checkTypeOptional( 'mw.text.jsonEncode', 2, $flags, 'number', 0 );
		$flags = (int)$flags;
		if ( !( $flags & self::JSON_PRESERVE_KEYS ) && is_array( $value ) ) {
			$value = self::reindexArrays( $value, true );
		}
		$ret = FormatJson::encode( $value, (bool)( $flags & self::JSON_PRETTY ), FormatJson::ALL_OK );
		if ( $ret === false ) {
			throw new Scribunto_LuaError( 'mw.text.jsonEncode: Unable to encode value' );
		}
		return [ $ret ];
	}

	/**
	 * Handler for jsonDecode
	 *
	 * Parses with FORCE_ASSOC, adding TRY_FIXING when JSON_TRY_FIXING is set.
	 * Unless JSON_PRESERVE_KEYS is set, array results are passed through
	 * reindexArrays() in decoding mode.
	 *
	 * @internal
	 * @param string $s
	 * @param string|int $flags Bitmask of the JSON_* constants of this class
	 * @return array
	 * @throws Scribunto_LuaError If parsing fails
	 */
	public function jsonDecode( $s, $flags ) {
		$this->checkType( 'mw.text.jsonDecode', 1, $s, 'string' );
		$this->checkTypeOptional( 'mw.text.jsonDecode', 2, $flags, 'number', 0 );
		$flags = (int)$flags;
		$opts = FormatJson::FORCE_ASSOC;
		if ( $flags & self::JSON_TRY_FIXING ) {
			$opts |= FormatJson::TRY_FIXING;
		}
		$status = FormatJson::parse( $s, $opts );
		if ( !$status->isOk() ) {
			throw new Scribunto_LuaError( 'mw.text.jsonDecode: ' . $status->getMessage()->text() );
		}
		$val = $status->getValue();
		if ( !( $flags & self::JSON_PRESERVE_KEYS ) && is_array( $val ) ) {
			$val = self::reindexArrays( $val, false );
		}
		return [ $val ];
	}

	/**
	 * Recursively reindex array with integer keys to 0-based or 1-based
	 *
	 * When encoding, the array is sorted numerically by key; if its keys are
	 * then exactly 1..n it is returned as a 0-based list. When decoding, an
	 * array whose keys are exactly 0..n-1 is rekeyed to 1..n. Arrays that are
	 * not such a sequence keep their keys. Nested arrays are always processed
	 * the same way.
	 *
	 * @param array $arr
	 * @param bool $isEncoding True for the encoding direction (1-based in,
	 *   0-based out), false for decoding (0-based in, 1-based out)
	 * @return array
	 * @internal
	 */
	public static function reindexArrays( array $arr, $isEncoding ) {
		if ( $isEncoding ) {
			ksort( $arr, SORT_NUMERIC );
			$next = 1;
		} else {
			$next = 0;
		}
		$isSequence = true;
		foreach ( $arr as $k => &$v ) {
			if ( is_array( $v ) ) {
				$v = self::reindexArrays( $v, $isEncoding );
			}

			// Once a gap or a non-numeric key has been seen, stop checking
			// keys but keep recursing into the remaining values.
			if ( $isSequence ) {
				if ( is_int( $k ) ) {
					$isSequence = $next++ === $k;
				} elseif ( $isEncoding && ctype_digit( $k ) ) {
					// json_decode currently doesn't return integer keys for {}
					$isSequence = $next++ === (int)$k;
				} else {
					$isSequence = false;
				}
			}
		}
		// Break the by-reference binding so the last element is no longer
		// aliased by $v when the array is copied or returned below.
		unset( $v );

		if ( $isSequence ) {
			if ( $isEncoding ) {
				return array_values( $arr );
			} else {
				// array_combine() with an empty range is avoided for empty input
				return $arr ? array_combine( range( 1, count( $arr ) ), $arr ) : $arr;
			}
		}
		return $arr;
	}
}