Merge "Do not always store template data compressed"

This commit is contained in:
jenkins-bot 2019-09-30 23:56:21 +00:00 committed by Gerrit Code Review
commit 2aeec4610d
6 changed files with 122 additions and 49 deletions

View file

@ -23,6 +23,7 @@
"AutoloadClasses": {
"TemplateDataHooks": "includes/TemplateDataHooks.php",
"TemplateDataBlob": "includes/TemplateDataBlob.php",
"TemplateDataCompressedBlob": "includes/TemplateDataCompressedBlob.php",
"ApiTemplateData": "includes/api/ApiTemplateData.php"
},
"Hooks": {

View file

@ -13,47 +13,45 @@ use MediaWiki\MediaWikiServices;
* @class
*/
class TemplateDataBlob {
// Size of MySQL 'blob' field; page_props table where the data is stored uses one.
const MAX_LENGTH = 65535;
/**
* @var stdClass
*/
private $data;
protected $data;
/**
* @var string|null In-object cache for getJSON()
*/
private $json = null;
/**
* @var string|null In-object cache for getJSONForDatabase()
*/
private $jsonDB = null;
protected $json = null;
/**
* @var Status Cache of TemplateDataBlob::parse
*/
private $status;
protected $status;
/**
* @var string[] Predefined formats for TemplateData to check against
*/
private static $formats = [
protected static $formats = [
'block' => "{{_\n| _ = _\n}}",
'inline' => '{{_|_=_}}',
];
/**
* Parse and validate passed JSON and create a TemplateDataBlob object.
* Parse and validate passed JSON and create a blob handling
* instance.
* Accepts and handles user-provided data.
*
* @param IDatabase $db
* @param string $json
* @throws Exception
* @return TemplateDataBlob
* @return TemplateDataBlob|TemplateDataCompressedBlob
*/
public static function newFromJSON( $json ) {
$tdb = new self( json_decode( $json ) );
public static function newFromJSON( $db, $json ) {
if ( $db->getType() === 'mysql' ) {
$tdb = new TemplateDataCompressedBlob( json_decode( $json ) );
} else {
$tdb = new TemplateDataBlob( json_decode( $json ) );
}
$status = $tdb->parse();
@ -77,17 +75,19 @@ class TemplateDataBlob {
}
/**
* Parse and validate passed JSON (possibly gzip-compressed) and create a TemplateDataBlob object.
* Parse and validate passed JSON (possibly gzip-compressed) and create a blob handling
* instance.
*
* @param IDatabase $db
* @param string $json
* @return TemplateDataBlob
* @return TemplateDataBlob|TemplateDataCompressedBlob
*/
public static function newFromDatabase( $json ) {
public static function newFromDatabase( $db, $json ) {
// Handle GZIP compression. \037\213 is the header for GZIP files.
if ( substr( $json, 0, 2 ) === "\037\213" ) {
$json = gzdecode( $json );
}
return self::newFromJSON( $json );
return self::newFromJSON( $db, $json );
}
/**
@ -96,7 +96,7 @@ class TemplateDataBlob {
* See Specification.md for the expected format of the JSON object.
* @return Status
*/
private function parse() {
protected function parse() {
$data = $this->data;
static $rootKeys = [
@ -550,12 +550,6 @@ class TemplateDataBlob {
}
}
}
$length = strlen( $this->getJSONForDatabase() );
if ( $length > self::MAX_LENGTH ) {
return Status::newFatal( 'templatedata-invalid-length', $length, self::MAX_LENGTH );
}
return Status::newGood();
}
@ -693,14 +687,10 @@ class TemplateDataBlob {
}
/**
* @return string JSON (gzip compressed)
* @return string JSON
*/
public function getJSONForDatabase() {
if ( $this->jsonDB === null ) {
// Cache for repeat calls
$this->jsonDB = gzencode( $this->getJSON() );
}
return $this->jsonDB;
return $this->getJSON();
}
public function getHtml( Language $lang ) {

View file

@ -0,0 +1,62 @@
<?php
/**
* @file
* @ingroup Extensions
*/
/**
* Represents the information about a template,
* coming from the JSON blob in the <templatedata> tags
* on wiki pages.
* This implementation stores the information as a compressed gzip blob
* in the database.
*
* @class
*/
class TemplateDataCompressedBlob extends TemplateDataBlob {
	/**
	 * Largest number of bytes accepted for the stored value. This is the size of
	 * a MySQL 'blob' field, which is what the page_props table uses for the data.
	 */
	const MAX_LENGTH = 65535;

	/**
	 * @var string|null Memoised result of getJSONForDatabase()
	 */
	protected $jsonDB = null;

	/**
	 * Only remember the data here; compression is deferred until
	 * getJSONForDatabase() is called.
	 *
	 * @param stdClass|null $data Template data
	 */
	protected function __construct( $data = null ) {
		$this->jsonDB = null;
		$this->data = $data;
	}

	/**
	 * Run the parent's parse step, then additionally reject data whose
	 * gzip-compressed JSON is longer than MAX_LENGTH bytes.
	 *
	 * See Specification.md for the expected format of the JSON object.
	 * @return Status
	 */
	protected function parse() {
		$parentStatus = parent::parse();
		if ( !$parentStatus->isOK() ) {
			// Already failing: hand the parent's status back untouched.
			return $parentStatus;
		}

		$compressedSize = strlen( $this->getJSONForDatabase() );
		return $compressedSize > self::MAX_LENGTH
			? Status::newFatal( 'templatedata-invalid-length', $compressedSize, self::MAX_LENGTH )
			: $parentStatus;
	}

	/**
	 * @return string JSON (gzip compressed)
	 */
	public function getJSONForDatabase() {
		if ( $this->jsonDB !== null ) {
			// Repeat call: reuse the value compressed earlier.
			return $this->jsonDB;
		}

		$this->jsonDB = gzencode( $this->getJSON() );
		return $this->jsonDB;
	}
}

View file

@ -129,7 +129,7 @@ class TemplateDataHooks {
* @return string HTML to insert in the page.
*/
public static function render( $input, $args, Parser $parser, $frame ) {
$ti = TemplateDataBlob::newFromJSON( $input );
$ti = TemplateDataBlob::newFromJSON( wfGetDB( DB_REPLICA ), $input );
$status = $ti->getStatus();
if ( !$status->isOK() ) {

View file

@ -103,7 +103,7 @@ class ApiTemplateData extends ApiBase {
foreach ( $res as $row ) {
$rawData = $row->pp_value;
$tdb = TemplateDataBlob::newFromDatabase( $rawData );
$tdb = TemplateDataBlob::newFromDatabase( $db, $rawData );
$status = $tdb->getStatus();
if ( !$status->isOK() ) {

View file

@ -2,6 +2,7 @@
/**
* @group TemplateData
* @group Database
* @covers TemplateDataBlob
*/
class TemplateDataBlobTest extends MediaWikiTestCase {
@ -561,15 +562,8 @@ class TemplateDataBlobTest extends MediaWikiTestCase {
'msg' => 'Custom parameter format string (2)',
'status' => true
],
[
// Should be long enough to trigger this condition after gzipping.
'input' => '{
"description": "' . self::generatePseudorandomString( 100000, 42 ) . '",
"params": {}
}',
'status' => 'Data too large to save (75,217 bytes, limit is 65,535)'
],
];
$calls = [];
foreach ( $cases as $case ) {
$calls[] = [ $case ];
@ -643,7 +637,7 @@ class TemplateDataBlobTest extends MediaWikiTestCase {
}
}
$t = TemplateDataBlob::newFromJSON( $case['input'] );
$t = TemplateDataBlob::newFromJSON( $this->db, $case['input'] );
$actual = $t->getJSON();
$status = $t->getStatus();
if ( !$status->isGood() ) {
@ -668,7 +662,7 @@ class TemplateDataBlobTest extends MediaWikiTestCase {
// Assert this case roundtrips properly by running through the output as input.
$t = TemplateDataBlob::newFromJSON( $case['output'] );
$t = TemplateDataBlob::newFromJSON( $this->db, $case['output'] );
$status = $t->getStatus();
if ( !$status->isGood() ) {
@ -693,19 +687,45 @@ class TemplateDataBlobTest extends MediaWikiTestCase {
$this->assertTemplateData( $case );
}
/**
* MySQL breaks if the input is too large even after compression
*/
public function testParseLongString() {
	// Anything other than MySQL is skipped with an explanatory message.
	if ( $this->db->getType() !== 'mysql' ) {
		$this->markTestSkipped( 'long compressed strings break on MySQL only' );
		return;
	}

	// Should be long enough to trigger the size failure even after gzipping.
	$oversizedCase = [
		'input' => '{
"description": "' . self::generatePseudorandomString( 100000, 42 ) . '",
"params": {}
}',
		'status' => 'Data too large to save (75,217 bytes, limit is 65,535)'
	];

	$this->assertTemplateData( $oversizedCase );
}
/**
* Verify we can gzdecode(), which came in PHP 5.4.0. MediaWiki needs a
* fallback function for it.
* If this test fails, we are most probably attempting to use gzdecode()
* with PHP before 5.4.
*
* @see bug 54058
* @see bug T56058
*
* Some databases will not be able to store compressed data cleanly
* but the object will be initialized properly even if compressed
* data are provided
*
* @see bug T203850
*/
public function testGetJsonForDatabase() {
// Compress JSON to trigger the code path in newFromDatabase that ends
// up calling gzdecode().
$gzJson = gzencode( '{}' );
$templateData = TemplateDataBlob::newFromDatabase( $gzJson );
$templateData = TemplateDataBlob::newFromDatabase( $this->db, $gzJson );
$this->assertInstanceOf( 'TemplateDataBlob', $templateData );
}
@ -985,7 +1005,7 @@ class TemplateDataBlobTest extends MediaWikiTestCase {
$case['msg'] = is_string( $case['status'] ) ? $case['status'] : 'TemplateData assertion';
}
$t = TemplateDataBlob::newFromJSON( $case['input'] );
$t = TemplateDataBlob::newFromJSON( $this->db, $case['input'] );
$status = $t->getStatus();
$this->assertTrue(
@ -1289,7 +1309,7 @@ HTML
* @dataProvider provideGetHtml
*/
public function testGetHtml( array $data, $expected ) {
$t = TemplateDataBlob::newFromJSON( json_encode( $data ) );
$t = TemplateDataBlob::newFromJSON( $this->db, json_encode( $data ) );
$actual = $t->getHtml( Language::factory( 'qqx' ) );
$linedActual = preg_replace( '/>\s*</', ">\n<", $actual );