Merge "Add a maintenance script to clean afl_var_dump"

This commit is contained in:
jenkins-bot 2020-03-03 01:20:25 +00:00 committed by Gerrit Code Review
commit 9335116758
3 changed files with 1108 additions and 1 deletions

View file

@ -175,7 +175,8 @@
"ApiAbuseFilterCheckMatch": "includes/api/ApiAbuseFilterCheckMatch.php",
"ApiAbuseLogPrivateDetails": "includes/api/ApiAbuseLogPrivateDetails.php",
"NormalizeThrottleParameters": "maintenance/normalizeThrottleParameters.php",
"FixOldLogEntries": "maintenance/fixOldLogEntries.php"
"FixOldLogEntries": "maintenance/fixOldLogEntries.php",
"UpdateVarDumps": "maintenance/updateVarDumps.php"
},
"TestAutoloadClasses": {
"AbuseFilterConsequencesTest": "tests/phpunit/AbuseFilterConsequencesTest.php",

View file

@ -0,0 +1,635 @@
<?php
use Wikimedia\AtEase\AtEase;
use Wikimedia\Rdbms\Database;
use Wikimedia\Rdbms\IResultWrapper;
/**
* Performs several tasks aiming to update the stored var dumps for filter hits.
* See T213006 for a list.
*
* @ingroup Maintenance
*/
// Locate the MediaWiki installation: use MW_INSTALL_PATH when it is set to a
// non-empty value, otherwise fall back to the directory three levels above this one.
$IP = getenv( 'MW_INSTALL_PATH' ) ?: __DIR__ . '/../../..';
require_once "$IP/maintenance/Maintenance.php";
class UpdateVarDumps extends LoggedUpdateMaintenance {
/** @var Database A connection to replica, used for all the SELECT queries */
private $dbr;
/** @var Database A connection to the master, used for all the writes */
private $dbw;
/** @var bool Whether we're performing a dry run (set by either --dry-run or --dry-run-verbose) */
private $dryRun = false;
/**
 * @var int Highest afl_id in the abuse_filter_log table (the result of MAX(afl_id)).
 *   Despite the name this is not a row count: it is the upper bound for the
 *   primary-key batching loops, and it is 0 when the table is empty.
 */
private $allRowsCount;
/**
 * Registers the script description, the --dry-run and --dry-run-verbose options and
 * the requirement on the 'Abuse Filter' extension, and sets the batch size to 200.
 *
 * @inheritDoc
 */
public function __construct() {
parent::__construct();
$this->addDescription( 'Update AbuseFilter var dumps - T213006' );
$this->addOption( 'dry-run-verbose', 'Perform a verbose dry run' );
$this->addOption( 'dry-run', 'Perform a dry run' );
$this->requireExtension( 'Abuse Filter' );
// Used as the width of the afl_id windows in every batching loop of this script
$this->setBatchSize( 200 );
}
/**
 * The update key is simply the name of this class.
 *
 * @inheritDoc
 */
public function getUpdateKey() {
	return self::class;
}
/**
 * Entry point: opens the DB connections, reads the highest afl_id and then runs the
 * four cleanup steps in a fixed order. Nothing is done if abuse_filter_log is empty.
 *
 * @inheritDoc
 * @return bool False for dry runs, true otherwise. NOTE(review): presumably false keeps
 *   a dry run from being recorded as a completed update - confirm against
 *   LoggedUpdateMaintenance.
 */
public function doDBUpdates() {
if ( $this->hasOption( 'dry-run-verbose' ) || $this->hasOption( 'dry-run' ) ) {
// This way the script can be called with dry-run-verbose only and we can check for dry-run
$this->dryRun = true;
}
// Faulty rows aren't inserted anymore, hence we can query the replica and update the master.
$this->dbr = wfGetDB( DB_REPLICA );
$this->dbw = wfGetDB( DB_MASTER );
// Control batching with the primary key to keep the queries performant and allow gaps.
// Note that what we store here is the highest afl_id, not the number of rows.
$this->allRowsCount = (int)$this->dbr->selectField(
'abuse_filter_log',
'MAX(afl_id)',
[],
__METHOD__
);
if ( $this->allRowsCount === 0 ) {
$this->output( "...the abuse_filter_log table is empty.\n" );
return !$this->dryRun;
}
// Do the actual work. Every step is kept self-contained (SRP), even where that makes
// part of the work overlap: fixMissingDumps and moveToText already write "tt:xxxx"
// references, so the last two steps only deal with pre-existing "stored-text:xxxx" rows.
// First, ensure that afl_var_dump isn't empty
$this->fixMissingDumps();
// Then, ensure that abuse_filter_log.afl_var_dump only contains references to the text
// table ("stored-text:xxxx" or "tt:xxxx") and no inline serialized data
$this->moveToText();
// Then update the storage format in the text table
$this->updateText();
// Finally, replace "stored-text:xxxx" with "tt:xxxx" for all rows
$this->updateAflVarDump();
return !$this->dryRun;
}
/**
 * Step 1/4: take care of rows whose afl_var_dump is an empty string.
 *
 * gerrit/16527 fixed a bug which inserted an extra abuse_filter_log row without the var
 * dump for a given action. Such a row is deleted when an otherwise identical row with a
 * dump exists; if there is no such twin, a minimal var dump is stored for the row.
 * The table is walked in afl_id windows as wide as the batch size.
 * This handles point 7. of T213006.
 */
private function fixMissingDumps() {
	$this->output( "...Checking for missing dumps (1/4)\n" );

	$step = $this->getBatchSize();
	$numDeleted = 0;
	$numRebuilt = 0;
	$lowerBound = 0;
	do {
		$upperBound = $lowerBound + $step;
		$blankRows = $this->dbr->select(
			'abuse_filter_log',
			'*',
			[
				'afl_var_dump' => '',
				"afl_id > $lowerBound",
				"afl_id <= $upperBound"
			],
			__METHOD__,
			[ 'ORDER BY' => 'afl_id ASC' ]
		);
		// Slide the window forward before processing the rows we just fetched
		$lowerBound = $upperBound;

		$batchResult = $this->doFixMissingDumps( $blankRows );
		$numDeleted += $batchResult['deleted'];
		$numRebuilt += $batchResult['rebuilt'];
	} while ( $lowerBound <= $this->allRowsCount );

	if ( !$this->dryRun ) {
		$this->output(
			"...deleted $numDeleted rows with blank afl_var_dump, and rebuilt " .
			"$numRebuilt rows.\n"
		);
		return;
	}
	$this->output(
		"...found $numDeleted rows with blank afl_var_dump to delete, and " .
		"$numRebuilt rows to rebuild.\n"
	);
}
/**
 * Processes one batch of rows with an empty afl_var_dump: a row is deleted if a later
 * row that is identical in every other column exists, and otherwise it gets a minimal
 * dump rebuilt from its own columns. Nothing is written during dry runs.
 *
 * @param IResultWrapper $brokenRows Full abuse_filter_log rows
 * @return int[] [ 'rebuilt' => rows rebuilt (or to rebuild), 'deleted' => rows deleted
 *   (or to delete) ]
 */
private function doFixMissingDumps( IResultWrapper $brokenRows ) {
	$numDeleted = 0;
	foreach ( $brokenRows as $row ) {
		if ( $row->afl_var_dump === '' ) {
			// Search for a row which matches this one on every column but the ID and the dump.
			$twinConds = array_diff_key(
				get_object_vars( $row ),
				[ 'afl_var_dump' => true, 'afl_id' => true ]
			);
			// The faulty insertion happened right before the correct one, so the IDs should
			// differ by exactly 1; to play safe we only require the twin's ID to be greater.
			// The two entries are guaranteed to share the same timestamp.
			$twinConds[] = 'afl_id > ' . $this->dbr->addQuotes( $row->afl_id );
			$hasSaneTwin = $this->dbr->selectRow(
				'abuse_filter_log',
				'1',
				$twinConds,
				__METHOD__
			);
			if ( $hasSaneTwin ) {
				// The information lives in the twin, so this row can simply go away
				$numDeleted++;
				if ( !$this->dryRun ) {
					$this->dbw->delete(
						'abuse_filter_log',
						[ 'afl_id' => $row->afl_id ],
						__METHOD__
					);
				}
				continue;
			}
		}

		if ( $this->dryRun ) {
			continue;
		}

		// Start from the only values we can be sure of...
		$vars = AbuseFilterVariableHolder::newFromArray( [
			'timestamp' => wfTimestamp( TS_UNIX, $row->afl_timestamp ),
			'action' => $row->afl_action
		] );
		// ...and add what can be derived from the action
		$isAccountCreation = strpos( $row->afl_action, 'createaccount' ) !== false;
		if ( $isAccountCreation ) {
			$vars->setVar( 'accountname', $row->afl_user_text );
		} else {
			$vars->setVar( 'user_name', $row->afl_user_text );
			$title = Title::makeTitle( $row->afl_namespace, $row->afl_title );
			// Moves log the source page, every other action logs the page itself
			$varPrefix = $row->afl_action === 'move' ? 'moved_from' : 'page';
			$vars->setVar( "{$varPrefix}_title", $title->getText() );
			$vars->setVar( "{$varPrefix}_prefixedtitle", $title->getPrefixedText() );
		}

		$textId = AbuseFilter::storeVarDump( $vars );
		$this->dbw->update(
			'abuse_filter_log',
			[ 'afl_var_dump' => "tt:$textId" ],
			[ 'afl_id' => $row->afl_id ],
			__METHOD__
		);
	}

	// Every row which wasn't deleted has been (or would be) rebuilt
	return [
		'rebuilt' => $brokenRows->numRows() - $numDeleted,
		'deleted' => $numDeleted
	];
}
/**
 * Step 2/4: find the rows whose afl_var_dump is neither a 'stored-text:' nor a 'tt:'
 * reference, i.e. rows holding serialized data, and move those dumps to the text table.
 * The table is walked in afl_id windows as wide as the batch size.
 * This handles point 1. of T213006.
 */
private function moveToText() {
	$this->output( "...Moving serialized data away from the abuse_filter_log table (2/4).\n" );

	$step = $this->getBatchSize();
	$movedTotal = 0;
	$truncatedTotal = 0;
	$lowerBound = 0;
	do {
		$upperBound = $lowerBound + $step;
		$notOldPointer = 'afl_var_dump NOT ' .
			$this->dbr->buildLike( 'stored-text:', $this->dbr->anyString() );
		$notNewPointer = 'afl_var_dump NOT ' .
			$this->dbr->buildLike( 'tt:', $this->dbr->anyString() );
		$inlineDumps = $this->dbr->select(
			'abuse_filter_log',
			[ 'afl_id', 'afl_var_dump' ],
			[
				$notOldPointer,
				$notNewPointer,
				"afl_id > $lowerBound",
				"afl_id <= $upperBound"
			],
			__METHOD__,
			[ 'ORDER BY' => 'afl_id ASC' ]
		);
		// Slide the window forward before processing the rows we just fetched
		$lowerBound = $upperBound;

		$batchResult = $this->doMoveToText( $inlineDumps );
		$movedTotal += $batchResult['change'];
		$truncatedTotal += $batchResult['truncated'];
	} while ( $lowerBound <= $this->allRowsCount );

	if ( $this->dryRun ) {
		$summary = "...found $movedTotal abuse_filter_log rows with serialized data and " .
			"$truncatedTotal truncated dumps to rebuild.\n";
	} else {
		$summary = "...moved $movedTotal abuse_filter_log rows and rebuilt $truncatedTotal " .
			"truncated dumps.\n";
	}
	$this->output( $summary );
}
/**
 * Processes one batch of rows whose afl_var_dump holds inline serialized data: the value
 * is unserialized (or partially restored, if it is a truncated serialized array), stored
 * through AbuseFilter::storeVarDump() and replaced with a "tt:" reference.
 * Nothing is written during dry runs, but the rows are still validated and counted.
 *
 * Aborts via fatalError() when the value doesn't look like a serialized array or
 * AbuseFilterVariableHolder, when a serialized object is truncated, or when the
 * unserialized value has an unexpected type.
 *
 * @param IResultWrapper $rows abuse_filter_log rows with the afl_id and afl_var_dump fields
 * @return int[] [ 'change' => rows moved (or to move), 'truncated' => truncated dumps
 *   restored (or to restore) ]
 */
private function doMoveToText( IResultWrapper $rows ) {
	// Very raw checks on the shape of the serialized value; both are loop-invariant
	$serializedRe = '/^(a:\d+:{|O:25:"[Aa]buse[Ff]ilter[Vv]ariable[Hh]older":\d+:{)/';
	$serializedObjRe = '/^O:25:"[Aa]buse[Ff]ilter[Vv]ariable[Hh]older":\d+:{/';

	$changeRows = $truncatedDumps = 0;
	foreach ( $rows as $row ) {
		// Sanity: confirm that the dump is indeed a serialized value
		if ( !preg_match( $serializedRe, $row->afl_var_dump ) ) {
			$this->fatalError(
				"...found a value in afl_var_dump for afl_id {$row->afl_id} which is " .
				"neither a reference to the text table or a serialized value: {$row->afl_var_dump}.\n"
			);
		}

		// unserialize() emits a notice for broken data; we handle the failure ourselves
		AtEase::suppressWarnings();
		$stored = unserialize( $row->afl_var_dump );
		AtEase::restoreWarnings();

		// Compare against false explicitly: a valid empty array ("a:0:{}") is falsy too,
		// and must not be reported and counted as a truncated dump.
		if ( $stored === false ) {
			if ( preg_match( $serializedObjRe, $row->afl_var_dump ) ) {
				// We can only restore truncated arrays, not truncated objects
				$this->fatalError(
					"...found a corrupted afl_var_dump for afl_id {$row->afl_id} containing " .
					"a truncated object: {$row->afl_var_dump}.\n"
				);
			}
			$stored = $this->restoreTruncatedDump( $row->afl_var_dump );
			$truncatedDumps++;
		}

		if ( !is_array( $stored ) && !( $stored instanceof AbuseFilterVariableHolder ) ) {
			$this->fatalError(
				'...found unexpected data type ( ' . gettype( $stored ) . ' ) in ' .
				"afl_var_dump for afl_id {$row->afl_id}.\n"
			);
		}

		$changeRows++;
		if ( !$this->dryRun ) {
			$holder = is_array( $stored ) ? AbuseFilterVariableHolder::newFromArray( $stored ) : $stored;
			// Note: this will upgrade to the new JSON format, so we use tt:
			$newDump = AbuseFilter::storeVarDump( $holder );
			$this->dbw->update(
				'abuse_filter_log',
				[ 'afl_var_dump' => "tt:$newDump" ],
				[ 'afl_id' => $row->afl_id ],
				__METHOD__
			);
		}
	}
	return [ 'change' => $changeRows, 'truncated' => $truncatedDumps ];
}
/**
 * Try to restore a truncated dump. This could happen for very old rows, where afl_var_dump
 * was a blob instead of a longblob, and we tried to insert very long strings there.
 * This handles point 9. of T214193.
 *
 * The serialized string is consumed from the left, one scalar at a time. Parsing stops at
 * the first element which is incomplete or of an unsupported type (e.g. a nested array),
 * and every key => value pair completed up to that point is returned. A key whose value
 * could not be parsed is dropped.
 * With the 'dry-run-verbose' option, both the broken dump and the result are printed.
 *
 * @param string $dump The broken serialized dump
 * @return array With everything that we can restore from $dump on success
 * @throws UnexpectedValueException If an integer, a bool or null is found where an array
 *   key is expected
 */
private function restoreTruncatedDump( $dump ) {
// This method makes various assumptions:
// 1 - Everything is wrapped inside an array
// 2 - Array elements can only be strings, integers, bools or null
// 3 - Array keys can only be strings
// As this is what a serialized dump should look like.
// Drop the "a:<count>:{" array header, so that $string starts with the first element
$string = preg_replace( '/^a:\d+:{/', '', $dump );
$ret = [];
// The key waiting for its value. When null, the next string parsed is a key.
$key = null;
// The shortest complete element handled below which is longer than 2 bytes is "b:0;", and
// "N;" is the only complete 2-byte element, hence this condition.
while ( strlen( $string ) > 2 || $string === 'N;' ) {
$type = substr( $string, 0, 2 );
switch ( $type ) {
case 's:':
// Quotes aren't escaped, so we need to figure out how many characters to include
$matches = [];
if ( !preg_match( '/^s:(\d+):"/', $string, $matches ) ) {
// Truncated inside the string header: stop parsing
break 2;
}
$len = (int)$matches[1];
$val = substr( $string, strlen( $matches[0] ), $len );
if ( strlen( $val ) === $len ) {
if ( $key === null ) {
// It's an array key
$key = $val;
} else {
$ret[$key] = $val;
$key = null;
}
// Header, then the string itself, then the closing quote and the semicolon
$offset = strlen( $matches[0] ) + $len + 2;
break;
} else {
// The truncation happened in the middle of the string
break 2;
}
case 'i:':
if ( preg_match( '/^i:(-?\d+);/', $string, $matches ) ) {
if ( $key === null ) {
throw new UnexpectedValueException( "Unexpected integer key: $string" );
}
$ret[$key] = intval( $matches[1] );
$key = null;
$offset = strlen( $matches[0] );
break;
} else {
// Incomplete integer: stop parsing
break 2;
}
case 'b:':
if ( preg_match( '/^b:([01]);/', $string, $matches ) ) {
if ( $key === null ) {
throw new UnexpectedValueException( "Unexpected bool key: $string" );
}
$ret[$key] = (bool)$matches[1];
$key = null;
// A serialized bool is always 4 bytes long: "b:0;" or "b:1;"
$offset = 4;
break;
} else {
// Incomplete bool: stop parsing
break 2;
}
case 'N;':
if ( $key === null ) {
throw new UnexpectedValueException( "Unexpected null key: $string" );
}
$ret[$key] = null;
$key = null;
$offset = 2;
break;
default:
// Anything else (nested arrays, the closing brace, garbage): stop parsing
break 2;
}
// Remove the value we have just parsed
// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable
// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal
$string = substr( $string, $offset );
}
if ( $this->hasOption( 'dry-run-verbose' ) ) {
$this->output(
"...converted the following corrupted dump:\n\n$dump\n\n to this:\n\n" .
var_export( $ret, true ) . "\n\n"
);
}
return $ret;
}
/**
 * Step 3/4: for every abuse_filter_log row with a 'stored-text:' reference, fetch the
 * text row it points to, so that a serialized AbuseFilterVariableHolder or array can be
 * re-stored as a JSON-encoded array. This assumes that afl_var_dump rows starting with
 * 'tt:' already point to JSON dumps, and afl_var_dump rows starting with 'stored-text:'
 * only point to serialized dumps.
 * The log table is walked in afl_id windows as wide as the batch size; during dry runs
 * the text rows are only counted.
 * This handles point 2. and 6. of T213006.
 */
private function updateText() {
	$this->output(
		"...Re-storing serialized dumps as JSON-encoded arrays for all rows (3/4).\n"
	);

	$step = $this->getBatchSize();
	// SQL expression giving the ID of the text row: the reference without its prefix
	$strippedPrefix = $this->dbr->strreplace(
		'afl_var_dump',
		$this->dbr->addQuotes( 'stored-text:' ),
		$this->dbr->addQuotes( '' )
	);
	$textIdSQL = $this->dbr->buildIntegerCast( $strippedPrefix );
	$isOldPointer = 'afl_var_dump ' .
		$this->dbr->buildLike( 'stored-text:', $this->dbr->anyString() );

	$numTextRows = 0;
	$lowerBound = 0;
	do {
		$upperBound = $lowerBound + $step;
		$textRows = $this->dbr->select(
			[ 'text', 'abuse_filter_log' ],
			[ 'old_id', 'old_text', 'old_flags' ],
			[
				$isOldPointer,
				"afl_id > $lowerBound",
				"afl_id <= $upperBound"
			],
			__METHOD__,
			[ 'DISTINCT', 'ORDER BY' => 'old_id ASC' ],
			[ 'abuse_filter_log' => [ 'JOIN', "old_id = $textIdSQL" ] ]
		);
		// Slide the window forward before processing the rows we just fetched
		$lowerBound = $upperBound;

		$numTextRows += $textRows->numRows();
		if ( !$this->dryRun ) {
			$this->doUpdateText( $textRows );
		}
	} while ( $lowerBound <= $this->allRowsCount );

	if ( $this->dryRun ) {
		$this->output( "...found $numTextRows text rows to update.\n" );
	} else {
		$this->output( "...updated $numTextRows text rows.\n" );
	}
}
/**
 * Re-stores the given text rows as JSON-encoded arrays of variables.
 *
 * For each row the stored blob is loaded (following the 'external' and 'gzip' flags) and:
 *  - if it already is JSON it is left alone, after checking that its flags are sane;
 *  - if it is a serialized AbuseFilterVariableHolder or array, the variables are
 *    normalized with updateVariables(), JSON-encoded and written back.
 * The new flags are always rebuilt from scratch: 'utf-8', plus 'gzip' and 'external' when
 * the old row used them, in which case the new blob is compressed / stored externally
 * again. This way the flags always describe what is actually stored, for arrays too.
 *
 * Aborts via fatalError() on JSON rows with wrong flags and on blobs which unserialize
 * to something that is neither a variable holder nor an array.
 *
 * @param IResultWrapper $res text rows with the old_id, old_text and old_flags fields
 */
private function doUpdateText( IResultWrapper $res ) {
	foreach ( $res as $row ) {
		// This is copied from AbuseFilter::loadVarDump
		$oldFlags = explode( ',', $row->old_flags );
		$isExternal = in_array( 'external', $oldFlags, true );
		$isGzipped = in_array( 'gzip', $oldFlags, true );

		$text = $row->old_text;
		if ( $isExternal ) {
			$text = ExternalStore::fetchFromURL( $row->old_text );
		}
		if ( $isGzipped ) {
			$text = gzinflate( $text );
		}

		if ( FormatJson::decode( $text ) !== null ) {
			// Already in the new format, apparently.
			if (
				!in_array( 'utf-8', $oldFlags, true ) ||
				in_array( 'nativeDataArray', $oldFlags, true )
			) {
				// Sanity
				$this->fatalError( 'Found a JSON-encoded rows with wrong flags.' );
			}
			continue;
		}

		$obj = unserialize( $text );
		if ( $obj instanceof AbuseFilterVariableHolder ) {
			// Convert to array via dumpAllVars
			$varArray = $obj->dumpAllVars( [ 'old_wikitext', 'new_wikitext' ] );
		} else {
			if ( !is_array( $obj ) ) {
				// E.g. false for a corrupted blob. Bail out with a clear message instead
				// of letting updateVariables() fail on the wrong argument type.
				$type = is_object( $obj ) ? get_class( $obj ) : gettype( $obj );
				$this->fatalError(
					"Found unexpected data type ( $type ) in the text table for old_id {$row->old_id}."
				);
			}
			$varArray = $obj;
		}

		// Recreating the flags also drops 'nativeDataArray' (arrays are the default now)
		// and guarantees a single 'utf-8' (T34478) with no empty entries.
		$newFlags = [ 'utf-8' ];
		// This is copied from AbuseFilter::storeVarDump
		$toStore = FormatJson::encode( $this->updateVariables( $varArray ) );
		if ( $isGzipped && function_exists( 'gzdeflate' ) ) {
			$toStore = gzdeflate( $toStore );
			$newFlags[] = 'gzip';
		}
		if ( $isExternal ) {
			// NOTE(review): old_text is the address of the old blob here; confirm that
			// ExternalStore::insert() accepts it as the store URL for the new blob.
			$toStore = ExternalStore::insert( $row->old_text, $toStore );
			$newFlags[] = 'external';
		}

		$this->dbw->update(
			'text',
			[
				'old_text' => $toStore,
				'old_flags' => implode( ',', $newFlags )
			],
			[ 'old_id' => $row->old_id ],
			__METHOD__
		);
	}
}
/**
 * Given the stored variables, removes the ones used in the past to store metadata,
 * lowercases the names of builtin variables and replaces deprecated names with the
 * current ones.
 * Handles points 4., 5. and 8. of T213006.
 *
 * @param array $vars The stored vars, as [ name => value ]
 * @return array The updated vars, as [ name => value ], in the original order
 */
private function updateVariables( array $vars ) {
	// Remove all variables used in the past to store metadata
	unset( $vars['context'], $vars['logged_local_ids'], $vars['logged_global_ids'] );

	$builtinVars = $this->getBuiltinVarNames();
	// Fetched once: the map of deprecated names doesn't change while we loop
	$deprecatedVars = AbuseFilter::getDeprecatedVariables();

	$newVars = [];
	foreach ( $vars as $name => $value ) {
		// Only builtin variables are lowercased; custom names are kept as they are
		$lowerName = strtolower( $name );
		if ( $lowerName !== $name && array_key_exists( $lowerName, $builtinVars ) ) {
			$name = $lowerName;
		}
		// Deprecated names are looked up after lowercasing, so 'ARTICLE_TEXT' is updated too
		if ( array_key_exists( $name, $deprecatedVars ) ) {
			$name = $deprecatedVars[$name];
		}
		$newVars[$name] = $value;
	}
	return $newVars;
}
/**
 * Get the set of builtin variable names: active, deprecated and disabled variables, plus
 * the per-restriction-type variables for the 'moved_from', 'moved_to' and 'page' prefixes.
 * The set is built on the first call and then reused.
 * Copied from AbuseFilterVariableHolder::dumpAllVars.
 *
 * @return array [ varname => true ] for instantaneous search. All names are lowercase
 */
private function getBuiltinVarNames() {
	global $wgRestrictionTypes;
	static $coreVariables = null;

	if ( !$coreVariables ) {
		$names = array_merge(
			array_keys( AbuseFilter::getBuilderValues()['vars'] ),
			array_keys( AbuseFilter::getDeprecatedVariables() ),
			array_keys( AbuseFilter::DISABLED_VARS )
		);
		foreach ( $wgRestrictionTypes as $action ) {
			foreach ( [ 'moved_from', 'moved_to', 'page' ] as $prefix ) {
				$names[] = "{$prefix}_restrictions_$action";
			}
		}
		// Flip the list into a lowercase-keyed set
		$coreVariables = array_change_key_case( array_fill_keys( $names, true ) );
	}

	return $coreVariables;
}
/**
 * Step 4/4: replace 'stored-text:' with 'tt:' in afl_var_dump, walking the table in
 * afl_id windows as wide as the batch size. During dry runs the matching rows are only
 * counted, on the replica. Handles point 3. of T213006.
 */
private function updateAflVarDump() {
	$this->output(
		"...Replacing the 'stored-text:' prefix with 'tt:' (4/4).\n"
	);

	$batchSize = $this->getBatchSize();
	// Dry runs only read, so they can use the replica
	$db = $this->dryRun ? $this->dbr : $this->dbw;
	$hasOldPrefix = 'afl_var_dump ' . $db->buildLike( 'stored-text:', $db->anyString() );
	// Use native SQL functions so that we can update all rows of a batch at the same time.
	$set = [
		'afl_var_dump = ' . $this->dbw->strreplace(
			'afl_var_dump',
			$this->dbw->addQuotes( 'stored-text:' ),
			$this->dbw->addQuotes( 'tt:' )
		)
	];

	$prevID = 0;
	$numRows = 0;
	do {
		$curID = $prevID + $batchSize;
		$conds = [
			"afl_id > $prevID",
			"afl_id <= $curID",
			$hasOldPrefix
		];

		if ( $this->dryRun ) {
			// Count the rows which would be changed; the SET clause has no place in a SELECT
			$numRows += $this->dbr->selectRowCount( 'abuse_filter_log', '*', $conds, __METHOD__ );
		} else {
			$this->dbw->update( 'abuse_filter_log', $set, $conds, __METHOD__ );
			$numRows += $this->dbw->affectedRows();
		}

		$prevID = $curID;
	} while ( $prevID <= $this->allRowsCount );

	if ( $this->dryRun ) {
		$this->output( "...would change afl_var_dump for $numRows rows.\n" );
	} else {
		$this->output( "...updated afl_var_dump prefix for $numRows rows.\n" );
	}
}
}
// Name of the maintenance class defined in this file, followed by the inclusion of the
// RUN_MAINTENANCE_IF_MAIN file. NOTE(review): presumably that file runs $maintClass only
// when this script is the entry point - confirm against Maintenance.php.
$maintClass = 'UpdateVarDumps';
require_once RUN_MAINTENANCE_IF_MAIN;

View file

@ -0,0 +1,471 @@
<?php
use MediaWiki\Tests\Maintenance\MaintenanceBaseTestCase;
use Wikimedia\Rdbms\IResultWrapper;
use Wikimedia\TestingAccessWrapper;
/**
* @group Database
* @coversDefaultClass UpdateVarDumps
* @property TestingAccessWrapper|UpdateVarDumps $maintenance
*/
class UpdateVarDumpsTest extends MaintenanceBaseTestCase {
/** Timestamp used for all the abuse_filter_log rows inserted by the tests */
private const TIMESTAMP = '20000102030405';
/**
 * @var array Template for abuse_filter_log rows. Tests add afl_var_dump (and usually
 *   afl_id) on top of it; afl_timestamp is added in the constructor.
 */
private static $aflRow = [
// 'afl_id'
'afl_filter' => 1,
'afl_global' => 0,
'afl_filter_id' => 1,
'afl_user' => 1,
'afl_user_text' => 'Foo',
'afl_ip' => '127.0.0.1',
'afl_action' => 'edit',
'afl_actions' => '',
// 'afl_var_dump'
// 'afl_timestamp' added in __construct
'afl_namespace' => 1,
'afl_title' => 'Foobar',
'afl_wiki' => null,
'afl_deleted' => 0,
'afl_patrolled_by' => 1,
'afl_rev_id' => 123
];
/** Template for text rows: tests add old_text and, where needed, override old_flags */
private const TEXT_ROW = [
// 'old_id'
'old_flags' => ''
// 'old_text'
];
/** Sample variables (a string, an int, a bool, null and an array) used to build dumps */
private const VARS = [
'action' => 'edit',
'page_id' => 12,
'user_blocked' => true,
'accountname' => null,
'user_groups' => [ 'x', 'y' ]
];
/**
 * @inheritDoc
 */
protected $tablesUsed = [ 'abuse_filter_log', 'text' ];
/**
 * Completes the abuse_filter_log row template with afl_timestamp, which must be
 * formatted by the database and hence cannot be part of the static initializer.
 *
 * @inheritDoc
 */
public function __construct( $name = null, array $data = [], $dataName = '' ) {
	parent::__construct( $name, $data, $dataName );
	$dbTimestamp = wfGetDB( DB_REPLICA )->timestamp( self::TIMESTAMP );
	self::$aflRow['afl_timestamp'] = $dbTimestamp;
}
/**
 * Points both connections of the script under test to the test database.
 *
 * @inheritDoc
 */
public function setUp(): void {
	parent::setUp();
	$script = $this->maintenance;
	$script->dbw = $this->db;
	$script->dbr = $this->db;
	// This isn't really necessary
	$script->allRowsCount = 50;
}
/**
 * Returns the name of the maintenance class under test.
 *
 * @inheritDoc
 */
public function getMaintenanceClass() {
return UpdateVarDumps::class;
}
/**
 * Shorthand to select all rows on a table (either abuse_filter_log or text), ordering
 * by pkey ASC. The query is attributed to the calling method.
 *
 * @param string $table 'abuse_filter_log' to order by afl_id; anything else is ordered
 *   by old_id
 * @return IResultWrapper
 */
private function selectAllAscending( string $table ) : IResultWrapper {
	$key = $table === 'abuse_filter_log' ? 'afl_id' : 'old_id';
	return $this->db->select(
		$table,
		'*',
		'',
		wfGetCaller(),
		// The option is spelled with a space; an unknown 'ORDER_BY' key leaves the
		// result unordered, and the tests compare the rows positionally.
		[ 'ORDER BY' => "$key ASC" ]
	);
}
/**
 * With nothing in abuse_filter_log, the script should just report that the table is empty.
 *
 * @covers ::doDBUpdates
 */
public function testOnEmptyDB() {
$this->expectOutputRegex( '/the abuse_filter_log table is empty/' );
$this->maintenance->execute();
}
/**
 * Rows with a blank dump and a sane duplicate must be deleted; everything else must
 * be left untouched.
 *
 * @covers ::fixMissingDumps
 * @covers ::doFixMissingDumps
 */
public function testFixMissingDumps() {
	$expected = $this->insertMissingDumps();

	$this->maintenance->fixMissingDumps();

	$remaining = $this->selectAllAscending( 'abuse_filter_log' );
	$actual = array_map(
		static function ( $row ) {
			return [ 'afl_id' => (int)$row->afl_id, 'afl_var_dump' => $row->afl_var_dump ];
		},
		iterator_to_array( $remaining, false )
	);
	$this->assertSame( $expected, $actual );
}
/**
 * Inserts rows with blank dumps, each followed (not necessarily immediately) by a
 * duplicate with a valid dump.
 *
 * @return array Expected content of abuse_filter_log after the cleanup
 */
private function insertMissingDumps() : array {
	// afl_id => afl_var_dump
	$dumpsById = [
		// Wrong duplicate 1
		1 => '',
		// Good duplicate 1
		2 => 'stored-text:12345',
		// Wrong duplicate 2
		3 => '',
		// Good duplicate 2
		4 => 'stored-text:12345',
		// Wrong duplicate, 3
		5 => '',
		// Extraneous row
		6 => 'stored-text:444',
		// Good duplicate 3
		7 => 'stored-text:12345',
	];

	$insertRows = [];
	foreach ( $dumpsById as $id => $dump ) {
		$insertRows[] = [ 'afl_id' => $id, 'afl_var_dump' => $dump ] + self::$aflRow;
	}
	$this->db->insert( 'abuse_filter_log', $insertRows, __METHOD__ );

	// Only the rows which had a dump to begin with should survive
	return [
		[ 'afl_id' => 2, 'afl_var_dump' => 'stored-text:12345' ],
		[ 'afl_id' => 4, 'afl_var_dump' => 'stored-text:12345' ],
		[ 'afl_id' => 6, 'afl_var_dump' => 'stored-text:444' ],
		[ 'afl_id' => 7, 'afl_var_dump' => 'stored-text:12345' ],
	];
}
/**
 * Rows with a blank dump and no duplicate must get a minimal dump in the text table.
 *
 * @covers ::fixMissingDumps
 * @covers ::doFixMissingDumps
 */
public function testFixMissingDumpsToRebuild() {
	$expected = $this->insertMissingDumpsToRebuild();

	$this->maintenance->fixMissingDumps();

	$logRows = $this->selectAllAscending( 'abuse_filter_log' );
	$logActual = array_map(
		static function ( $row ) {
			return [ 'afl_id' => (int)$row->afl_id, 'afl_var_dump' => $row->afl_var_dump ];
		},
		iterator_to_array( $logRows, false )
	);
	$this->assertSame( $expected['abuse_filter_log'], $logActual );

	$storedRows = $this->selectAllAscending( 'text' );
	$storedActual = array_map(
		static function ( $row ) {
			return [ 'old_id' => (int)$row->old_id, 'old_text' => $row->old_text ];
		},
		iterator_to_array( $storedRows, false )
	);
	$this->assertSame( $expected['text'], $storedActual );
}
/**
 * Inserts three rows with a blank dump and no duplicate, one per kind of action handled
 * by the script (edit, createaccount, move).
 *
 * @return array Expected contents of the abuse_filter_log and text tables after the
 *   cleanup, keyed by table name
 */
private function insertMissingDumpsToRebuild() : array {
	$timestamp = wfTimestamp( TS_UNIX, self::TIMESTAMP );

	// afl_id => afl_action
	$actionsById = [ 1 => 'edit', 2 => 'createaccount', 3 => 'move' ];
	$insertRows = [];
	foreach ( $actionsById as $id => $action ) {
		$row = [ 'afl_id' => $id, 'afl_var_dump' => '' ] + self::$aflRow;
		// Overwrite the action in place, in order to keep the same order of columns
		$row['afl_action'] = $action;
		$insertRows[] = $row;
	}
	$this->db->insert( 'abuse_filter_log', $insertRows, __METHOD__ );

	$title = Title::makeTitle( self::$aflRow['afl_namespace'], self::$aflRow['afl_title'] );
	$userName = self::$aflRow['afl_user_text'];
	$pageTitle = self::$aflRow['afl_title'];
	$prefixedTitle = $title->getPrefixedText();
	// Every rebuilt dump starts with the timestamp, followed by the action-specific vars
	$encode = static function ( array $actionVars ) use ( $timestamp ) {
		return FormatJson::encode( [ 'timestamp' => $timestamp ] + $actionVars );
	};

	$expectedLog = [
		[ 'afl_id' => 1, 'afl_var_dump' => 'tt:1' ],
		[ 'afl_id' => 2, 'afl_var_dump' => 'tt:2' ],
		[ 'afl_id' => 3, 'afl_var_dump' => 'tt:3' ],
	];
	$expectedText = [
		[
			'old_id' => 1,
			'old_text' => $encode( [
				'action' => 'edit',
				'user_name' => $userName,
				'page_title' => $pageTitle,
				'page_prefixedtitle' => $prefixedTitle
			] )
		],
		[
			'old_id' => 2,
			'old_text' => $encode( [
				'action' => 'createaccount',
				'accountname' => $userName
			] )
		],
		[
			'old_id' => 3,
			'old_text' => $encode( [
				'action' => 'move',
				'user_name' => $userName,
				'moved_from_title' => $pageTitle,
				'moved_from_prefixedtitle' => $prefixedTitle
			] )
		],
	];

	return [ 'abuse_filter_log' => $expectedLog, 'text' => $expectedText ];
}
/**
 * Inline serialized dumps must be moved to the text table as JSON and replaced with
 * 'tt:' references.
 *
 * @covers ::moveToText
 * @covers ::doMoveToText
 */
public function testMoveToText() {
	$expected = $this->insertMoveToText();

	$this->maintenance->moveToText();

	$logRows = $this->selectAllAscending( 'abuse_filter_log' );
	$logActual = array_map(
		static function ( $row ) {
			return [ 'afl_id' => (int)$row->afl_id, 'afl_var_dump' => $row->afl_var_dump ];
		},
		iterator_to_array( $logRows, false )
	);
	$this->assertSame( $expected['abuse_filter_log'], $logActual );

	$storedRows = $this->selectAllAscending( 'text' );
	$storedActual = array_map(
		static function ( $row ) {
			return [ 'old_id' => (int)$row->old_id, 'old_text' => $row->old_text ];
		},
		iterator_to_array( $storedRows, false )
	);
	$this->assertSame( $expected['text'], $storedActual );
}
/**
 * Inserts three rows with inline dumps: a truncated serialized array, a serialized
 * array and a serialized AbuseFilterVariableHolder.
 *
 * @return array Expected contents of abuse_filter_log and text tables
 */
private function insertMoveToText() : array {
	$serializedArr = serialize( self::VARS );
	$serializedVH = serialize( AbuseFilterVariableHolder::newFromArray( self::VARS ) );

	// afl_id => afl_var_dump
	$dumpsById = [
		// Truncated arr
		1 => substr( $serializedArr, 0, -5 ),
		// Serialized array
		2 => $serializedArr,
		// Serialized VariableHolder
		3 => $serializedVH,
	];
	$insertRows = [];
	foreach ( $dumpsById as $id => $dump ) {
		$insertRows[] = [ 'afl_id' => $id, 'afl_var_dump' => $dump ] + self::$aflRow;
	}
	$this->db->insert( 'abuse_filter_log', $insertRows, __METHOD__ );

	// The truncation eats the end of the last variable, which cannot be restored
	$restorableVars = self::VARS;
	unset( $restorableVars['user_groups'] );
	$truncatedJson = FormatJson::encode( $restorableVars );
	$fullJson = FormatJson::encode( self::VARS );

	return [
		'abuse_filter_log' => [
			[ 'afl_id' => 1, 'afl_var_dump' => 'tt:1' ],
			[ 'afl_id' => 2, 'afl_var_dump' => 'tt:2' ],
			[ 'afl_id' => 3, 'afl_var_dump' => 'tt:3' ],
		],
		'text' => [
			[ 'old_id' => 1, 'old_text' => $truncatedJson ],
			[ 'old_id' => 2, 'old_text' => $fullJson ],
			[ 'old_id' => 3, 'old_text' => $fullJson ],
		]
	];
}
/**
 * Builds a copy of the script whose fatalError() throws a LogicException instead of
 * killing PHP, with private members made accessible and both connections pointing to
 * the test database.
 *
 * @return TestingAccessWrapper|UpdateVarDumps
 */
private function getMaintenanceWithoutExit() {
	// We first need to mock UpdateVarDumps, because fatalError kills PHP.
	$builder = $this->getMockBuilder( UpdateVarDumps::class );
	$builder->setMethods( [ 'fatalError' ] );
	$script = $builder->getMock();
	$script->method( 'fatalError' )->willThrowException( new LogicException() );

	// Then use an access wrapper to call private methods.
	$accessible = TestingAccessWrapper::newFromObject( $script );
	$accessible->allRowsCount = 50;
	$accessible->dbw = $this->db;
	$accessible->dbr = $this->db;
	return $accessible;
}
/**
 * Dumps which cannot be handled must make the script bail out through fatalError(),
 * which is turned into a LogicException here.
 *
 * @param array $row The abuse_filter_log row to insert
 * @dataProvider provideMoveToTextUnexpectedTypes
 * @covers ::doMoveToText
 */
public function testMoveToTextUnexpectedTypes( array $row ) {
	$this->db->insert( 'abuse_filter_log', $row, __METHOD__ );
	$script = $this->getMaintenanceWithoutExit();

	$this->expectException( LogicException::class );

	$script->moveToText();
}
/**
 * Rows with a dump the script must refuse: a truncated serialized object and a
 * serialized string.
 *
 * @return array
 */
public function provideMoveToTextUnexpectedTypes() {
	$serializedVH = serialize( AbuseFilterVariableHolder::newFromArray( self::VARS ) );
	$truncatedObj = substr( $serializedVH, 0, -5 );
	$serializedString = serialize( 'foo bar baz' );

	$cases = [];
	$cases['Truncated obj'] = [
		[ 'afl_id' => 1, 'afl_var_dump' => $truncatedObj ] + self::$aflRow
	];
	$cases['Wrong type'] = [
		[ 'afl_id' => 3, 'afl_var_dump' => $serializedString ] + self::$aflRow
	];
	return $cases;
}
/**
 * @param string $str The truncated serialized array
 * @param array $expected The variables which can be restored from it
 * @covers UpdateVarDumps::restoreTruncatedDump
 * @dataProvider provideTruncatedDump
 */
public function testRestoreTruncatedDump( string $str, array $expected ) {
	$restored = $this->maintenance->restoreTruncatedDump( $str );
	$this->assertSame( $expected, $restored );
}
/**
 * The serialized sample variables, cut at various points, each with the variables
 * expected to survive the truncation.
 *
 * @return array
 */
public function provideTruncatedDump() {
	$serialized = serialize( self::VARS );
	$everyKey = array_keys( self::VARS );

	// [ length passed to substr(), names of the variables which get lost ]
	$truncations = [
		[ -1, [ 'user_groups' ] ],
		[ -7, [ 'user_groups' ] ],
		[ -16, [ 'user_groups' ] ],
		[ -32, [ 'user_groups' ] ],
		[ -46, [ 'user_groups' ] ],
		[ -56, [ 'user_groups', 'accountname' ] ],
		[ -72, [ 'user_groups', 'accountname', 'user_blocked' ] ],
		[ -96, [ 'user_groups', 'accountname', 'user_blocked', 'page_id' ] ],
		// Too short for even a single variable to be restored
		[ 17, $everyKey ],
		[ 10, $everyKey ],
		[ 5, $everyKey ],
	];

	$cases = [];
	foreach ( $truncations as [ $length, $lostKeys ] ) {
		$cases[] = [
			substr( $serialized, 0, $length ),
			array_diff_key( self::VARS, array_flip( $lostKeys ) )
		];
	}
	return $cases;
}
/**
 * Serialized dumps in the text table must be re-stored as JSON with the 'utf-8' flag.
 *
 * @covers ::updateText
 * @covers ::doUpdateText
 */
public function testUpdateText() {
	$expected = $this->insertUpdateText();

	$this->maintenance->updateText();

	$storedRows = $this->selectAllAscending( 'text' );
	$actual = array_map(
		static function ( $row ) {
			return [
				'old_id' => (int)$row->old_id,
				'old_flags' => $row->old_flags,
				'old_text' => $row->old_text
			];
		},
		iterator_to_array( $storedRows, false )
	);
	$this->assertSame( $expected, $actual );
}
/**
 * Inserts three text rows (a serialized AbuseFilterVariableHolder, a serialized array
 * and a JSON array), each referenced by an abuse_filter_log row with the old prefix.
 *
 * @return array Expected content of the text table
 */
private function insertUpdateText() {
	$jsonArr = FormatJson::encode( self::VARS );
	$serializedArr = serialize( self::VARS );
	$serializedVH = serialize( AbuseFilterVariableHolder::newFromArray( self::VARS ) );

	$textRows = [
		// Serialized VH
		[ 'old_text' => $serializedVH ] + self::TEXT_ROW,
		// Serialized array
		[ 'old_text' => $serializedArr, 'old_flags' => 'nativeDataArray' ] + self::TEXT_ROW,
		// JSON array
		[ 'old_text' => $jsonArr, 'old_flags' => 'utf-8' ] + self::TEXT_ROW,
	];
	$this->db->insert( 'text', $textRows, __METHOD__ );

	$textIds = [ 1, 2, 3 ];
	$pointerRows = [];
	$expected = [];
	foreach ( $textIds as $id ) {
		$pointerRows[] = [ 'afl_var_dump' => "stored-text:$id" ] + self::$aflRow;
		// Whatever the original format was, we expect the same JSON and flags
		$expected[] = [ 'old_id' => $id, 'old_flags' => 'utf-8', 'old_text' => $jsonArr ];
	}
	$this->db->insert( 'abuse_filter_log', $pointerRows, __METHOD__ );

	return $expected;
}
/**
 * A JSON dump flagged as 'nativeDataArray' must make the script bail out through
 * fatalError(), which is turned into a LogicException here.
 *
 * @covers ::doUpdateText
 */
public function testUpdateTextWrongFlags() {
	$this->db->insert(
		'text',
		[
			'old_id' => 1,
			'old_flags' => 'nativeDataArray,utf-8',
			'old_text' => FormatJson::encode( self::VARS )
		],
		__METHOD__
	);
	$this->db->insert(
		'abuse_filter_log',
		[ 'afl_var_dump' => 'stored-text:1' ] + self::$aflRow,
		__METHOD__
	);
	$script = $this->getMaintenanceWithoutExit();

	$this->expectException( LogicException::class );

	$script->updateText();
}
/**
 * After the update, every reference must use the 'tt:' prefix.
 *
 * @covers ::updateAflVarDump
 */
public function testUpdateAflVarDump() {
	$this->insertAflVarDump();

	$this->maintenance->updateAflVarDump();

	$dumps = $this->db->selectFieldValues( 'abuse_filter_log', 'afl_var_dump' );
	$distinctDumps = array_unique( $dumps );
	$this->assertSame( [ 'tt:123' ], $distinctDumps );
}
/**
 * Inserts two abuse_filter_log rows pointing to the same text row, one with the old
 * prefix and one with the new prefix.
 */
private function insertAflVarDump() {
	// Old prefix first, then new prefix
	$dumps = [ 'stored-text:123', 'tt:123' ];
	$rows = [];
	foreach ( $dumps as $dump ) {
		$rows[] = [ 'afl_var_dump' => $dump ] + self::$aflRow;
	}
	$this->db->insert( 'abuse_filter_log', $rows, __METHOD__ );
}
/**
 * @param array $old The variables as they were stored
 * @param array $expected The variables after the update
 * @covers UpdateVarDumps::updateVariables
 * @dataProvider provideUpdateVariables
 */
public function testUpdateVariables( array $old, array $expected ) {
	$updated = $this->maintenance->updateVariables( $old );
	$this->assertSame( $expected, $updated );
}
/**
 * Pairs of stored variables and the result expected from updateVariables().
 *
 * @return array
 */
public function provideUpdateVariables() {
	$cases = [];

	// Nothing to update
	$cases['Fine'] = [ self::VARS, self::VARS ];

	// 'context' was used to store metadata
	$cases['Meta-variable'] = [
		[ 'action' => 'edit', 'context' => 'foo' ],
		[ 'action' => 'edit' ]
	];

	$cases['Uppercase'] = [
		[ 'USER_GROUPS' => [ 'bot' ] ],
		[ 'user_groups' => [ 'bot' ] ]
	];

	$cases['Deprecated'] = [
		[ 'article_text' => 'foo', 'moved_to_prefixedtext' => 'bar' ],
		[ 'page_title' => 'foo', 'moved_to_prefixedtitle' => 'bar' ]
	];

	// Uppercase and deprecated, metadata, uppercase only
	$cases['Mixed'] = [
		[ 'ARTICLE_ARTICLEID' => 1, 'logged_local_ids' => [ 1, 2, 3 ], 'OLD_HTML' => '' ],
		[ 'page_id' => 1, 'old_html' => '' ]
	];

	return $cases;
}
}