mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/AbuseFilter.git
synced 2024-11-30 16:54:18 +00:00
24888bea15
Protected effectively means "public to subclasses" and should be avoided for the same reasons as marking everything as public should be avoided. Change-Id: Iba674b486ce53fd1f94f70163d47824e969abb77
502 lines
16 KiB
PHP
502 lines
16 KiB
PHP
<?php
|
|
|
|
namespace MediaWiki\Extension\AbuseFilter\Maintenance;
|
|
|
|
use LoggedUpdateMaintenance;
|
|
use MediaWiki\Extension\AbuseFilter\AbuseFilterServices;
|
|
use Wikimedia\Rdbms\IMaintainableDatabase;
|
|
|
|
// @codeCoverageIgnoreStart
|
|
$IP = getenv( 'MW_INSTALL_PATH' );
|
|
if ( $IP === false ) {
|
|
$IP = __DIR__ . '/../../..';
|
|
}
|
|
require_once "$IP/maintenance/Maintenance.php";
|
|
// @codeCoverageIgnoreEnd
|
|
|
|
/**
|
|
* Normalizes throttle parameters as part of the overhaul described in T203587
|
|
*
|
|
* Tasks performed by this script:
|
|
* - Remove duplicated throttle groups (T203584)
|
|
* - Remove unrecognized stuff from throttle groups (T203584)
|
|
* - Checks if throttle count or period have extra commas inside. If this leads to the filter acting
|
|
* like it would with throttle disabled, we just disable it. Otherwise, since we don't know what
|
|
* the filter is meant to do, we just ask users to evaluate and fix every case by hand. This is
|
|
* highly unlikely to happen anyway. (T203585)
|
|
* - If throttle groups are empty (or only contain unknown keywords), ask users to fix every case
|
|
* by hand. (T203584)
|
|
* - Change some edge cases of throttle parameters saved in abuse_filter_history (T215787):
|
|
* - parameters = null ==> parameters = [ filterID, "0,0", 'none' ]
|
|
* - at least a number missing from parameters[1] ==> insert 0 in place of the missing param
|
|
* - empty groups ==> 'none' (special case, uses the message abusefilter-throttle-none)
|
|
*
|
|
* @codeCoverageIgnore
|
|
* No need to test old single-use script.
|
|
*/
|
|
class NormalizeThrottleParameters extends LoggedUpdateMaintenance {
|
|
public function __construct() {
|
|
parent::__construct();
|
|
|
|
$this->addDescription( 'Normalize AbuseFilter throttle parameters' );
|
|
$this->addOption( 'dry-run', 'Perform a dry run' );
|
|
$this->requireExtension( 'Abuse Filter' );
|
|
}
|
|
|
|
/**
|
|
* @see Maintenance::getUpdateKey()
|
|
* @return string
|
|
*/
|
|
public function getUpdateKey() {
|
|
return 'NormalizeThrottleParameters';
|
|
}
|
|
|
|
/** @var IMaintainableDatabase The primary database */
|
|
private $dbw;
|
|
|
|
/**
|
|
* Rollback the current transaction and emit a fatal error
|
|
*
|
|
* @param string $msg The message of the error
|
|
*/
|
|
private function fail( $msg ) {
|
|
$this->rollbackTransaction( $this->dbw, __METHOD__ );
|
|
$this->fatalError( $msg );
|
|
}
|
|
|
|
/**
|
|
* Get normalized throttle groups
|
|
*
|
|
* @param array $params Throttle parameters
|
|
* @return array[] The first element is the array of old throttle groups, the second
|
|
* is an array of formatted throttle groups
|
|
*/
|
|
private function getNewGroups( $params ) {
|
|
$validGroups = [
|
|
'ip',
|
|
'user',
|
|
'range',
|
|
'creationdate',
|
|
'editcount',
|
|
'site',
|
|
'page'
|
|
];
|
|
$rawGroups = array_slice( $params, 2 );
|
|
$newGroups = [];
|
|
// We use a standard order to check for duplicates. This variable is not used as the actual
|
|
// array of groups to avoid silly changes like 'ip,user' => 'user,ip'. In this variable we also
|
|
// store trimmed groups, so that 'ip, user' is considered to be the same as 'ip,user', just
|
|
// as the actual code does. And again, we don't want to edit the filter just to strip spaces.
|
|
$normalizedGroups = [];
|
|
// Every group should be valid, and subgroups should have valid groups inside. Only keep
|
|
// valid (sub)groups.
|
|
foreach ( $rawGroups as $group ) {
|
|
// Groups must be lowercase.
|
|
$group = strtolower( $group );
|
|
if ( strpos( $group, ',' ) !== false ) {
|
|
// No duplicates in subgroups
|
|
$subGroups = array_unique( explode( ',', $group ) );
|
|
$uniqueGroup = implode( ',', $subGroups );
|
|
$valid = true;
|
|
foreach ( $subGroups as $subGroup ) {
|
|
if ( !in_array( trim( $subGroup ), $validGroups ) ) {
|
|
$valid = false;
|
|
break;
|
|
}
|
|
}
|
|
sort( $subGroups );
|
|
if ( $valid && !in_array( $subGroups, $normalizedGroups ) ) {
|
|
$newGroups[] = $uniqueGroup;
|
|
$normalizedGroups[] = array_map( 'trim', $subGroups );
|
|
}
|
|
} elseif ( in_array( trim( $group ), $validGroups ) ) {
|
|
$newGroups[] = $group;
|
|
$normalizedGroups[] = trim( $group );
|
|
}
|
|
}
|
|
|
|
// Remove duplicates
|
|
$newGroups = array_unique( $newGroups );
|
|
|
|
return [ $rawGroups, $newGroups ];
|
|
}
|
|
|
|
/**
|
|
* Check if throttle rate is malformed, i.e. if it has extra commas or a part of it is empty
|
|
*
|
|
* @param string $rate The throttle rate as saved in the DB ("count,period")
|
|
* @return string|null String with error type or null if the rate is valid
|
|
*/
|
|
private function checkThrottleRate( $rate ) {
|
|
if ( preg_match( '/^,/', $rate ) === 1 ) {
|
|
// The comma was inserted at least in throttle count. This behaves like if
|
|
// throttling isn't enabled, so just disable it
|
|
return 'disable';
|
|
} elseif ( preg_match( '/^\d+,$/', $rate ) === 1 || preg_match( '/^\d+,\d+$/', $rate ) === 0 ) {
|
|
// First condition is for comma only inside throttle period. The behaviour in this case
|
|
// is unclear, ask users to fix this by hand. Second condition is for every other case;
|
|
// since it's unpredictable what the problem is, we just ask to fix it by hand.
|
|
return 'hand';
|
|
} else {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Main logic of parameters normalization
|
|
*
|
|
* @return int Amount of normalized rows
|
|
*/
|
|
private function normalizeParameters() {
|
|
$user = AbuseFilterServices::getFilterUser()->getUserIdentity();
|
|
$actorMigration = AbuseFilterServices::getActorMigration();
|
|
$dryRun = $this->hasOption( 'dry-run' );
|
|
|
|
// IDs of filters with invalid rate (count or period)
|
|
$invalidRate = [];
|
|
// IDs of filters with invalid groups
|
|
$invalidGroups = [];
|
|
// IDs of filters where throttle parameters are completely empty, and even the filter ID is
|
|
// missing. This happened for filters containing a throttle group with a comma inside which
|
|
// were modified between the OOUI switch (gerrit/421487) and throttle repair (gerrit/459368):
|
|
// a bug caused all existing throttle parameters to be wiped away, so that afa_consequence
|
|
// holds an empty string and (unserialize(afh_actions))['throttle'] is null.
|
|
$totallyEmpty = [];
|
|
|
|
// Only select throttle actions
|
|
$actionRows = $this->dbw->select(
|
|
'abuse_filter_action',
|
|
[ 'afa_filter', 'afa_parameters' ],
|
|
[ 'afa_consequence' => 'throttle' ],
|
|
__METHOD__,
|
|
[ 'LOCK IN SHARE MODE' ]
|
|
);
|
|
|
|
$newActionRows = [];
|
|
// Save new, sanitized throttle parameters to be copied in abuse_filter_history.
|
|
// The structure is [ filterID => val ] where "val" is either an array with new params
|
|
// or null if throttle must be removed.
|
|
$historyThrottleParams = [];
|
|
$deleteActionIDs = [];
|
|
$changeActionIDs = [];
|
|
foreach ( $actionRows as $actRow ) {
|
|
$filter = $actRow->afa_filter;
|
|
|
|
if ( $actRow->afa_parameters === '' ) {
|
|
// All parameters are empty. See comment above the declaration of $totallyEmpty for
|
|
// why this happens. Definitely to be fixed by hand, without further checks.
|
|
$totallyEmpty[] = $filter;
|
|
continue;
|
|
}
|
|
|
|
$params = explode( "\n", $actRow->afa_parameters );
|
|
$rateCheck = $this->checkThrottleRate( $params[1] );
|
|
list( $oldGroups, $newGroups ) = $this->getNewGroups( $params );
|
|
|
|
// If the rate is invalid or the groups are empty (or only contain invalid identifiers),
|
|
// it means that the throttle limit is never reached. Since we cannot guess what the
|
|
// filter should do, nor we want to impose a default, we ask to manually fix the problem.
|
|
if ( $rateCheck === 'hand' ) {
|
|
$invalidRate[] = $filter;
|
|
}
|
|
if ( count( $newGroups ) === 0 ) {
|
|
$invalidGroups[] = $filter;
|
|
}
|
|
if ( $rateCheck === 'hand' || count( $newGroups ) === 0 ) {
|
|
continue;
|
|
}
|
|
|
|
if ( $rateCheck === 'disable' ) {
|
|
// Invalid rate, disable throttle for the filter
|
|
$deleteActionIDs[] = $actRow->afa_filter;
|
|
$historyThrottleParams[ $actRow->afa_filter ] = null;
|
|
} elseif ( $oldGroups !== $newGroups ) {
|
|
$newParams = array_merge( array_slice( $params, 0, 2 ), $newGroups );
|
|
$newActionRows[] = [
|
|
'afa_filter' => $actRow->afa_filter,
|
|
'afa_consequence' => 'throttle',
|
|
'afa_parameters' => implode( "\n", $newParams )
|
|
];
|
|
$changeActionIDs[] = $actRow->afa_filter;
|
|
$historyThrottleParams[ $actRow->afa_filter ] = $newParams;
|
|
} else {
|
|
// The filter is not broken!
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if ( $invalidRate || $invalidGroups || $totallyEmpty ) {
|
|
$invalidMsg = '';
|
|
if ( $invalidRate ) {
|
|
$invalidMsg .= 'Throttle count and period are malformed or empty for the following filters: ' .
|
|
implode( ', ', $invalidRate ) . '. ' .
|
|
"Please fix them by hand in the way they're meant to be, then launch the script again.\n";
|
|
}
|
|
if ( $invalidGroups ) {
|
|
$invalidMsg .= 'Throttle groups are empty for the following filters: ' .
|
|
implode( ', ', $invalidGroups ) . '. ' .
|
|
"Please add some groups or disable throttling, then launch the script again.\n";
|
|
}
|
|
if ( $totallyEmpty ) {
|
|
$invalidMsg .= 'Throttle parameters are empty for the following filters: ' .
|
|
implode( ', ', $totallyEmpty ) . '. ' .
|
|
'This was probably caused by a temporary bug and you should be able to find valid ' .
|
|
"parameters in each filter's history. Please restore them, then launch the script again.\n";
|
|
}
|
|
|
|
$this->fail( $invalidMsg );
|
|
}
|
|
|
|
// Use the same timestamps in abuse_filter and abuse_filter_history, since this is
|
|
// what we do in the actual code.
|
|
$timestamps = [];
|
|
$changeActionCount = count( $changeActionIDs );
|
|
if ( $changeActionCount ) {
|
|
if ( $dryRun ) {
|
|
$this->output(
|
|
"normalizeThrottleParameter has found $changeActionCount rows to change in " .
|
|
"abuse_filter_action for the following IDs: " . implode( ', ', $changeActionIDs ) . "\n"
|
|
);
|
|
} else {
|
|
$this->dbw->replace(
|
|
'abuse_filter_action',
|
|
[ [ 'afa_filter', 'afa_consequence' ] ],
|
|
$newActionRows,
|
|
__METHOD__
|
|
);
|
|
// Touch the abuse_filter table to update the "filter last modified" field
|
|
foreach ( $changeActionIDs as $id ) {
|
|
$timestamps[ $id ] = $this->dbw->timestamp();
|
|
|
|
$this->dbw->update(
|
|
'abuse_filter',
|
|
[
|
|
'af_timestamp' => $timestamps[ $id ]
|
|
] + $actorMigration->getInsertValues( $this->dbw, 'af_user', $user ),
|
|
[ 'af_id' => $id ],
|
|
__METHOD__
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
$deleteActionCount = count( $deleteActionIDs );
|
|
if ( $deleteActionCount ) {
|
|
if ( $dryRun ) {
|
|
$this->output(
|
|
"normalizeThrottleParameter has found $deleteActionCount rows to delete in " .
|
|
"abuse_filter_action and update in abuse_filter for the following IDs: " .
|
|
implode( ', ', $deleteActionIDs ) . "\n"
|
|
);
|
|
} else {
|
|
// Delete rows in abuse_filter_action
|
|
$this->dbw->delete(
|
|
'abuse_filter_action',
|
|
[
|
|
'afa_consequence' => 'throttle',
|
|
'afa_filter' => $deleteActionIDs
|
|
],
|
|
__METHOD__
|
|
);
|
|
// Update abuse_filter. abuse_filter_history done later
|
|
foreach ( $deleteActionIDs as $id ) {
|
|
$timestamps[ $id ] = $this->dbw->timestamp();
|
|
|
|
$this->dbw->update(
|
|
'abuse_filter',
|
|
[
|
|
'af_timestamp' => $timestamps[ $id ],
|
|
// Use string replacement so that we can avoid an extra query to retrieve the
|
|
// value and then explode, remove throttle and implode again.
|
|
'af_actions = ' . $this->dbw->strreplace(
|
|
$this->dbw->strreplace( 'af_actions', "',throttle'", "''" ),
|
|
"'throttle'",
|
|
"''"
|
|
)
|
|
] + $actorMigration->getInsertValues( $this->dbw, 'af_user', $user ),
|
|
[ 'af_id' => $id ],
|
|
__METHOD__
|
|
);
|
|
}
|
|
}
|
|
}
|
|
$affectedActionRows = $changeActionCount + $deleteActionCount;
|
|
|
|
$touchedIDs = array_merge( $changeActionIDs, $deleteActionIDs );
|
|
if ( count( $touchedIDs ) === 0 ) {
|
|
$this->output( "No throttle parameters to normalize.\n" );
|
|
return 0;
|
|
}
|
|
|
|
// Create new history rows for every changed filter
|
|
|
|
$newHistoryRows = [];
|
|
$changeHistoryFilters = [];
|
|
foreach ( $touchedIDs as $filter ) {
|
|
$histRow = $this->dbw->selectRow(
|
|
'abuse_filter_history',
|
|
[
|
|
// All columns in the table, aside from afh_id that we don't need, and the
|
|
// ones where we're going to put something new, plus afh_actions.
|
|
'afh_filter',
|
|
'afh_pattern',
|
|
'afh_comments',
|
|
'afh_flags',
|
|
'afh_public_comments',
|
|
'afh_deleted',
|
|
'afh_group',
|
|
'afh_actions'
|
|
],
|
|
[ 'afh_filter' => $filter ],
|
|
__METHOD__,
|
|
[ 'ORDER BY' => 'afh_id DESC', 'LOCK IN SHARE MODE' ]
|
|
);
|
|
|
|
if ( !isset( $historyThrottleParams[ $filter ] ) ) {
|
|
// Sanity
|
|
$this->fail( "Throttle parameters weren't saved for filter $filter" );
|
|
}
|
|
|
|
$timestamp = $timestamps[ $filter ] ?? null;
|
|
if ( !$timestamp && !$dryRun ) {
|
|
// Sanity check
|
|
$this->fail( "The timestamp wasn't saved for filter $filter" );
|
|
}
|
|
|
|
$actions = unserialize( $histRow->afh_actions );
|
|
if ( $historyThrottleParams[ $filter ] === null ) {
|
|
// Invalid rate, disable throttle for the filter
|
|
unset( $actions['throttle'] );
|
|
} else {
|
|
$actions['throttle'] = $historyThrottleParams[ $filter ];
|
|
}
|
|
|
|
$newHistoryRows[] = [
|
|
'afh_timestamp' => $timestamp,
|
|
'afh_changed_fields' => 'actions',
|
|
'afh_actions' => serialize( $actions )
|
|
] + get_object_vars( $histRow )
|
|
+ $actorMigration->getInsertValues( $this->dbw, 'afh_user', $user );
|
|
$changeHistoryFilters[] = $filter;
|
|
}
|
|
|
|
$historyCount = count( $changeHistoryFilters );
|
|
if ( $historyCount !== $affectedActionRows ) {
|
|
// Sanity: prevent unexpected errors.
|
|
$this->fail(
|
|
"The amount of affected rows isn't equal for abuse_filter_action and abuse_filter history. " .
|
|
"Found $affectedActionRows for the former and $historyCount for the latter."
|
|
);
|
|
}
|
|
if ( count( $newHistoryRows ) ) {
|
|
if ( $dryRun ) {
|
|
$this->output(
|
|
"normalizeThrottleParameter would insert $historyCount rows in abuse_filter_history" .
|
|
" for the following filters: " . implode( ', ', $changeHistoryFilters ) . "\n"
|
|
);
|
|
} else {
|
|
$this->dbw->insert(
|
|
'abuse_filter_history',
|
|
$newHistoryRows,
|
|
__METHOD__
|
|
);
|
|
}
|
|
}
|
|
return $affectedActionRows + $historyCount;
|
|
}
|
|
|
|
/**
|
|
* Beautify empty/missing/corrupted parameters in abuse_filter_history
|
|
*
|
|
* @return int Amount of beautified rows
|
|
*/
|
|
private function beautifyHistory() {
|
|
$dryRun = $this->hasOption( 'dry-run' );
|
|
|
|
// We need any row containing throttle, but there's no
|
|
// need to lock as these rows aren't changed by the actual code.
|
|
$likeClause = $this->dbw->buildLike(
|
|
$this->dbw->anyString(),
|
|
'throttle',
|
|
$this->dbw->anyString()
|
|
);
|
|
$histRows = $this->dbw->select(
|
|
'abuse_filter_history',
|
|
[ 'afh_id', 'afh_actions', 'afh_filter' ],
|
|
[ 'afh_actions ' . $likeClause ],
|
|
__METHOD__
|
|
);
|
|
|
|
$beautyIDs = [];
|
|
foreach ( $histRows as $row ) {
|
|
$acts = unserialize( $row->afh_actions );
|
|
if ( !array_key_exists( 'throttle', $acts ) ) {
|
|
// The LIKE clause is very raw, so this could happen
|
|
continue;
|
|
}
|
|
|
|
if ( $acts['throttle'] === null ) {
|
|
// Corrupted row, rebuild it (T215787)
|
|
$acts['throttle'] = [ $row->afh_filter, '0,0', 'none' ];
|
|
} elseif ( $this->checkThrottleRate( $acts['throttle'][1] ) !== null ) {
|
|
// Missing count, make it explicitly 0
|
|
$acts['throttle'][1] = preg_replace( '/^,/', '0,', $acts['throttle'][1] );
|
|
// Missing period, make it explicitly 0
|
|
$acts['throttle'][1] = preg_replace( '/,$/', ',0', $acts['throttle'][1] );
|
|
} elseif ( count( $acts['throttle'] ) === 2 ) {
|
|
// Missing groups, make them explicitly "none" (special group)
|
|
$acts['throttle'][] = 'none';
|
|
} else {
|
|
// Everything's fine!
|
|
continue;
|
|
}
|
|
|
|
$beautyIDs[] = $row->afh_id;
|
|
if ( !$dryRun ) {
|
|
$this->dbw->update(
|
|
'abuse_filter_history',
|
|
[ 'afh_actions' => serialize( $acts ) ],
|
|
[ 'afh_id' => $row->afh_id ],
|
|
__METHOD__
|
|
);
|
|
}
|
|
}
|
|
|
|
$changed = count( $beautyIDs );
|
|
if ( $changed ) {
|
|
$verb = $dryRun ? 'would beautify' : 'beautified';
|
|
$this->output(
|
|
"normalizeThrottleParameter $verb $changed rows in abuse_filter_history" .
|
|
" for the following history IDs: " . implode( ', ', $beautyIDs ) . "\n"
|
|
);
|
|
}
|
|
return $changed;
|
|
}
|
|
|
|
/**
|
|
* @inheritDoc
|
|
*/
|
|
public function doDBUpdates() {
|
|
$dryRun = $this->hasOption( 'dry-run' );
|
|
$this->dbw = $this->getDB( DB_PRIMARY );
|
|
$this->beginTransaction( $this->dbw, __METHOD__ );
|
|
|
|
$normalized = $this->normalizeParameters();
|
|
$beautified = $this->beautifyHistory();
|
|
|
|
$this->commitTransaction( $this->dbw, __METHOD__ );
|
|
|
|
$changed = $normalized + $beautified;
|
|
|
|
$resultMsg = $dryRun ?
|
|
"Throttle parameter normalization would change a total of $changed rows.\n" :
|
|
"Throttle parameters successfully normalized. Changed $changed rows.\n";
|
|
$this->output( $resultMsg );
|
|
|
|
return !$dryRun;
|
|
}
|
|
}
|
|
|
|
$maintClass = NormalizeThrottleParameters::class;
|
|
require_once RUN_MAINTENANCE_IF_MAIN;
|