mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/AbuseFilter.git
synced 2024-12-18 00:20:37 +00:00
Fix more inconsistencies in scripts for logging entries
This includes a bunch of improvements. In addMissingLoggingEntries: - Don't access mDescription directly - Build a ManualLogEntry instead of stuffing data in the DB In fixOldLogEntries: - Fix entries having log_page = NULL instead of 0 due to addMissingLoggingEntries skipping that field - Fix entries having log_deleted = afh_deleted caused by addMissingLoggingEntries -- those are completely unrelated - Add batching, controlled by log_id, with default size of 500 - Use Database::strreplace to have a single UPDATE per batch, instead of one per row. - In dry run, when checking rows to update, exclude the rows that would've been deleted in the first phase. Bug: T228655 Change-Id: I885dba3f0772633d843b8a55e483047b169dc9ba
This commit is contained in:
parent
dff56324a3
commit
54512dd124
|
@ -16,7 +16,7 @@ class AddMissingLoggingEntries extends LoggedUpdateMaintenance {
|
|||
public function __construct() {
|
||||
parent::__construct();
|
||||
|
||||
$this->mDescription = 'Add missing logging entries for abusefilter-modify T54919';
|
||||
$this->addDescription( 'Add missing logging entries for abusefilter-modify T54919' );
|
||||
$this->addOption( 'dry-run', 'Perform a dry run' );
|
||||
$this->addOption( 'verbose', 'Print a list of affected afh_id' );
|
||||
$this->requireExtension( 'Abuse Filter' );
|
||||
|
@ -108,20 +108,26 @@ class AddMissingLoggingEntries extends LoggedUpdateMaintenance {
|
|||
$factory->waitForReplication();
|
||||
}
|
||||
$user = User::newFromAnyId( $row->afh_user, $row->afh_user_text, null );
|
||||
$dbw->insert(
|
||||
'logging',
|
||||
[
|
||||
'log_type' => 'abusefilter',
|
||||
'log_action' => 'modify',
|
||||
'log_timestamp' => $row->afh_timestamp,
|
||||
'log_namespace' => -1,
|
||||
'log_title' => SpecialPageFactory::getLocalNameFor( 'AbuseFilter' ) . '/' . $row->afh_filter,
|
||||
'log_params' => $row->afh_id . "\n" . $row->afh_filter,
|
||||
'log_deleted' => $row->afh_deleted,
|
||||
] + CommentStore::getStore()->insert( $dbw, 'log_comment', '' )
|
||||
+ ActorMigration::newMigration()->getInsertValues( $dbw, 'log_user', $user ),
|
||||
__METHOD__
|
||||
|
||||
if ( $user === null ) {
|
||||
// This isn't supposed to happen.
|
||||
continue;
|
||||
}
|
||||
|
||||
// This copies the code in AbuseFilter::doSaveFilter
|
||||
$logEntry = new ManualLogEntry( 'abusefilter', 'modify' );
|
||||
$logEntry->setPerformer( $user );
|
||||
$logEntry->setTarget(
|
||||
SpecialPage::getTitleFor( SpecialAbuseFilter::PAGE_NAME, $row->afh_filter )
|
||||
);
|
||||
// Use the new format!
|
||||
$logEntry->setParameters( [
|
||||
'historyId' => $row->afh_id,
|
||||
'newId' => $row->afh_filter
|
||||
] );
|
||||
$logEntry->setTimestamp( $row->afh_timestamp );
|
||||
$logEntry->insert( $dbw );
|
||||
|
||||
$count++;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,6 +18,9 @@ class FixOldLogEntries extends LoggedUpdateMaintenance {
|
|||
/** @var bool */
|
||||
private $dryRun;
|
||||
|
||||
/** @var int Amount of rows in the logging table */
|
||||
private $loggingRowsCount;
|
||||
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
|
@ -28,6 +31,7 @@ class FixOldLogEntries extends LoggedUpdateMaintenance {
|
|||
$this->addOption( 'verbose', 'Print some more debug info' );
|
||||
$this->addOption( 'dry-run', 'Perform a dry run' );
|
||||
$this->requireExtension( 'Abuse Filter' );
|
||||
$this->setBatchSize( 500 );
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -46,93 +50,188 @@ class FixOldLogEntries extends LoggedUpdateMaintenance {
|
|||
*/
|
||||
private function deleteDuplicatedRows() {
|
||||
$dbr = wfGetDB( DB_REPLICA, 'vslow' );
|
||||
$dbw = wfGetDB( DB_MASTER );
|
||||
$newFormatLike = $dbr->buildLike( $dbr->anyString(), 'historyId', $dbr->anyString() );
|
||||
$batchSize = $this->getBatchSize();
|
||||
$prevID = 0;
|
||||
$curID = $batchSize;
|
||||
$ret = [];
|
||||
|
||||
// Select all non-legacy entries
|
||||
$res = $dbr->selectFieldValues(
|
||||
'logging',
|
||||
'log_params',
|
||||
[
|
||||
'log_type' => 'abusefilter',
|
||||
"log_params $newFormatLike"
|
||||
],
|
||||
__METHOD__
|
||||
);
|
||||
|
||||
if ( !$res ) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$legacyParams = [];
|
||||
foreach ( $res as $logParams ) {
|
||||
$params = unserialize( $logParams );
|
||||
// The script always inserted duplicates with the wrong '\n'
|
||||
$legacyParams[] = $params['historyId'] . '\n' . $params['newId'];
|
||||
}
|
||||
|
||||
// Don't do a delete already, as it would have poor performance and could kill the DB
|
||||
$deleteIDs = $dbr->selectFieldValues(
|
||||
'logging',
|
||||
'log_id',
|
||||
[
|
||||
'log_type' => 'abusefilter',
|
||||
'log_params' => $legacyParams
|
||||
],
|
||||
__METHOD__
|
||||
);
|
||||
|
||||
if ( !$this->dryRun && $deleteIDs ) {
|
||||
// Note that we delete entries with legacy format, which are the ones erroneously inserted
|
||||
// by the script.
|
||||
wfGetDB( DB_MASTER )->delete(
|
||||
do {
|
||||
$res = $dbr->selectFieldValues(
|
||||
'logging',
|
||||
[ 'log_id' => $deleteIDs ],
|
||||
'log_params',
|
||||
[
|
||||
'log_type' => 'abusefilter',
|
||||
"log_params $newFormatLike",
|
||||
"log_id > $prevID",
|
||||
"log_id <= $curID",
|
||||
],
|
||||
__METHOD__
|
||||
);
|
||||
}
|
||||
return $deleteIDs;
|
||||
$prevID = $curID;
|
||||
$curID += $batchSize;
|
||||
|
||||
if ( !$res ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$legacyParams = [];
|
||||
foreach ( $res as $logParams ) {
|
||||
$params = unserialize( $logParams );
|
||||
if ( $params === false ) {
|
||||
// Sanity check
|
||||
$this->fatalError( __METHOD__ . ": Cannot unserialize $logParams" );
|
||||
}
|
||||
// The script always inserted duplicates with the wrong '\n'
|
||||
$legacyParams[] = $params['historyId'] . '\n' . $params['newId'];
|
||||
}
|
||||
|
||||
// Save the IDs for later. Note: it is guaranteed that we'll get all and only the entries
|
||||
// that we want, because they contain a reference to afh_id, which is unique.
|
||||
$deleteIDs = $dbr->selectFieldValues(
|
||||
'logging',
|
||||
'log_id',
|
||||
[
|
||||
'log_type' => 'abusefilter',
|
||||
'log_params' => $legacyParams
|
||||
],
|
||||
__METHOD__
|
||||
);
|
||||
$ret = array_merge( $ret, $deleteIDs );
|
||||
|
||||
if ( !$this->dryRun && $deleteIDs ) {
|
||||
// Note that we delete entries with legacy format, which are the ones erroneously inserted
|
||||
// by the script.
|
||||
$dbw->delete(
|
||||
'logging',
|
||||
[ 'log_id' => $deleteIDs ],
|
||||
__METHOD__
|
||||
);
|
||||
}
|
||||
} while ( $prevID <= $this->loggingRowsCount );
|
||||
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Change single-quote newlines to double-quotes newlines
|
||||
*
|
||||
* @param int[] $deleted log_id's that deleteDuplicatedRows would delete/has deleted.
|
||||
* This is used in dry-run to avoid reporting them twice.
|
||||
* @return int[] Affected log_id's
|
||||
*/
|
||||
private function changeNewlineType() {
|
||||
private function changeNewlineType( array $deleted ) {
|
||||
$dbr = wfGetDB( DB_REPLICA, 'vslow' );
|
||||
$dbw = wfGetDB( DB_MASTER );
|
||||
$res = $dbr->select(
|
||||
'logging',
|
||||
[ 'log_id', 'log_params' ],
|
||||
[
|
||||
'log_type' => 'abusefilter',
|
||||
'log_params ' . $dbr->buildLike(
|
||||
$dbr->anyString(),
|
||||
'\n',
|
||||
$dbr->anyString()
|
||||
)
|
||||
],
|
||||
__METHOD__
|
||||
$batchSize = $this->getBatchSize();
|
||||
$prevID = 1;
|
||||
$curID = $batchSize;
|
||||
$ret = [];
|
||||
|
||||
$likeClause = $dbr->buildLike(
|
||||
$dbr->anyString(),
|
||||
'\n',
|
||||
$dbr->anyString()
|
||||
);
|
||||
$replaceClause = $dbw->strreplace(
|
||||
'log_params',
|
||||
$dbw->addQuotes( '\n' ),
|
||||
$dbw->addQuotes( "\n" )
|
||||
);
|
||||
// Don't pass an empty array to makeList
|
||||
$extraConds = $this->dryRun && $deleted ?
|
||||
[ 'log_id NOT IN ' . $dbr->makeList( $deleted ) ] :
|
||||
[];
|
||||
do {
|
||||
$ids = $dbr->selectFieldValues(
|
||||
'logging',
|
||||
'log_id',
|
||||
array_merge(
|
||||
[
|
||||
'log_type' => 'abusefilter',
|
||||
'log_params ' . $likeClause,
|
||||
"log_id >= $prevID",
|
||||
"log_id <= $curID",
|
||||
],
|
||||
$extraConds
|
||||
),
|
||||
__METHOD__
|
||||
);
|
||||
$prevID = $curID + 1;
|
||||
$curID += $batchSize;
|
||||
|
||||
$updated = [];
|
||||
foreach ( $res as $row ) {
|
||||
$par = explode( '\n', $row->log_params );
|
||||
if ( count( $par ) === 2 ) {
|
||||
if ( !$this->dryRun ) {
|
||||
// Keep the entries legacy
|
||||
$newVal = implode( "\n", $par );
|
||||
|
||||
if ( !$this->dryRun ) {
|
||||
$dbw->update(
|
||||
'logging',
|
||||
[ 'log_params' => $newVal ],
|
||||
[ 'log_id' => $row->log_id ],
|
||||
__METHOD__
|
||||
);
|
||||
}
|
||||
$updated[] = $row->log_id;
|
||||
$dbw->update(
|
||||
'logging',
|
||||
[ 'log_params = ' . $replaceClause ],
|
||||
[ 'log_id' => $ids ],
|
||||
__METHOD__
|
||||
);
|
||||
}
|
||||
}
|
||||
return $updated;
|
||||
$ret = array_merge( $ret, $ids );
|
||||
} while ( $prevID <= $this->loggingRowsCount );
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fix other minor errors caused by addMissingLoggingEntries, and align the values to the
|
||||
* ones inserted by the actual code. Namely:
|
||||
* - Set log_page to 0 instead of NULL
|
||||
* - Don't set log_deleted based on afh_deleted (note: this will miss any log entry created
|
||||
* by addMissingLoggingEntries and manually deleted afterwards. However, there shouldn't
|
||||
* be any reason to delete these entries)
|
||||
*
|
||||
* @param int[] $deleted Same as self::changeNewlineType
|
||||
* @return int[]
|
||||
*/
|
||||
private function updateLoggingFields( array $deleted ) {
|
||||
$dbr = wfGetDB( DB_REPLICA, 'vslow' );
|
||||
$dbw = wfGetDB( DB_MASTER );
|
||||
$batchSize = $this->getBatchSize();
|
||||
$prevID = 1;
|
||||
$curID = $batchSize;
|
||||
$ret = [];
|
||||
|
||||
// Don't pass an empty array to makeList
|
||||
$extraConds = $this->dryRun && $deleted ?
|
||||
[ 'log_id NOT IN ' . $dbr->makeList( $deleted ) ] :
|
||||
[];
|
||||
do {
|
||||
// 'log_page IS NULL' is guaranteed to return all and only the entries created by the script
|
||||
$legacyIDs = $dbr->selectFieldValues(
|
||||
'logging',
|
||||
'log_id',
|
||||
array_merge(
|
||||
[
|
||||
'log_type' => 'abusefilter',
|
||||
'log_page IS NULL',
|
||||
"log_id >= $prevID",
|
||||
"log_id <= $curID",
|
||||
],
|
||||
$extraConds
|
||||
),
|
||||
__METHOD__
|
||||
);
|
||||
$prevID = $curID + 1;
|
||||
$curID += $batchSize;
|
||||
|
||||
if ( !$this->dryRun ) {
|
||||
$dbw->update(
|
||||
'logging',
|
||||
[
|
||||
'log_page' => 0,
|
||||
'log_deleted' => 0
|
||||
],
|
||||
[ 'log_id' => $legacyIDs ],
|
||||
__METHOD__
|
||||
);
|
||||
}
|
||||
$ret = array_merge( $ret, $legacyIDs );
|
||||
} while ( $prevID <= $this->loggingRowsCount );
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -140,6 +239,12 @@ class FixOldLogEntries extends LoggedUpdateMaintenance {
|
|||
*/
|
||||
public function doDBUpdates() {
|
||||
$this->dryRun = $this->hasOption( 'dry-run' );
|
||||
$this->loggingRowsCount = (int)wfGetDB( DB_REPLICA )->selectField(
|
||||
'logging',
|
||||
'MAX(log_id)',
|
||||
[],
|
||||
__METHOD__
|
||||
);
|
||||
|
||||
$deleted = $this->deleteDuplicatedRows();
|
||||
|
||||
|
@ -152,16 +257,26 @@ class FixOldLogEntries extends LoggedUpdateMaintenance {
|
|||
$this->output( 'The affected log_id\'s are: ' . implode( ', ', $deleted ) . "\n" );
|
||||
}
|
||||
|
||||
$updated = $this->changeNewlineType();
|
||||
|
||||
$updatedNewLines = $this->changeNewlineType( $deleted );
|
||||
$updatedFields = $this->updateLoggingFields( $deleted );
|
||||
$updateVerb = $this->dryRun ? 'would update' : 'updated';
|
||||
$numUpd = count( $updated );
|
||||
|
||||
$numNewLine = count( $updatedNewLines );
|
||||
$this->output(
|
||||
__CLASS__ . " $updateVerb $numUpd rows.\n"
|
||||
__CLASS__ . " $updateVerb newlines for $numNewLine rows.\n"
|
||||
);
|
||||
if ( $updated && $this->hasOption( 'verbose' ) ) {
|
||||
$this->output( 'The affected log_id\'s are: ' . implode( ', ', $updated ) . "\n" );
|
||||
if ( $updatedNewLines && $this->hasOption( 'verbose' ) ) {
|
||||
$this->output( 'The affected log_id\'s are: ' . implode( ', ', $updatedNewLines ) . "\n" );
|
||||
}
|
||||
|
||||
$numFields = count( $updatedFields );
|
||||
$this->output(
|
||||
__CLASS__ . " $updateVerb fields for $numFields rows.\n"
|
||||
);
|
||||
if ( $updatedFields && $this->hasOption( 'verbose' ) ) {
|
||||
$this->output( 'The affected log_id\'s are: ' . implode( ', ', $updatedFields ) . "\n" );
|
||||
}
|
||||
|
||||
return !$this->dryRun;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue