maintenance: Migrate to IReadableDatabase::newSelectQueryBuilder

Also use the expression builder to avoid raw SQL.

Bug: T312420
Change-Id: I981f7d8107b7c4401056266c58db4457ad759f9c
This commit is contained in:
Umherirrender 2024-04-29 22:15:35 +02:00
parent ca7c0232d3
commit d0e85abda1
5 changed files with 121 additions and 109 deletions

View file

@ -15,6 +15,8 @@ use ManualLogEntry;
use MediaWiki\Extension\AbuseFilter\AbuseFilterServices;
use MediaWiki\Extension\AbuseFilter\Special\SpecialAbuseFilter;
use MediaWiki\User\User;
use Wikimedia\Rdbms\IExpression;
use Wikimedia\Rdbms\LikeValue;
/**
* @codeCoverageIgnore
@ -47,25 +49,28 @@ class AddMissingLoggingEntries extends LoggedUpdateMaintenance {
$db = $this->getDB( DB_REPLICA, 'vslow' );
$logParamsConcat = $db->buildConcat( [ 'afh_id', $db->addQuotes( "\n" ) ] );
$legacyParamsLike = $db->buildLike( $logParamsConcat, $db->anyString() );
$legacyParamsLike = new LikeValue( $logParamsConcat, $db->anyString() );
// Non-legacy entries are a serialized array with 'newId' and 'historyId' keys
$newLogParamsLike = $db->buildLike( $db->anyString(), 'historyId', $db->anyString() );
$newLogParamsLike = new LikeValue( $db->anyString(), 'historyId', $db->anyString() );
$actorQuery = AbuseFilterServices::getActorMigration()->getJoin( 'afh_user' );
// Find all entries in abuse_filter_history without logging entry of same timestamp
$afhResult = $db->select(
[ 'abuse_filter_history', 'logging' ] + $actorQuery['tables'],
[ 'afh_id', 'afh_filter', 'afh_timestamp', 'afh_deleted' ] + $actorQuery['fields'],
[
'log_id IS NULL',
"NOT log_params $newLogParamsLike"
],
__METHOD__,
[],
[ 'logging' => [
'LEFT JOIN',
"afh_timestamp = log_timestamp AND log_params $legacyParamsLike AND log_type = 'abusefilter'"
] ] + $actorQuery['joins']
);
$afhResult = $db->newSelectQueryBuilder()
->select( [ 'afh_id', 'afh_filter', 'afh_timestamp', 'afh_deleted' ] )
->fields( $actorQuery['fields'] )
->from( 'abuse_filter_history' )
->leftJoin( 'logging', null, [
'afh_timestamp = log_timestamp',
$db->expr( 'log_params', IExpression::LIKE, $legacyParamsLike ),
'log_type' => 'abusefilter',
] )
->tables( $actorQuery['tables'] )
->where( [
'log_id' => null,
$db->expr( 'log_params', IExpression::NOT_LIKE, $newLogParamsLike ),
] )
->joinConds( $actorQuery['joins'] )
->caller( __METHOD__ )
->fetchResultSet();
// Because the timestamp matches aren't exact (sometimes a couple of
// seconds off), we need to check all our results and ignore those that
@ -80,12 +85,12 @@ class AddMissingLoggingEntries extends LoggedUpdateMaintenance {
return !$dryRun;
}
$logResult = $this->getDB( DB_REPLICA )->selectFieldValues(
'logging',
'log_params',
[ 'log_type' => 'abusefilter', 'log_params' => $logParams ],
__METHOD__
);
$logResult = $this->getDB( DB_REPLICA )->newSelectQueryBuilder()
->select( 'log_params' )
->from( 'logging' )
->where( [ 'log_type' => 'abusefilter', 'log_params' => $logParams ] )
->caller( __METHOD__ )
->fetchFieldValues();
foreach ( $logResult as $params ) {
// id . "\n" . filter

View file

@ -178,12 +178,12 @@ class MigrateActorsAF extends LoggedUpdateMaintenance {
->execute();
$countActors += $dbw->affectedRows();
$res = $dbw->select(
'actor',
[ 'actor_id', 'actor_name' ],
[ 'actor_name' => array_map( 'strval', array_keys( $needActors ) ) ],
__METHOD__
);
$res = $dbw->newSelectQueryBuilder()
->select( [ 'actor_id', 'actor_name' ] )
->from( 'actor' )
->where( [ 'actor_name' => array_map( 'strval', array_keys( $needActors ) ) ] )
->caller( __METHOD__ )
->fetchResultSet();
foreach ( $res as $row ) {
$needActors[$row->actor_name] = $row->actor_id;
}
@ -238,19 +238,18 @@ class MigrateActorsAF extends LoggedUpdateMaintenance {
$countErrors = 0;
while ( true ) {
// Fetch the rows needing update
$res = $dbw->select(
$table,
array_merge( $primaryKey, [ $userField, $nameField, 'actor_id' => $actorIdSubquery ] ),
[
$res = $dbw->newSelectQueryBuilder()
->select( $primaryKey )
->fields( [ $userField, $nameField, 'actor_id' => $actorIdSubquery ] )
->from( $table )
->where( [
$actorField => 0,
$next,
],
__METHOD__,
[
'ORDER BY' => $primaryKey,
'LIMIT' => $this->mBatchSize,
]
);
] )
->orderBy( $primaryKey )
->limit( $this->mBatchSize )
->caller( __METHOD__ )
->fetchResultSet();
if ( !$res->numRows() ) {
break;
}

View file

@ -32,16 +32,16 @@ class PurgeOldLogIPData extends Maintenance {
$count = 0;
do {
$ids = $dbw->selectFieldValues(
'abuse_filter_log',
'afl_id',
[
'afl_ip <> ' . $dbw->addQuotes( '' ),
"afl_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $cutoffUnix ) )
],
__METHOD__,
[ 'LIMIT' => $this->getBatchSize() ]
);
$ids = $dbw->newSelectQueryBuilder()
->select( 'afl_id' )
->from( 'abuse_filter_log' )
->where( [
$dbw->expr( 'afl_ip', '!=', '' ),
$dbw->expr( 'afl_timestamp', '<', $dbw->timestamp( $cutoffUnix ) ),
] )
->limit( $this->getBatchSize() )
->caller( __METHOD__ )
->fetchFieldValues();
if ( $ids ) {
$dbw->newUpdateQueryBuilder()

View file

@ -59,13 +59,14 @@ class SearchFilters extends Maintenance {
$pattern = $dbr->addQuotes( $this->getOption( 'pattern' ) );
if ( $dbr->tableExists( 'abuse_filter' ) ) {
$rows = $dbr->select(
'abuse_filter',
[ 'dbname' => 'DATABASE()', 'af_id' ],
[
$rows = $dbr->newSelectQueryBuilder()
->select( [ 'dbname' => 'DATABASE()', 'af_id' ] )
->from( 'abuse_filter' )
->where( [
"af_pattern RLIKE $pattern"
]
);
] )
->caller( __METHOD__ )
->fetchResultSet();
foreach ( $rows as $row ) {
$this->output( $row->dbname . "\t" . $row->af_id . "\n" );

View file

@ -12,8 +12,10 @@ use MediaWiki\Extension\AbuseFilter\Variables\VariablesBlobStore;
use MediaWiki\MediaWikiServices;
use MediaWiki\Title\Title;
use UnexpectedValueException;
use Wikimedia\Rdbms\IExpression;
use Wikimedia\Rdbms\IMaintainableDatabase;
use Wikimedia\Rdbms\IResultWrapper;
use Wikimedia\Rdbms\LikeValue;
// @codeCoverageIgnoreStart
$IP = getenv( 'MW_INSTALL_PATH' );
@ -102,12 +104,11 @@ class UpdateVarDumps extends LoggedUpdateMaintenance {
$this->dbw = $this->getDB( DB_PRIMARY );
// Control batching with the primary key to keep the queries performant and allow gaps
$this->allRowsCount = (int)$this->dbr->selectField(
'abuse_filter_log',
'MAX(afl_id)',
[],
__METHOD__
);
$this->allRowsCount = (int)$this->dbr->newSelectQueryBuilder()
->select( 'MAX(afl_id)' )
->from( 'abuse_filter_log' )
->caller( __METHOD__ )
->fetchField();
if ( $this->allRowsCount === 0 ) {
$this->output( "...the abuse_filter_log table is empty.\n" );
@ -145,17 +146,17 @@ class UpdateVarDumps extends LoggedUpdateMaintenance {
$deleted = $rebuilt = 0;
do {
$this->maybePrintProgress( $prevID );
$brokenRows = $this->dbr->select(
'abuse_filter_log',
'*',
[
$brokenRows = $this->dbr->newSelectQueryBuilder()
->select( '*' )
->from( 'abuse_filter_log' )
->where( [
'afl_var_dump' => '',
"afl_id > $prevID",
"afl_id <= $curID"
],
__METHOD__,
[ 'ORDER BY' => 'afl_id ASC' ]
);
$this->dbr->expr( 'afl_id', '>', $prevID ),
$this->dbr->expr( 'afl_id', '<=', $curID ),
] )
->orderBy( 'afl_id' )
->caller( __METHOD__ )
->fetchResultSet();
$prevID = $curID;
$curID += $batchSize;
@ -195,13 +196,13 @@ class UpdateVarDumps extends LoggedUpdateMaintenance {
// right before the correct one, so their afl_id should only differ by 1, but let's
// play safe and only assume it's greater. Note that the two entries are guaranteed
// to have the same timestamp.
$findRow[] = 'afl_id > ' . $this->dbr->addQuotes( $row->afl_id );
$saneDuplicate = $this->dbr->selectRow(
'abuse_filter_log',
'1',
$findRow,
__METHOD__
);
$findRow[] = $this->dbr->expr( 'afl_id', '>', $row->afl_id );
$saneDuplicate = $this->dbr->newSelectQueryBuilder()
->select( '1' )
->from( 'abuse_filter_log' )
->where( $findRow )
->caller( __METHOD__ )
->fetchRow();
if ( $saneDuplicate ) {
// Just delete the row!
@ -264,24 +265,24 @@ class UpdateVarDumps extends LoggedUpdateMaintenance {
$changeRows = $truncatedDumps = 0;
do {
$this->maybePrintProgress( $prevID );
$res = $this->dbr->select(
'abuse_filter_log',
[ 'afl_id', 'afl_var_dump' ],
[
'afl_var_dump NOT ' . $this->dbr->buildLike(
$res = $this->dbr->newSelectQueryBuilder()
->select( [ 'afl_id', 'afl_var_dump' ] )
->from( 'abuse_filter_log' )
->where( [
$this->dbr->expr( 'afl_var_dump', IExpression::NOT_LIKE, new LikeValue(
'stored-text:',
$this->dbr->anyString()
),
'afl_var_dump NOT ' . $this->dbr->buildLike(
) ),
$this->dbr->expr( 'afl_var_dump', IExpression::NOT_LIKE, new LikeValue(
'tt:',
$this->dbr->anyString()
),
"afl_id > $prevID",
"afl_id <= $curID"
],
__METHOD__,
[ 'ORDER BY' => 'afl_id ASC' ]
);
) ),
$this->dbr->expr( 'afl_id', '>', $prevID ),
$this->dbr->expr( 'afl_id', '<=', $curID ),
] )
->orderBy( 'afl_id' )
->caller( __METHOD__ )
->fetchResultSet();
$prevID = $curID;
$curID += $batchSize;
@ -481,22 +482,23 @@ class UpdateVarDumps extends LoggedUpdateMaintenance {
$this->dbr->addQuotes( '' )
) );
$dumpLike = $this->dbr->buildLike( 'stored-text:', $this->dbr->anyString() );
$dumpLike = new LikeValue( 'stored-text:', $this->dbr->anyString() );
$esAccess = MediaWikiServices::getInstance()->getExternalStoreAccess();
do {
$this->maybePrintProgress( $prevID );
$res = $this->dbr->select(
[ 'text', 'abuse_filter_log' ],
[ 'old_id', 'old_text', 'old_flags' ],
[
"afl_var_dump $dumpLike",
"afl_id > $prevID",
"afl_id <= $curID"
],
__METHOD__,
[ 'DISTINCT', 'ORDER BY' => 'old_id ASC' ],
[ 'abuse_filter_log' => [ 'JOIN', "old_id = $idSQL" ] ]
);
$res = $this->dbr->newSelectQueryBuilder()
->select( [ 'old_id', 'old_text', 'old_flags' ] )
->distinct()
->from( 'text' )
->join( 'abuse_filter_log', null, "old_id = $idSQL" )
->where( [
$this->dbr->expr( 'afl_var_dump', IExpression::LIKE, $dumpLike ),
$this->dbr->expr( 'afl_id', '>', $prevID ),
$this->dbr->expr( 'afl_id', '<=', $curID ),
] )
->orderBy( 'old_id' )
->caller( __METHOD__ )
->fetchResultSet();
$prevID = $curID;
$curID += $batchSize;
@ -673,13 +675,18 @@ class UpdateVarDumps extends LoggedUpdateMaintenance {
$table = 'abuse_filter_log';
$var = "afl_var_dump = $newIdSQL";
$conds = [
"afl_id > $prevID",
"afl_id <= $curID",
'afl_var_dump ' . $this->dbr->buildLike( 'stored-text:', $this->dbr->anyString() )
$this->dbr->expr( 'afl_id', '>', $prevID ),
$this->dbr->expr( 'afl_id', '<=', $curID ),
$this->dbr->expr( 'afl_var_dump', IExpression::LIKE,
new LikeValue( 'stored-text:', $this->dbr->anyString() ) ),
];
$options = [ 'ORDER BY' => 'afl_id ASC' ];
if ( $this->dryRun ) {
$numRows += $this->dbr->selectRowCount( $table, $var, $conds, __METHOD__, $options );
$numRows += $this->dbr->newSelectQueryBuilder()
->from( $table )
->where( $conds )
->caller( __METHOD__ )
->fetchRowCount();
} else {
$this->dbw->newUpdateQueryBuilder()
->update( $table )