* (bug 1505) Limit spam blacklist checks to newly added URLs, to reduce disruption when legitimate users edit existing pages that already happen to contain some spam.

Steals the load-existing-links function out of ConfirmEdit.
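In effect, the filter now parses the submitted text, diffs the resulting external links against the links already recorded for the page, and matches only the difference against the blacklist. A condensed sketch of that flow, pieced together from the two hunks below (identifiers as in the patch):

    // Links produced by parsing the edited text.
    $newLinks = array_keys( $out->getExternalLinks() );
    // Links already stored for this page (see getCurrentLinks() below).
    $oldLinks = $this->getCurrentLinks( $title );
    // Only URLs not previously on the page get checked against the blacklist.
    $addedLinks = array_diff( $newLinks, $oldLinks );
    $links = implode( "\n", $addedLinks );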
Brion Vibber 2008-05-13 23:31:33 +00:00
parent 1dc3f3e263
commit 45a4c9f03b

@@ -222,7 +222,15 @@ class SpamBlacklist {
 			$text = $wgParser->preSaveTransform( $text, $title, $wgUser, $options );
 			$out = $wgParser->parse( $text, $title, $options );
 		}
-		$links = implode( "\n", array_keys( $out->getExternalLinks() ) );
+		$newLinks = array_keys( $out->getExternalLinks() );
+		$oldLinks = $this->getCurrentLinks( $title );
+		$addedLinks = array_diff( $newLinks, $oldLinks );
+
+		wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', ', $oldLinks ) );
+		wfDebugLog( 'SpamBlacklist', "New URLs: " . implode( ', ', $newLinks ) );
+		wfDebugLog( 'SpamBlacklist', "Added URLs: " . implode( ', ', $addedLinks ) );
+
+		$links = implode( "\n", $addedLinks );
 
 		# Strip whitelisted URLs from the match
 		if( is_array( $whitelists ) ) {
@@ -265,6 +273,24 @@ class SpamBlacklist {
 		wfProfileOut( $fname );
 		return $retVal;
 	}
+
+	/**
+	 * Look up the links currently in the article, so we can
+	 * ignore them on a second run.
+	 *
+	 * WARNING: I can add more *of the same link* with no problem here.
+	 */
+	function getCurrentLinks( $title ) {
+		$dbr =& wfGetDB( DB_SLAVE );
+		$id = $title->getArticleId(); // should be zero queries
+		$res = $dbr->select( 'externallinks', array( 'el_to' ),
+			array( 'el_from' => $id ), __METHOD__ );
+		$links = array();
+		while ( $row = $dbr->fetchObject( $res ) ) {
+			$links[] = $row->el_to;
+		}
+		return $links;
+	}
 
 	/**
 	 * Fetch an article from this or another local MediaWiki database.
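The WARNING in getCurrentLinks() is worth spelling out: array_diff() compares values only, not counts, so an edit that adds further copies of a URL already present on the page yields an empty "added" set and is never checked. A minimal illustration, with hypothetical URLs:

    // Hypothetical URLs, for illustration only.
    $oldLinks = array( 'http://spam.example/' );
    $newLinks = array( 'http://spam.example/', 'http://spam.example/' );

    // Both copies match an existing entry, so nothing counts as added
    // and the blacklist check sees an empty list.
    $addedLinks = array_diff( $newLinks, $oldLinks ); // => array()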