From 45a4c9f03b6b896c1354246579c114f17f9cdac0 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 13 May 2008 23:31:33 +0000 Subject: [PATCH] * (bug 1505) Limit spam blacklist checks to new URLs to reduce disruption of existing pages being legitimately edited by legitimate people which happen to already have some spam on them. Steals the load-existing-links function out of ConfirmEdit. --- SpamBlacklist_body.php | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/SpamBlacklist_body.php b/SpamBlacklist_body.php index c49e8e3b..1f99b8e6 100644 --- a/SpamBlacklist_body.php +++ b/SpamBlacklist_body.php @@ -222,7 +222,15 @@ class SpamBlacklist { $text = $wgParser->preSaveTransform( $text, $title, $wgUser, $options ); $out = $wgParser->parse( $text, $title, $options ); } - $links = implode( "\n", array_keys( $out->getExternalLinks() ) ); + $newLinks = array_keys( $out->getExternalLinks() ); + $oldLinks = $this->getCurrentLinks( $title ); + $addedLinks = array_diff( $newLinks, $oldLinks ); + + wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', ', $oldLinks ) ); + wfDebugLog( 'SpamBlacklist', "New URLs: " . implode( ', ', $newLinks ) ); + wfDebugLog( 'SpamBlacklist', "Added URLs: " . implode( ', ', $addedLinks ) ); + + $links = implode( "\n", $addedLinks ); # Strip whitelisted URLs from the match if( is_array( $whitelists ) ) { @@ -265,6 +273,24 @@ class SpamBlacklist { wfProfileOut( $fname ); return $retVal; } + + /** + * Look up the links currently in the article, so we can + * ignore them on a second run. + * + * WARNING: I can add more *of the same link* with no problem here. 
+	 */
+	function getCurrentLinks( $title ) { // collects the el_to values stored in externallinks for the ID returned by $title->getArticleId()
+		$dbr =& wfGetDB( DB_SLAVE ); // NOTE(review): DB_SLAVE presumably selects a replica connection, so rows may lag behind the latest save -- confirm
+		$id = $title->getArticleId(); // should be zero queries
+		$res = $dbr->select( 'externallinks', array( 'el_to' ), // only the el_to column is requested
+			array( 'el_from' => $id ), __METHOD__ ); // rows are restricted to el_from = $id; __METHOD__ is passed as the caller name
+		$links = array(); // stays empty when the query yields no rows
+		while ( $row = $dbr->fetchObject( $res ) ) { // loop ends once fetchObject() returns a falsy value
+			$links[] = $row->el_to; // duplicates are kept as-is; no de-duplication happens here
+		}
+		return $links; // flat array of el_to values, in the order the rows were fetched
+	}
 
 	/**
 	 * Fetch an article from this or another local MediaWiki database.