From aaf4d74d1865db772cf69b72ed325ea64ca653cc Mon Sep 17 00:00:00 2001 From: John Du Hart Date: Wed, 18 Jan 2012 23:29:37 +0000 Subject: [PATCH] Adding Email blacklisting to the SpamBlacklist extension This relies on r109111 --- BaseBlacklist.php | 45 +++++++++++++++++++++++++++++--- EmailBlacklist.php | 59 ++++++++++++++++++++++++++++++++++++++++++ SpamBlacklist.i18n.php | 24 +++++++++++++++++ SpamBlacklist.php | 5 +++- SpamBlacklistHooks.php | 48 +++++++++++++++++++++++++++++++--- SpamBlacklist_body.php | 21 ++++++++++++++- SpamRegexBatch.php | 26 +++++++++---------- 7 files changed, 206 insertions(+), 22 deletions(-) create mode 100644 EmailBlacklist.php diff --git a/BaseBlacklist.php b/BaseBlacklist.php index f837d2a7..9d26e0da 100644 --- a/BaseBlacklist.php +++ b/BaseBlacklist.php @@ -43,6 +43,7 @@ abstract class BaseBlacklist { */ private static $blacklistTypes = array( 'spam' => 'SpamBlacklist', + 'email' => 'EmailBlacklist', ); /** @@ -121,7 +122,7 @@ abstract class BaseBlacklist { * @param Title $title * @return bool */ - public static function isLocalSource( $title ) { + public static function isLocalSource( Title $title ) { global $wgDBname, $wgBlacklistSettings; if( $title->getNamespace() == NS_MEDIAWIKI ) { @@ -167,6 +168,23 @@ abstract class BaseBlacklist { return false; } + /** + * Returns the lower-cased type of blacklist from the given title, matching the DB key case-insensitively (e.g. "Spam-blacklist" yields "spam"), or false if no registered type matches + * + * @param Title $title + * @return bool|string + */ + public static function getTypeFromTitle( Title $title ) { + $types = array_map( 'preg_quote', array_keys( self::$blacklistTypes ), array( '/' ) ); + $regex = '/(' . implode( '|', $types ). ')-(?:Blacklist|Whitelist)/i'; + + if ( preg_match( $regex, $title->getDBkey(), $m ) ) { + return strtolower( $m[1] ); + } + + return false; + } + /** * Fetch local and (possibly cached) remote blacklists. * Will be cached locally across multiple invocations. 
@@ -187,7 +205,7 @@ abstract class BaseBlacklist { * @return array Regular expressions */ public function getLocalBlacklists() { - return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-blacklist" ); + return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-blacklist", $this ); } /** @@ -196,7 +214,7 @@ abstract class BaseBlacklist { * @return array Regular expressions */ public function getWhitelists() { - return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-whitelist" ); + return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-whitelist", $this ); } /** @@ -263,7 +281,7 @@ abstract class BaseBlacklist { // there's a bad line in one of them we'll gain more // from only having to break that set into smaller pieces. $regexes = array_merge( $regexes, - SpamRegexBatch::regexesFromText( $text, $fileName ) ); + SpamRegexBatch::regexesFromText( $text, $this, $fileName ) ); } return $regexes; @@ -333,4 +351,23 @@ abstract class BaseBlacklist { return strval( $text ); } + /** + * Returns the start of the regex for matches + * + * @return string + */ + public function getRegexStart() { + return '/[a-z0-9_\-.]*'; + } + + /** + * Returns the end of the regex for matches + * + * @param $batchSize + * @return string + */ + public function getRegexEnd( $batchSize ) { + return ($batchSize > 0 ) ? '/Sim' : '/im'; + } + } diff --git a/EmailBlacklist.php b/EmailBlacklist.php new file mode 100644 index 00000000..5d5843c5 --- /dev/null +++ b/EmailBlacklist.php @@ -0,0 +1,59 @@ +getBlacklists(); + $whitelists = $this->getWhitelists(); + + // The email to check + $email = $user->getEmail(); + + if ( !count( $blacklists ) ) { + // Nothing to check + return true; + } + + // Check for whitelisted emails + if ( is_array( $whitelists ) ) { + wfDebugLog( 'SpamBlacklist', "Excluding whitelisted emails from " . count( $whitelists ) . + " regexes: " . implode( ', ', $whitelists ) . 
"\n" ); + foreach ( $whitelists as $regex ) { + if ( preg_match( $regex, $email ) ) { + // Whitelisted email + return true; + } + } + } + + + # Do the match + wfDebugLog( 'SpamBlacklist', "Checking email against " . count( $blacklists ) . + " regexes: " . implode( ', ', $blacklists ) . "\n" ); + foreach ( $blacklists as $regex ) { + if ( preg_match( $regex, $email ) ) { + return false; + } + } + + return true; + } +} diff --git a/SpamBlacklist.i18n.php b/SpamBlacklist.i18n.php index e4222bdc..987f5f52 100644 --- a/SpamBlacklist.i18n.php +++ b/SpamBlacklist.i18n.php @@ -22,12 +22,31 @@ $messages['en'] = array( 'spam-whitelist' => ' #
 # External URLs matching this list will *not* be blocked even if they would
 # have been blocked by blacklist entries.
+#
+ #
', + 'email-blacklist' => ' # Emails matching this list will be blocked from registering or sending email + # This list affects only this wiki; refer also to the global blacklist. + # For documentation see http://www.mediawiki.org/wiki/Extension:SpamBlacklist + #
 #
 # Syntax is as follows:
 #   * Everything from a "#" character to the end of the line is a comment
 #   * Every non-blank line is a regex fragment which will only match hosts inside URLs
 
  #
', + 'email-whitelist' => ' #
+# Emails matching this list will *not* be blocked even if they would
+# have been blocked by blacklist entries.
+#
+ #
', +# Syntax is as follows: +# * Everything from a "#" character to the end of the line is a comment +# * Every non-blank line is a regex fragment which will only match hosts inside URLs + + 'spam-blacklisted-email' => 'Blacklisted E-mail', + 'spam-blacklisted-email-text' => 'Your e-mail address is currently blacklisted from sending e-mails to other users.', + 'spam-blacklisted-email-signup' => 'The e-mail address given is currently blacklisted from use.', + 'spam-invalid-lines' => "The following spam blacklist {{PLURAL:$1|line is an|lines are}} invalid regular {{PLURAL:$1|expression|expressions}} and {{PLURAL:$1|needs|need}} to be corrected before saving the page:", 'spam-blacklist-desc' => 'Regex-based anti-spam tool: [[MediaWiki:Spam-blacklist]] and [[MediaWiki:Spam-whitelist]]', ); @@ -40,6 +59,11 @@ $messages['en'] = array( $messages['qqq'] = array( 'spam-blacklist' => "See also: [[MediaWiki:spam-whitelist]] and [[MediaWiki:captcha-addurl-whitelist]]. You can translate the text, including 'Leave this line exactly as it is'. Some lines of this messages have one (1) leading space.", 'spam-whitelist' => "See also: [[MediaWiki:spam-blacklist]] and [[MediaWiki:captcha-addurl-whitelist]]. You can translate the text, including 'Leave this line exactly as it is'. Some lines of this messages have one (1) leading space.", + + 'spam-blacklisted-email' => 'Title of errorpage when trying to send an email with a blacklisted email', + 'spam-blacklisted-email-text' => 'Text of errorpage when trying to send an email with a blacklisted email', + 'spam-blacklisted-email-signup' => 'Error when trying to create an account with an invalid email', + 'spam-blacklist-desc' => '{{desc}}', ); diff --git a/SpamBlacklist.php b/SpamBlacklist.php index f2322ff0..acad804b 100644 --- a/SpamBlacklist.php +++ b/SpamBlacklist.php @@ -10,7 +10,7 @@ if ( !defined( 'MEDIAWIKI' ) ) { $wgExtensionCredits[version_compare($wgVersion, '1.17alpha', '>=') ? 
'antispam' : 'other'][] = array( 'path' => __FILE__, 'name' => 'SpamBlacklist', - 'author' => 'Tim Starling', + 'author' => array( 'Tim Starling', 'John Du Hart' ), 'url' => 'https://www.mediawiki.org/wiki/Extension:SpamBlacklist', 'descriptionmsg' => 'spam-blacklist-desc', ); @@ -41,8 +41,11 @@ $wgHooks['EditFilterMerged'][] = 'SpamBlacklistHooks::filterMerged'; $wgHooks['APIEditBeforeSave'][] = 'SpamBlacklistHooks::filterAPIEditBeforeSave'; $wgHooks['EditFilter'][] = 'SpamBlacklistHooks::validate'; $wgHooks['ArticleSaveComplete'][] = 'SpamBlacklistHooks::articleSave'; +$wgHooks['UserCanSendEmail'][] = 'SpamBlacklistHooks::userCanSendEmail'; +$wgHooks['AbortNewAccount'][] = 'SpamBlacklistHooks::abortNewAccount'; $wgAutoloadClasses['BaseBlacklist'] = $dir . 'BaseBlacklist.php'; +$wgAutoloadClasses['EmailBlacklist'] = $dir . 'EmailBlacklist.php'; $wgAutoloadClasses['SpamBlacklistHooks'] = $dir . 'SpamBlacklistHooks.php'; $wgAutoloadClasses['SpamBlacklist'] = $dir . 'SpamBlacklist_body.php'; $wgAutoloadClasses['SpamRegexBatch'] = $dir . 
'SpamRegexBatch.php'; diff --git a/SpamBlacklistHooks.php b/SpamBlacklistHooks.php index 4fefd32f..832d262b 100644 --- a/SpamBlacklistHooks.php +++ b/SpamBlacklistHooks.php @@ -56,6 +56,43 @@ class SpamBlacklistHooks { return ( $ret === false ); } + /** + * Verify that the user can send emails + * + * @param $user User + * @param $hookErr array + * @return bool + */ + public static function userCanSendEmail( &$user, &$hookErr ) { + /** @var $blacklist EmailBlacklist */ + $blacklist = BaseBlacklist::getInstance( 'email' ); + if ( $blacklist->checkUser( $user ) ) { + return true; + } + + $hookErr = array( 'spam-blacklisted-email', 'spam-blacklisted-email-text', null ); + + return false; + } + + /** + * Processes new accounts for valid emails + * + * @param $user User + * @param $abortError + * @return bool + */ + public static function abortNewAccount( $user, &$abortError ) { + /** @var $blacklist EmailBlacklist */ + $blacklist = BaseBlacklist::getInstance( 'email' ); + if ( $blacklist->checkUser( $user ) ) { + return true; + } + + $abortError = wfMessage( 'spam-blacklisted-email-signup' )->escaped(); + return false; + } + /** * Hook function for EditFilter * Confirm that a local blacklist page being saved is valid, @@ -75,9 +112,14 @@ class SpamBlacklistHooks { return true; } + $type = BaseBlacklist::getTypeFromTitle( $editPage->mTitle ); + if ( $type === false ) { + return true; + } + $lines = explode( "\n", $text ); - $badLines = SpamRegexBatch::getBadLines( $lines ); + $badLines = SpamRegexBatch::getBadLines( $lines, BaseBlacklist::getInstance( $type ) ); if( $badLines ) { wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] given invalid input lines: " . implode( ', ', $badLines ) . "\n" ); @@ -92,11 +134,11 @@ class SpamBlacklistHooks { $badList . "\n" . "
\n"; - return true; } else { wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] ok or empty blacklist\n" ); - return true; } + + return true; } /** diff --git a/SpamBlacklist_body.php b/SpamBlacklist_body.php index a279c67e..c1c5df54 100644 --- a/SpamBlacklist_body.php +++ b/SpamBlacklist_body.php @@ -98,7 +98,7 @@ class SpamBlacklist extends BaseBlacklist { $ip = wfGetIP(); wfDebugLog( 'SpamBlacklistHit', "$ip caught submitting spam: {$matches[0]}\n" ); $retVal = $matches[0]; - break; + break; } } } else { @@ -128,4 +128,23 @@ class SpamBlacklist extends BaseBlacklist { } return $links; } + + /** + * Returns the start of the regex for matches + * + * @return string + */ + public function getRegexStart() { + return '/(?:https?:)?\/\/+[a-z0-9_\-.]*('; + } + + /** + * Returns the end of the regex for matches + * + * @param $batchSize + * @return string + */ + public function getRegexEnd( $batchSize ) { + return ')' . parent::getRegexEnd( $batchSize ); + } } \ No newline at end of file diff --git a/SpamRegexBatch.php b/SpamRegexBatch.php index 3174d246..a4cbed6f 100644 --- a/SpamRegexBatch.php +++ b/SpamRegexBatch.php @@ -13,14 +13,14 @@ class SpamRegexBatch { * if 0, will produce one regex per line * @return array */ - static function buildRegexes( $lines, $batchSize=4096 ) { + static function buildRegexes( $lines, BaseBlacklist $blacklist, $batchSize=4096 ) { # Make regex # It's faster using the S modifier even though it will usually only be run once //$regex = 'https?://+[a-z0-9_\-.]*(' . implode( '|', $lines ) . ')'; //return '/' . str_replace( '/', '\/', preg_replace('|\\\*/|', '/', $regex) ) . '/Sim'; $regexes = array(); - $regexStart = '/(?:https?:)?\/\/+[a-z0-9_\-.]*('; - $regexEnd = ($batchSize > 0 ) ? 
')/Sim' : ')/im'; + $regexStart = $blacklist->getRegexStart(); + $regexEnd = $blacklist->getRegexEnd( $batchSize ); $build = false; foreach( $lines as $line ) { if( substr( $line, -1, 1 ) == "\\" ) { @@ -90,9 +90,9 @@ class SpamRegexBatch { * @param $fileName string optional for debug reporting * @return array of regexes */ - static function buildSafeRegexes( $lines, $fileName=false ) { + static function buildSafeRegexes( $lines, BaseBlacklist $blacklist, $fileName=false ) { $lines = SpamRegexBatch::stripLines( $lines ); - $regexes = SpamRegexBatch::buildRegexes( $lines ); + $regexes = SpamRegexBatch::buildRegexes( $lines, $blacklist ); if( SpamRegexBatch::validateRegexes( $regexes ) ) { return $regexes; } else { @@ -102,7 +102,7 @@ class SpamRegexBatch { if( $fileName ) { wfDebugLog( 'SpamBlacklist', "Spam blacklist warning: bogus line in $fileName\n" ); } - return SpamRegexBatch::buildRegexes( $lines, 0 ); + return SpamRegexBatch::buildRegexes( $lines, $blacklist, 0 ); } } @@ -112,7 +112,7 @@ class SpamRegexBatch { * @param array $lines * @return array of input lines which produce invalid input, or empty array if no problems */ - static function getBadLines( $lines ) { + static function getBadLines( $lines, BaseBlacklist $blacklist ) { $lines = SpamRegexBatch::stripLines( $lines ); $badLines = array(); @@ -123,7 +123,7 @@ class SpamRegexBatch { } } - $regexes = SpamRegexBatch::buildRegexes( $lines ); + $regexes = SpamRegexBatch::buildRegexes( $lines, $blacklist ); if( SpamRegexBatch::validateRegexes( $regexes ) ) { // No other problems! return $badLines; @@ -131,7 +131,7 @@ class SpamRegexBatch { // Something failed in the batch, so check them one by one. 
foreach( $lines as $line ) { - $regexes = SpamRegexBatch::buildRegexes( array( $line ) ); + $regexes = SpamRegexBatch::buildRegexes( array( $line ), $blacklist ); if( !SpamRegexBatch::validateRegexes( $regexes ) ) { $badLines[] = $line; } @@ -147,9 +147,9 @@ class SpamRegexBatch { * @param $fileName bool|string optional, for reporting of bad files * @return array of regular expressions, potentially empty */ - static function regexesFromText( $source, $fileName=false ) { + static function regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false ) { $lines = explode( "\n", $source ); - return SpamRegexBatch::buildSafeRegexes( $lines, $fileName ); + return SpamRegexBatch::buildSafeRegexes( $lines, $blacklist, $fileName ); } /** @@ -159,10 +159,10 @@ class SpamRegexBatch { * @param $message string * @return array of regular expressions, potentially empty */ - static function regexesFromMessage( $message ) { + static function regexesFromMessage( $message, BaseBlacklist $blacklist ) { $source = wfMsgForContent( $message ); if( $source && !wfEmptyMsg( $message, $source ) ) { - return SpamRegexBatch::regexesFromText( $source ); + return SpamRegexBatch::regexesFromText( $source, $blacklist ); } else { return array(); }