Adding Email blacklisting to the SpamBlacklist extension

This relies on r109111
This commit is contained in:
John Du Hart 2012-01-18 23:29:37 +00:00
parent 62b2bde146
commit aaf4d74d18
Notes: John Du Hart 2012-01-18 23:29:37 +00:00
7 changed files with 206 additions and 22 deletions

View file

@ -43,6 +43,7 @@ abstract class BaseBlacklist {
*/
private static $blacklistTypes = array(
'spam' => 'SpamBlacklist',
'email' => 'EmailBlacklist',
);
/**
@ -121,7 +122,7 @@ abstract class BaseBlacklist {
* @param Title $title
* @return bool
*/
public static function isLocalSource( $title ) {
public static function isLocalSource( Title $title ) {
global $wgDBname, $wgBlacklistSettings;
if( $title->getNamespace() == NS_MEDIAWIKI ) {
@ -167,6 +168,23 @@ abstract class BaseBlacklist {
return false;
}
/**
 * Returns the type of blacklist from the given title
 *
 * The title's DB key is searched for "<type>-blacklist" or
 * "<type>-whitelist", where <type> is one of the keys of
 * self::$blacklistTypes. The comparison is case-insensitive so that both
 * the message key spelling ("spam-blacklist") and the page title spelling
 * ("Spam-blacklist") are recognised.
 *
 * @param Title $title
 * @return bool|string Lower-cased blacklist type, or false if the title
 *  does not name a known blacklist/whitelist page
 */
public static function getTypeFromTitle( Title $title ) {
	$types = array();
	foreach ( array_keys( self::$blacklistTypes ) as $type ) {
		// Quote every type with the "/" delimiter used by the regex below.
		// (Passing array( '/' ) as a second array to array_map() would only
		// supply the delimiter for the first element.)
		$types[] = preg_quote( $type, '/' );
	}

	$regex = '/(' . implode( '|', $types ) . ')-(?:blacklist|whitelist)/i';

	if ( preg_match( $regex, $title->getDBkey(), $m ) ) {
		return strtolower( $m[1] );
	}

	return false;
}
/**
* Fetch local and (possibly cached) remote blacklists.
* Will be cached locally across multiple invocations.
@ -187,7 +205,7 @@ abstract class BaseBlacklist {
* @return array Regular expressions
*/
public function getLocalBlacklists() {
	// $this is handed to SpamRegexBatch so the regexes are wrapped with this
	// blacklist's own getRegexStart()/getRegexEnd(). The former one-argument
	// call is dropped: it made this statement unreachable and does not
	// satisfy the BaseBlacklist parameter of regexesFromMessage().
	return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-blacklist", $this );
}
/**
@ -196,7 +214,7 @@ abstract class BaseBlacklist {
* @return array Regular expressions
*/
public function getWhitelists() {
	// $this is handed to SpamRegexBatch so the regexes are wrapped with this
	// blacklist's own getRegexStart()/getRegexEnd(). The former one-argument
	// call is dropped: it made this statement unreachable and does not
	// satisfy the BaseBlacklist parameter of regexesFromMessage().
	return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-whitelist", $this );
}
/**
@ -263,7 +281,7 @@ abstract class BaseBlacklist {
// there's a bad line in one of them we'll gain more
// from only having to break that set into smaller pieces.
$regexes = array_merge( $regexes,
SpamRegexBatch::regexesFromText( $text, $fileName ) );
SpamRegexBatch::regexesFromText( $text, $this, $fileName ) );
}
return $regexes;
@ -333,4 +351,23 @@ abstract class BaseBlacklist {
return strval( $text );
}
/**
 * Opening part of every regex built for this blacklist: the "/" delimiter
 * followed by an optional run of letters, digits, "_", "-" and ".".
 * The list entries are appended directly after this prefix.
 *
 * @return string
 */
public function getRegexStart() {
	$prefix = '/[a-z0-9_\-.]*';
	return $prefix;
}
/**
 * Closing part of every regex built for this blacklist: the "/" delimiter
 * plus the pattern modifiers.
 *
 * @param $batchSize int A positive value adds the "S" modifier to the
 *  case-insensitive, multi-line flags; otherwise only "im" is used.
 * @return string
 */
public function getRegexEnd( $batchSize ) {
	if ( $batchSize > 0 ) {
		return '/Sim';
	}
	return '/im';
}
}

59
EmailBlacklist.php Normal file
View file

@ -0,0 +1,59 @@
<?php
/**
 * Email Blacklisting
 *
 * Checks a user's e-mail address against the regexes of the "email"
 * blacklist, honouring the matching whitelist.
 */
class EmailBlacklist extends BaseBlacklist {
	/**
	 * Returns the code for the blacklist implementation
	 *
	 * @return string
	 */
	protected function getBlacklistType() {
		return 'email';
	}

	/**
	 * Checks a User object for a blacklisted email
	 *
	 * @param User $user
	 * @return bool True if the e-mail may be used (no blacklist entries, the
	 *  address is whitelisted, or no blacklist regex matches); false if a
	 *  blacklist regex matches the address
	 */
	public function checkUser( User $user ) {
		$blacklists = $this->getBlacklists();
		if ( !count( $blacklists ) ) {
			// Nothing to check, so skip building the whitelist regexes
			// and looking up the e-mail address.
			return true;
		}

		// The email to check
		$email = $user->getEmail();

		// Check for whitelisted emails
		$whitelists = $this->getWhitelists();
		if ( is_array( $whitelists ) ) {
			wfDebugLog( 'SpamBlacklist', "Excluding whitelisted emails from " . count( $whitelists ) .
				" regexes: " . implode( ', ', $whitelists ) . "\n" );
			foreach ( $whitelists as $regex ) {
				if ( preg_match( $regex, $email ) ) {
					// Whitelisted email
					return true;
				}
			}
		}

		# Do the match
		wfDebugLog( 'SpamBlacklist', "Checking email against " . count( $blacklists ) .
			" regexes: " . implode( ', ', $blacklists ) . "\n" );
		foreach ( $blacklists as $regex ) {
			if ( preg_match( $regex, $email ) ) {
				return false;
			}
		}

		return true;
	}
}

View file

@ -22,12 +22,31 @@ $messages['en'] = array(
'spam-whitelist' => ' #<!-- leave this line exactly as it is --> <pre>
# External URLs matching this list will *not* be blocked even if they would
# have been blocked by blacklist entries.
#
#</pre> <!-- leave this line exactly as it is -->',
'email-blacklist' => ' # Emails matching this list will be blocked from registering or sending email
# This list affects only this wiki; refer also to the global blacklist.
# For documentation see http://www.mediawiki.org/wiki/Extension:SpamBlacklist
#<!-- leave this line exactly as it is --> <pre>
#
# Syntax is as follows:
# * Everything from a "#" character to the end of the line is a comment
# * Every non-blank line is a regex fragment which will be matched against e-mail addresses
#</pre> <!-- leave this line exactly as it is -->',
'email-whitelist' => ' #<!-- leave this line exactly as it is --> <pre>
# Emails matching this list will *not* be blocked even if they would
# have been blocked by blacklist entries.
#
#</pre> <!-- leave this line exactly as it is -->',
# Syntax is as follows:
# * Everything from a "#" character to the end of the line is a comment
# * Every non-blank line is a regex fragment which will only match hosts inside URLs
'spam-blacklisted-email' => 'Blacklisted E-mail',
'spam-blacklisted-email-text' => 'Your e-mail address is currently blacklisted from sending e-mails to other users.',
'spam-blacklisted-email-signup' => 'The e-mail address given is currently blacklisted from use.',
'spam-invalid-lines' => "The following spam blacklist {{PLURAL:$1|line is an|lines are}} invalid regular {{PLURAL:$1|expression|expressions}} and {{PLURAL:$1|needs|need}} to be corrected before saving the page:",
'spam-blacklist-desc' => 'Regex-based anti-spam tool: [[MediaWiki:Spam-blacklist]] and [[MediaWiki:Spam-whitelist]]',
);
@ -40,6 +59,11 @@ $messages['en'] = array(
// Message documentation ('qqq'): one explanatory entry per message key.
$messages['qqq'] = array(
	'spam-blacklist' => "See also: [[MediaWiki:spam-whitelist]] and [[MediaWiki:captcha-addurl-whitelist]]. You can translate the text, including 'Leave this line exactly as it is'. Some lines of this messages have one (1) leading space.",
	'spam-whitelist' => "See also: [[MediaWiki:spam-blacklist]] and [[MediaWiki:captcha-addurl-whitelist]]. You can translate the text, including 'Leave this line exactly as it is'. Some lines of this messages have one (1) leading space.",
	'spam-blacklisted-email' => 'Title of errorpage when trying to send an email with a blacklisted email',
	'spam-blacklisted-email-text' => 'Text of errorpage when trying to send an email with a blacklisted email',
	// The address is rejected because it is blacklisted, not because it is malformed.
	'spam-blacklisted-email-signup' => 'Error when trying to create an account with a blacklisted email',
	'spam-blacklist-desc' => '{{desc}}',
);

View file

@ -10,7 +10,7 @@ if ( !defined( 'MEDIAWIKI' ) ) {
// Register the extension's credits; the 'antispam' group is used from
// MediaWiki 1.17alpha on, 'other' on older versions.
$wgExtensionCredits[version_compare($wgVersion, '1.17alpha', '>=') ? 'antispam' : 'other'][] = array(
	'path' => __FILE__,
	'name' => 'SpamBlacklist',
	// Single 'author' entry: a second 'author' key would simply be overwritten.
	'author' => array( 'Tim Starling', 'John Du Hart' ),
	'url' => 'https://www.mediawiki.org/wiki/Extension:SpamBlacklist',
	'descriptionmsg' => 'spam-blacklist-desc',
);
@ -41,8 +41,11 @@ $wgHooks['EditFilterMerged'][] = 'SpamBlacklistHooks::filterMerged';
$wgHooks['APIEditBeforeSave'][] = 'SpamBlacklistHooks::filterAPIEditBeforeSave';
$wgHooks['EditFilter'][] = 'SpamBlacklistHooks::validate';
$wgHooks['ArticleSaveComplete'][] = 'SpamBlacklistHooks::articleSave';
$wgHooks['UserCanSendEmail'][] = 'SpamBlacklistHooks::userCanSendEmail';
$wgHooks['AbortNewAccount'][] = 'SpamBlacklistHooks::abortNewAccount';
$wgAutoloadClasses['BaseBlacklist'] = $dir . 'BaseBlacklist.php';
$wgAutoloadClasses['EmailBlacklist'] = $dir . 'EmailBlacklist.php';
$wgAutoloadClasses['SpamBlacklistHooks'] = $dir . 'SpamBlacklistHooks.php';
$wgAutoloadClasses['SpamBlacklist'] = $dir . 'SpamBlacklist_body.php';
$wgAutoloadClasses['SpamRegexBatch'] = $dir . 'SpamRegexBatch.php';

View file

@ -56,6 +56,43 @@ class SpamBlacklistHooks {
return ( $ret === false );
}
/**
 * UserCanSendEmail hook handler: refuses e-mail sending for users whose
 * address is rejected by the e-mail blacklist.
 *
 * @param $user User The user to check
 * @param $hookErr array Set, on rejection, to the error page title message
 *  key, the error page text message key and null
 * @return bool True if the user passes the blacklist check, false otherwise
 */
public static function userCanSendEmail( &$user, &$hookErr ) {
	/** @var $emailBlacklist EmailBlacklist */
	$emailBlacklist = BaseBlacklist::getInstance( 'email' );

	$allowed = $emailBlacklist->checkUser( $user );
	if ( !$allowed ) {
		$hookErr = array( 'spam-blacklisted-email', 'spam-blacklisted-email-text', null );
		return false;
	}

	return true;
}
/**
 * AbortNewAccount hook handler: rejects account creation when the given
 * e-mail address is rejected by the e-mail blacklist.
 *
 * @param $user User The account being created
 * @param $abortError string Set, on rejection, to the escaped text of the
 *  'spam-blacklisted-email-signup' message
 * @return bool True if the account passes the blacklist check, false otherwise
 */
public static function abortNewAccount( $user, &$abortError ) {
	/** @var $emailBlacklist EmailBlacklist */
	$emailBlacklist = BaseBlacklist::getInstance( 'email' );

	$allowed = $emailBlacklist->checkUser( $user );
	if ( !$allowed ) {
		$abortError = wfMessage( 'spam-blacklisted-email-signup' )->escaped();
		return false;
	}

	return true;
}
/**
* Hook function for EditFilter
* Confirm that a local blacklist page being saved is valid,
@ -75,9 +112,14 @@ class SpamBlacklistHooks {
return true;
}
$type = BaseBlacklist::getTypeFromTitle( $editPage->mTitle );
if ( $type === false ) {
return true;
}
$lines = explode( "\n", $text );
$badLines = SpamRegexBatch::getBadLines( $lines );
$badLines = SpamRegexBatch::getBadLines( $lines, BaseBlacklist::getInstance( $type ) );
if( $badLines ) {
wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] given invalid input lines: " .
implode( ', ', $badLines ) . "\n" );
@ -92,11 +134,11 @@ class SpamBlacklistHooks {
$badList .
"</div>\n" .
"<br clear='all' />\n";
return true;
} else {
wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] ok or empty blacklist\n" );
return true;
}
return true;
}
/**

View file

@ -98,7 +98,7 @@ class SpamBlacklist extends BaseBlacklist {
$ip = wfGetIP();
wfDebugLog( 'SpamBlacklistHit', "$ip caught submitting spam: {$matches[0]}\n" );
$retVal = $matches[0];
break;
break;
}
}
} else {
@ -128,4 +128,23 @@ class SpamBlacklist extends BaseBlacklist {
}
return $links;
}
/**
 * Opening part of every spam regex: the "/" delimiter, an optional
 * "http:" or "https:" scheme, two or more slashes, an optional run of
 * host characters, and the opening parenthesis of the group that holds
 * the list entries.
 *
 * @return string
 */
public function getRegexStart() {
	$prefix = '/(?:https?:)?\/\/+[a-z0-9_\-.]*(';
	return $prefix;
}
/**
 * Closing part of every spam regex: closes the group opened by
 * getRegexStart() and appends the parent's delimiter and modifiers.
 *
 * @param $batchSize
 * @return string
 */
public function getRegexEnd( $batchSize ) {
	$suffix = parent::getRegexEnd( $batchSize );
	return ')' . $suffix;
}
}

View file

@ -13,14 +13,14 @@ class SpamRegexBatch {
* if 0, will produce one regex per line
* @return array
*/
static function buildRegexes( $lines, $batchSize=4096 ) {
static function buildRegexes( $lines, BaseBlacklist $blacklist, $batchSize=4096 ) {
# Make regex
# It's faster using the S modifier even though it will usually only be run once
//$regex = 'https?://+[a-z0-9_\-.]*(' . implode( '|', $lines ) . ')';
//return '/' . str_replace( '/', '\/', preg_replace('|\\\*/|', '/', $regex) ) . '/Sim';
$regexes = array();
$regexStart = '/(?:https?:)?\/\/+[a-z0-9_\-.]*(';
$regexEnd = ($batchSize > 0 ) ? ')/Sim' : ')/im';
$regexStart = $blacklist->getRegexStart();
$regexEnd = $blacklist->getRegexEnd( $batchSize );
$build = false;
foreach( $lines as $line ) {
if( substr( $line, -1, 1 ) == "\\" ) {
@ -90,9 +90,9 @@ class SpamRegexBatch {
* @param $fileName string optional for debug reporting
* @return array of regexes
*/
static function buildSafeRegexes( $lines, $fileName=false ) {
static function buildSafeRegexes( $lines, BaseBlacklist $blacklist, $fileName=false ) {
$lines = SpamRegexBatch::stripLines( $lines );
$regexes = SpamRegexBatch::buildRegexes( $lines );
$regexes = SpamRegexBatch::buildRegexes( $lines, $blacklist );
if( SpamRegexBatch::validateRegexes( $regexes ) ) {
return $regexes;
} else {
@ -102,7 +102,7 @@ class SpamRegexBatch {
if( $fileName ) {
wfDebugLog( 'SpamBlacklist', "Spam blacklist warning: bogus line in $fileName\n" );
}
return SpamRegexBatch::buildRegexes( $lines, 0 );
return SpamRegexBatch::buildRegexes( $lines, $blacklist, 0 );
}
}
@ -112,7 +112,7 @@ class SpamRegexBatch {
* @param array $lines
* @return array of input lines which produce invalid input, or empty array if no problems
*/
static function getBadLines( $lines ) {
static function getBadLines( $lines, BaseBlacklist $blacklist ) {
$lines = SpamRegexBatch::stripLines( $lines );
$badLines = array();
@ -123,7 +123,7 @@ class SpamRegexBatch {
}
}
$regexes = SpamRegexBatch::buildRegexes( $lines );
$regexes = SpamRegexBatch::buildRegexes( $lines, $blacklist );
if( SpamRegexBatch::validateRegexes( $regexes ) ) {
// No other problems!
return $badLines;
@ -131,7 +131,7 @@ class SpamRegexBatch {
// Something failed in the batch, so check them one by one.
foreach( $lines as $line ) {
$regexes = SpamRegexBatch::buildRegexes( array( $line ) );
$regexes = SpamRegexBatch::buildRegexes( array( $line ), $blacklist );
if( !SpamRegexBatch::validateRegexes( $regexes ) ) {
$badLines[] = $line;
}
@ -147,9 +147,9 @@ class SpamRegexBatch {
* @param $fileName bool|string optional, for reporting of bad files
* @return array of regular expressions, potentially empty
*/
static function regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false ) {
	// One list entry per line of the source text.
	$lines = explode( "\n", $source );
	// $blacklist must be forwarded: buildSafeRegexes() requires it to build
	// the regexes. The former call without it is dropped.
	return SpamRegexBatch::buildSafeRegexes( $lines, $blacklist, $fileName );
}
/**
@ -159,10 +159,10 @@ class SpamRegexBatch {
* @param $message string
* @return array of regular expressions, potentially empty
*/
static function regexesFromMessage( $message ) {
static function regexesFromMessage( $message, BaseBlacklist $blacklist ) {
$source = wfMsgForContent( $message );
if( $source && !wfEmptyMsg( $message, $source ) ) {
return SpamRegexBatch::regexesFromText( $source );
return SpamRegexBatch::regexesFromText( $source, $blacklist );
} else {
return array();
}