mediawiki-extensions-SpamBl.../SpamBlacklist_body.php

<?php

// Entry-point guard: stop immediately unless the MEDIAWIKI constant has been
// defined by whatever included this file.
if ( defined( 'MEDIAWIKI' ) === false ) {
	exit;
}

/**
 * URL spam blacklist: rejects edits that add external links (or an edit
 * summary) matching one of the blacklist regexes, unless a whitelist regex
 * strips the offending text first.
 */
class SpamBlacklist extends BaseBlacklist {
	/**
	 * Blacklist source locations. Not read inside this class; presumably
	 * consumed by the loading code in BaseBlacklist (verify there).
	 *
	 * @var array
	 */
	public $files = array( "http://meta.wikimedia.org/w/index.php?title=Spam_blacklist&action=raw&sb_ver=1" );

	/**
	 * When true, filter() does not add the edit summary to the text that is
	 * checked against the blacklist.
	 *
	 * @var bool
	 */
	public $ignoreEditSummary = false;

	/**
	 * Returns the code for the blacklist implementation
	 *
	 * @return string
	 */
	protected function getBlacklistType() {
		return 'spam';
	}

	/**
	 * Check the links newly added by an edit against the blacklist.
	 *
	 * The text is parsed, the resulting external links are compared with the
	 * links already recorded for the page, and only the added ones (plus the
	 * edit summary, unless disabled) are matched. Whitelist regexes are
	 * applied first and remove whatever they match from the checked text.
	 *
	 * @param Title $title
	 * @param string $text Text of section, or entire text if $editPage!=false
	 * @param string $section Section number or name (not used by this method)
	 * @param string $editsummary Edit summary if one exists, some people use urls there too
	 * @param EditPage $editPage EditPage if EditFilterMerged was called, null otherwise
	 * @return array|bool Matched text(s) if the edit should not be allowed, false otherwise
	 */
	public function filter( &$title, $text, $section, $editsummary = '', EditPage &$editPage = null ) {
		/**
		 * @var $wgParser Parser
		 */
		global $wgParser, $wgUser, $wgRequest;

		$fname = 'wfSpamBlacklistFilter';
		wfProfileIn( $fname );

		// Fold fullwidth full stops into ASCII dots before parsing (bug 12896)
		$text = str_replace( '．', '.', $text );

		$blacklists = $this->getBlacklists();
		$whitelists = $this->getWhitelists();

		$retVal = false;

		if ( count( $blacklists ) ) {
			# Run parser to strip SGML comments and such out of the markup
			# This was being used to circumvent the filter (see bug 5185)
			if ( $editPage ) {
				$editInfo = $editPage->mArticle->prepareTextForEdit( $text );
				$out = $editInfo->output;
			} else {
				$options = new ParserOptions();
				$text = $wgParser->preSaveTransform( $text, $title, $wgUser, $options );
				$out = $wgParser->parse( $text, $title, $options );
			}

			// Only links that are not already on the page are checked, so
			// existing spam does not block unrelated edits.
			$newLinks = array_keys( $out->getExternalLinks() );
			$oldLinks = $this->getCurrentLinks( $title );
			$addedLinks = array_diff( $newLinks, $oldLinks );

			// We add the edit summary if one exists
			if ( !$this->ignoreEditSummary && !empty( $editsummary ) ) {
				$addedLinks[] = $editsummary;
			}

			wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', ', $oldLinks ) );
			wfDebugLog( 'SpamBlacklist', "New URLs: " . implode( ', ', $newLinks ) );
			wfDebugLog( 'SpamBlacklist', "Added URLs: " . implode( ', ', $addedLinks ) );

			// One candidate per line; this is the haystack for all regexes below.
			$links = implode( "\n", $addedLinks );

			# Strip whitelisted URLs from the match
			if ( is_array( $whitelists ) ) {
				wfDebugLog( 'SpamBlacklist', "Excluding whitelisted URLs from " . count( $whitelists ) .
					" regexes: " . implode( ', ', $whitelists ) . "\n" );
				foreach ( $whitelists as $regex ) {
					wfSuppressWarnings();
					$stripped = preg_replace( $regex, '', $links );
					wfRestoreWarnings();
					if ( is_string( $stripped ) ) {
						// If there wasn't a regex error, strip the matching URLs
						$links = $stripped;
					} else {
						// A broken whitelist entry must not let everything
						// through (bug 11545): keep $links as-is, but leave a
						// trace instead of failing silently.
						wfDebugLog( 'SpamBlacklist', "Error evaluating whitelist regex (PCRE error code " .
							preg_last_error() . "), ignoring it: $regex\n" );
					}
				}
			}

			# Do the match
			wfDebugLog( 'SpamBlacklist', "Checking text against " . count( $blacklists ) .
				" regexes: " . implode( ', ', $blacklists ) . "\n" );
			foreach ( $blacklists as $regex ) {
				$matches = array();
				wfSuppressWarnings();
				$matchCount = preg_match_all( $regex, $links, $matches );
				wfRestoreWarnings();

				if ( !is_int( $matchCount ) ) {
					// preg_match_all() failed (invalid pattern, backtrack
					// limit, ...). The batch is skipped exactly as before, but
					// the failure is now visible in the debug log.
					wfDebugLog( 'SpamBlacklist', "Error evaluating blacklist regex (PCRE error code " .
						preg_last_error() . "), skipping it: $regex\n" );
					continue;
				}

				if ( $matchCount > 0 ) {
					wfDebugLog( 'SpamBlacklist', "Match!\n" );
					$ip = $wgRequest->getIP();
					$imploded = implode( ' ', $matches[0] );
					wfDebugLog( 'SpamBlacklistHit', "$ip caught submitting spam: $imploded\n" );
					if ( $retVal === false ) {
						$retVal = array();
					}
					// Collect the hits of every batch so callers get all
					// blocked URLs, not just the first one.
					$retVal = array_merge( $retVal, $matches[0] );
				}
			}
		}

		wfProfileOut( $fname );
		return $retVal;
	}

	/**
	 * Look up the links currently in the article, so we can
	 * ignore them on a second run.
	 *
	 * WARNING: I can add more *of the same link* with no problem here.
	 * (filter() diffs the link sets, so a URL that is already recorded for
	 * the page is never re-checked.)
	 *
	 * @param $title Title
	 * @return array Values of externallinks.el_to for the page's article ID
	 */
	public function getCurrentLinks( $title ) {
		$dbr = wfGetDB( DB_SLAVE );
		$id = $title->getArticleID(); // should be zero queries
		$res = $dbr->select( 'externallinks', array( 'el_to' ),
			array( 'el_from' => $id ), __METHOD__ );
		$links = array();
		foreach ( $res as $row ) {
			$links[] = $row->el_to;
		}
		return $links;
	}

	/**
	 * Returns the start of the regex for matches
	 *
	 * The prefix matches an optional "http:"/"https:" scheme, two or more
	 * slashes and any run of [a-z0-9_\-.] characters, then opens the group
	 * that getRegexEnd() closes.
	 *
	 * @return string
	 */
	public function getRegexStart() {
		return '/(?:https?:)?\/\/+[a-z0-9_\-.]*(';
	}

	/**
	 * Returns the end of the regex for matches
	 *
	 * Closes the group opened by getRegexStart() and appends the parent's
	 * regex ending.
	 *
	 * @param $batchSize
	 * @return string
	 */
	public function getRegexEnd( $batchSize ) {
		return ')' . parent::getRegexEnd( $batchSize );
	}
}
-												from phase3/extensions

											
										
										
											2004-12-11 09:59:06 +00:00
+								<?php
-												* Optimised startup
* Use the new EditFilterMerged hook if available, for faster link finding
* Random bits of code were leaking out of the body file into the loader, poked them back in.

											
										
										
											2007-11-12 07:44:17 +00:00
+								if ( !defined( 'MEDIAWIKI' ) ) {
 									exit;
 								}
-												from phase3/extensions

											
										
										
											2004-12-11 09:59:06 +00:00
-												Refactored SpamBlacklist to be extendable for other blacklist types

This is the groundwork for Bug 33761

											
										
										
											2012-01-17 06:13:46 +00:00
+								class SpamBlacklist extends BaseBlacklist {
-												Some polishing and refactoring on this monstrosity, it's been allowed to grow without some good snipping in a while. :)
* Handle bad regexes more gracefully:
 - The batched regexes are tested for validity, and if one is bad, the lines from that source are broken out line-by-line. This is slower, but the other lines in that source will still be applied correctly.
 - Suppress warnings and be more verbose in the debug log.
 - Check for bad regexes when a local blacklist page is edited, and prompt the user to fix the bad lines.
* Caching issues:
 - Cache the full regexes per-DB instead of per-site; this should be friendlier to shared environments where not every wiki has the same configuration.
 - Hopefully improve the recaching of local pages, which looked like it would preemptively apply the being-edited text to the cache during the filter callback, even though something else might stop the page from being saved. Now just clearing the cache after save is complete, letting it re-load later.
* Split out some of the regex batch functions for clarity.

There are probably still issues with caching of HTTP bits, and in general the local DB loading looks verrrry fragile.
Test this a bit more before syncing. :)

											
										
										
											2007-07-20 21:13:26 +00:00
+									var $files = array( "http://meta.wikimedia.org/w/index.php?title=Spam_blacklist&action=raw&sb_ver=1" );
-												PLEASE TEST: Bug #26332 — Patch that I think should fix the problem
  according to the comments, but needs more testing

* Also, a one line w/s fix up

											
										
										
											2011-05-03 20:23:35 +00:00
+									var $ignoreEditSummary = false;
-												remove some ending whitespaces

											
										
										
											2007-01-06 20:56:46 +00:00
-												Some polishing and refactoring on this monstrosity, it's been allowed to grow without some good snipping in a while. :)
* Handle bad regexes more gracefully:
 - The batched regexes are tested for validity, and if one is bad, the lines from that source are broken out line-by-line. This is slower, but the other lines in that source will still be applied correctly.
 - Suppress warnings and be more verbose in the debug log.
 - Check for bad regexes when a local blacklist page is edited, and prompt the user to fix the bad lines.
* Caching issues:
 - Cache the full regexes per-DB instead of per-site; this should be friendlier to shared environments where not every wiki has the same configuration.
 - Hopefully improve the recaching of local pages, which looked like it would preemptively apply the being-edited text to the cache during the filter callback, even though something else might stop the page from being saved. Now just clearing the cache after save is complete, letting it re-load later.
* Split out some of the regex batch functions for clarity.

There are probably still issues with caching of HTTP bits, and in general the local DB loading looks verrrry fragile.
Test this a bit more before syncing. :)

											
										
										
											2007-07-20 21:13:26 +00:00
+									/**
-												Refactored SpamBlacklist to be extendable for other blacklist types

This is the groundwork for Bug 33761

											
										
										
											2012-01-17 06:13:46 +00:00
+									 * Returns the code for the blacklist implementation
 									 *
 									 * @return string
-												Some polishing and refactoring on this monstrosity, it's been allowed to grow without some good snipping in a while. :)
* Handle bad regexes more gracefully:
 - The batched regexes are tested for validity, and if one is bad, the lines from that source are broken out line-by-line. This is slower, but the other lines in that source will still be applied correctly.
 - Suppress warnings and be more verbose in the debug log.
 - Check for bad regexes when a local blacklist page is edited, and prompt the user to fix the bad lines.
* Caching issues:
 - Cache the full regexes per-DB instead of per-site; this should be friendlier to shared environments where not every wiki has the same configuration.
 - Hopefully improve the recaching of local pages, which looked like it would preemptively apply the being-edited text to the cache during the filter callback, even though something else might stop the page from being saved. Now just clearing the cache after save is complete, letting it re-load later.
* Split out some of the regex batch functions for clarity.

There are probably still issues with caching of HTTP bits, and in general the local DB loading looks verrrry fragile.
Test this a bit more before syncing. :)

											
										
										
											2007-07-20 21:13:26 +00:00
+									 */
-												Refactored SpamBlacklist to be extendable for other blacklist types

This is the groundwork for Bug 33761

											
										
										
											2012-01-17 06:13:46 +00:00
+									protected function getBlacklistType() {
 										return 'spam';
-												Add a local whitelist, editable by admins at [[MediaWiki:Spam-whitelist]]

											
										
										
											2006-06-22 19:59:43 +00:00
+									}
-												remove some ending whitespaces

											
										
										
											2007-01-06 20:56:46 +00:00
-												* Optimised startup
* Use the new EditFilterMerged hook if available, for faster link finding
* Random bits of code were leaking out of the body file into the loader, poked them back in.

											
										
										
											2007-11-12 07:44:17 +00:00
+									/**
 									 * @param Title $title
 									 * @param string $text Text of section, or entire text if $editPage!=false
 									 * @param string $section Section number or name
-												Maintenance for SpamBlacklist extension.

* Replace deprecated methods. MediaWiki 1.19 required.
* Replace <tt> with <code>.
* Update documentation.
* Use WikiPage instead of Article for doEdit().
* Use __DIR__ instead of dirname( __FILE__ ).
* Remove superfluous newlines.

Change-Id: I3a0e42ca404638f7c7934c316735ad11cbc99d42

											
										
										
											2012-09-02 15:41:39 +00:00
+									 * @param string $editsummary Edit summary if one exists, some people use urls there too
-												Fix mixed up params

											
										
										
											2008-08-16 21:40:30 +00:00
+									 * @param EditPage $editPage EditPage if EditFilterMerged was called, null otherwise
-												Other half of fix for bug #30332 ("API spamblocklist error should
provide all blocked URLs").

SpamBlacklist extension to provide all matched URLs to
spamPageWithContent() rather than just one. Performance
hit negligible and zero for all edits that don't hit the
SpamBlacklist (99.999%+).

DEPENDENT ON OTHER HALF OF FIX (now in core):
https://gerrit.wikimedia.org/r/3740

Change-Id: Ia951d5795c5cedb6c3876be89f8a08f110004102

											
										
										
											2012-03-27 20:42:49 +00:00
+									 * @return Array Matched text(s) if the edit should not be allowed, false otherwise
-												* Optimised startup
* Use the new EditFilterMerged hook if available, for faster link finding
* Random bits of code were leaking out of the body file into the loader, poked them back in.

											
										
										
											2007-11-12 07:44:17 +00:00
+									 */
-												Fix mixed up params

											
										
										
											2008-08-16 21:40:30 +00:00
+									function filter( &$title, $text, $section, $editsummary = '', EditPage &$editPage = null ) {
-												Refactored SpamBlacklist to be extendable for other blacklist types

This is the groundwork for Bug 33761

											
										
										
											2012-01-17 06:13:46 +00:00
+										/**
 										 * @var $wgParser Parser
 										 */
-												Remove some more unused globals

Kill a couple of other unused variables

											
										
										
											2010-07-25 17:12:50 +00:00
+										global $wgParser, $wgUser;
-												split the regex fetching part of the filter into its own function

											
										
										
											2006-01-19 17:14:10 +00:00
 										$fname = 'wfSpamBlacklistFilter';
 										wfProfileIn( $fname );
-												Refactored SpamBlacklist to be extendable for other blacklist types

This is the groundwork for Bug 33761

											
										
										
											2012-01-17 06:13:46 +00:00
+										# These don't do anything, commenting out...
 										#$this->title = $title;
 										#$this->text = $text;
 										#$this->section = $section;
-												* (bug 12896) A way to bypass Spam Blacklist

											
										
										
											2008-02-03 18:58:27 +00:00
+										$text = str_replace( '．', '.', $text ); //@bug 12896
-												Updated DB: for the 1.5 schema, fixed a few bugs

											
										
										
											2006-01-23 01:35:39 +00:00
-												Add a local blacklist at MediaWiki:Spam-blacklist which can always be used, just as the local whitelist at MediaWiki:Spam-whitelist.
Should save some trouble for annoyed people. :)
The regular message cache behavior is used for this message, so it'll also update immediately, without waiting for the shared caches to time out.
Additionally, added a fix for configurations which don't hardcode the PHP include_path by using $IP in an include for HttpFunctions.php.

											
										
										
											2007-07-07 17:21:49 +00:00
+										$blacklists = $this->getBlacklists();
-												Split giant regexes so PCRE stops screaming about them.
Haven't tested cleanup.php

											
										
										
											2006-09-18 09:56:57 +00:00
+										$whitelists = $this->getWhitelists();
-												remove some ending whitespaces

											
										
										
											2007-01-06 20:56:46 +00:00
-												Add a local blacklist at MediaWiki:Spam-blacklist which can always be used, just as the local whitelist at MediaWiki:Spam-whitelist.
Should save some trouble for annoyed people. :)
The regular message cache behavior is used for this message, so it'll also update immediately, without waiting for the shared caches to time out.
Additionally, added a fix for configurations which don't hardcode the PHP include_path by using $IP in an include for HttpFunctions.php.

											
										
										
											2007-07-07 17:21:49 +00:00
+										if ( count( $blacklists ) ) {
-												Run text through the parser and get the actual links recorded instead of trying to second-guess behavior

											
										
										
											2006-06-22 20:35:49 +00:00
+											# Run parser to strip SGML comments and such out of the markup
-												(bug 5185) Strip out SGML comments before scanning the text for matches so some nutter can't circumvent the lot with a well placed <!-- -->

											
										
										
											2006-04-12 04:59:27 +00:00
+											# This was being used to circumvent the filter (see bug 5185)
-												* Optimised startup
* Use the new EditFilterMerged hook if available, for faster link finding
* Random bits of code were leaking out of the body file into the loader, poked them back in.

											
										
										
											2007-11-12 07:44:17 +00:00
+											if ( $editPage ) {
 												$editInfo = $editPage->mArticle->prepareTextForEdit( $text );
 												$out = $editInfo->output;
 											} else {
 												$options = new ParserOptions();
 												$text = $wgParser->preSaveTransform( $text, $title, $wgUser, $options );
 												$out = $wgParser->parse( $text, $title, $options );
 											}
-												* (bug 1505) Limit spam blacklist checks to new URLs to reduce disruption of existing pages being legitimately edited by legitimate people which happen to already have some spam on them.

Steals the load-existing-links function out of ConfirmEdit.

											
										
										
											2008-05-13 23:31:33 +00:00
+											$newLinks = array_keys( $out->getExternalLinks() );
 											$oldLinks = $this->getCurrentLinks( $title );
 											$addedLinks = array_diff( $newLinks, $oldLinks );
-												(bug 16120) Prevent death on Spam Blacklist trigger using API. Patch by Brad Jorsch.

An API edit attempt with Spam Blacklist firing will now output something instead of crashing:

<?xml version="1.0"?><api><edit spamblacklist="http://blacklistme.example.com"
result="Failure" /></api>

											
										
										
											2008-11-02 22:40:02 +00:00
-												And now SpamBlacklist checks the edit summary field.

											
										
										
											2008-06-19 03:14:34 +00:00
+											// We add the edit summary if one exists
-												More undefined variables

											
										
										
											2011-01-23 10:34:56 +00:00
+											if ( !$this->ignoreEditSummary && !empty( $editsummary ) ) {
 												$addedLinks[] = $editsummary;
 											}
-												(bug 16120) Prevent death on Spam Blacklist trigger using API. Patch by Brad Jorsch.

An API edit attempt with Spam Blacklist firing will now output something instead of crashing:

<?xml version="1.0"?><api><edit spamblacklist="http://blacklistme.example.com"
result="Failure" /></api>

											
										
										
											2008-11-02 22:40:02 +00:00
-												* (bug 1505) Limit spam blacklist checks to new URLs to reduce disruption of existing pages being legitimately edited by legitimate people which happen to already have some spam on them.

Steals the load-existing-links function out of ConfirmEdit.

											
										
										
											2008-05-13 23:31:33 +00:00
+											wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', ', $oldLinks ) );
 											wfDebugLog( 'SpamBlacklist', "New URLs: " . implode( ', ', $newLinks ) );
 											wfDebugLog( 'SpamBlacklist', "Added URLs: " . implode( ', ', $addedLinks ) );
-												(bug 16120) Prevent death on Spam Blacklist trigger using API. Patch by Brad Jorsch.

An API edit attempt with Spam Blacklist firing will now output something instead of crashing:

<?xml version="1.0"?><api><edit spamblacklist="http://blacklistme.example.com"
result="Failure" /></api>

											
										
										
											2008-11-02 22:40:02 +00:00
-												* (bug 1505) Limit spam blacklist checks to new URLs to reduce disruption of existing pages being legitimately edited by legitimate people which happen to already have some spam on them.

Steals the load-existing-links function out of ConfirmEdit.

											
										
										
											2008-05-13 23:31:33 +00:00
+											$links = implode( "\n", $addedLinks );
-												remove some ending whitespaces

											
										
										
											2007-01-06 20:56:46 +00:00
-												Add a local whitelist, editable by admins at [[MediaWiki:Spam-whitelist]]

											
										
										
											2006-06-22 19:59:43 +00:00
+											# Strip whitelisted URLs from the match
-												Split giant regexes so PCRE stops screaming about them.
Haven't tested cleanup.php

											
										
										
											2006-09-18 09:56:57 +00:00
+											if( is_array( $whitelists ) ) {
-												Break spam blacklist log info out to a sep file

											
										
										
											2007-10-03 00:19:36 +00:00
+												wfDebugLog( 'SpamBlacklist', "Excluding whitelisted URLs from " . count( $whitelists ) .
-												Split giant regexes so PCRE stops screaming about them.
Haven't tested cleanup.php

											
										
										
											2006-09-18 09:56:57 +00:00
+													" regexes: " . implode( ', ', $whitelists ) . "\n" );
 												foreach( $whitelists as $regex ) {
-												suppress warnings

											
										
										
											2007-08-08 15:42:36 +00:00
+													wfSuppressWarnings();
-												* (bug 11545) Don't let everything through if there's a bogus whitelist entry

											
										
										
											2007-10-03 00:48:57 +00:00
+													$newLinks = preg_replace( $regex, '', $links );
-												suppress warnings

											
										
										
											2007-08-08 15:42:36 +00:00
+													wfRestoreWarnings();
-												* (bug 11545) Don't let everything through if there's a bogus whitelist entry

											
										
										
											2007-10-03 00:48:57 +00:00
+													if( is_string( $newLinks ) ) {
 														// If there wasn't a regex error, strip the matching URLs
 														$links = $newLinks;
 													}
-												Split giant regexes so PCRE stops screaming about them.
Haven't tested cleanup.php

											
										
										
											2006-09-18 09:56:57 +00:00
+												}
-												Add a local whitelist, editable by admins at [[MediaWiki:Spam-whitelist]]

											
										
										
											2006-06-22 19:59:43 +00:00
+											}
-												(bug 5185) Strip out SGML comments before scanning the text for matches so some nutter can't circumvent the lot with a well placed <!-- -->

											
										
										
											2006-04-12 04:59:27 +00:00
-												from phase3/extensions

											
										
										
											2004-12-11 09:59:06 +00:00
+											# Do the match
-												Break spam blacklist log info out to a sep file

											
										
										
											2007-10-03 00:19:36 +00:00
+											wfDebugLog( 'SpamBlacklist', "Checking text against " . count( $blacklists ) .
-												Add a local blacklist at MediaWiki:Spam-blacklist which can always be used, just as the local whitelist at MediaWiki:Spam-whitelist.
Should save some trouble for annoyed people. :)
The regular message cache behavior is used for this message, so it'll also update immediately, without waiting for the shared caches to time out.
Additionally, added a fix for configurations which don't hardcode the PHP include_path by using $IP in an include for HttpFunctions.php.

											
										
										
											2007-07-07 17:21:49 +00:00
+												" regexes: " . implode( ', ', $blacklists ) . "\n" );
-												Split giant regexes so PCRE stops screaming about them.
Haven't tested cleanup.php

											
										
										
											2006-09-18 09:56:57 +00:00
+											$retVal = false;
-												Add a local blacklist at MediaWiki:Spam-blacklist which can always be used, just as the local whitelist at MediaWiki:Spam-whitelist.
Should save some trouble for annoyed people. :)
The regular message cache behavior is used for this message, so it'll also update immediately, without waiting for the shared caches to time out.
Additionally, added a fix for configurations which don't hardcode the PHP include_path by using $IP in an include for HttpFunctions.php.

											
										
										
											2007-07-07 17:21:49 +00:00
+											foreach( $blacklists as $regex ) {
-												Some polishing and refactoring on this monstrosity, it's been allowed to grow without some good snipping in a while. :)
* Handle bad regexes more gracefully:
 - The batched regexes are tested for validity, and if one is bad, the lines from that source are broken out line-by-line. This is slower, but the other lines in that source will still be applied correctly.
 - Suppress warnings and be more verbose in the debug log.
 - Check for bad regexes when a local blacklist page is edited, and prompt the user to fix the bad lines.
* Caching issues:
 - Cache the full regexes per-DB instead of per-site; this should be friendlier to shared environments where not every wiki has the same configuration.
 - Hopefully improve the recaching of local pages, which looked like it would preemptively apply the being-edited text to the cache during the filter callback, even though something else might stop the page from being saved. Now just clearing the cache after save is complete, letting it re-load later.
* Split out some of the regex batch functions for clarity.

There are probably still issues with caching of HTTP bits, and in general the local DB loading looks verrrry fragile.
Test this a bit more before syncing. :)

											
										
										
											2007-07-20 21:13:26 +00:00
+												wfSuppressWarnings();
-												More undefined variables

											
										
										
											2011-01-23 10:34:56 +00:00
+												$matches = array();
-												Other half of fix for bug #30332 ("API spamblocklist error should
provide all blocked URLs").

SpamBlacklist extension to provide all matched URLs to
spamPageWithContent() rather than just one. Performance
hit negligible and zero for all edits that don't hit the
SpamBlacklist (99.999%+).

DEPENDENT ON OTHER HALF OF FIX (now in core):
https://gerrit.wikimedia.org/r/3740

Change-Id: Ia951d5795c5cedb6c3876be89f8a08f110004102

											
										
										
											2012-03-27 20:42:49 +00:00
+												$check = ( preg_match_all( $regex, $links, $matches ) > 0 );
-												Some polishing and refactoring on this monstrosity, it's been allowed to grow without some good snipping in a while. :)
* Handle bad regexes more gracefully:
 - The batched regexes are tested for validity, and if one is bad, the lines from that source are broken out line-by-line. This is slower, but the other lines in that source will still be applied correctly.
 - Suppress warnings and be more verbose in the debug log.
 - Check for bad regexes when a local blacklist page is edited, and prompt the user to fix the bad lines.
* Caching issues:
 - Cache the full regexes per-DB instead of per-site; this should be friendlier to shared environments where not every wiki has the same configuration.
 - Hopefully improve the recaching of local pages, which looked like it would preemptively apply the being-edited text to the cache during the filter callback, even though something else might stop the page from being saved. Now just clearing the cache after save is complete, letting it re-load later.
* Split out some of the regex batch functions for clarity.

There are probably still issues with caching of HTTP bits, and in general the local DB loading looks verrrry fragile.
Test this a bit more before syncing. :)

											
										
										
											2007-07-20 21:13:26 +00:00
+												wfRestoreWarnings();
 												if( $check ) {
-												Break spam blacklist log info out to a sep file

											
										
										
											2007-10-03 00:19:36 +00:00
+													wfDebugLog( 'SpamBlacklist', "Match!\n" );
-												Maintenance for SpamBlacklist extension.

* Replace deprecated methods. MediaWiki 1.19 required.
* Replace <tt> with <code>.
* Update documentation.
* Use WikiPage instead of Article for doEdit().
* Use __DIR__ instead of dirname( __FILE__ ).
* Remove superfluous newlines.

Change-Id: I3a0e42ca404638f7c7934c316735ad11cbc99d42

											
										
										
											2012-09-02 15:41:39 +00:00
+													global $wgRequest;
 													$ip = $wgRequest->getIP();
-												Other half of fix for bug #30332 ("API spamblocklist error should
provide all blocked URLs").

SpamBlacklist extension to provide all matched URLs to
spamPageWithContent() rather than just one. Performance
hit negligible and zero for all edits that don't hit the
SpamBlacklist (99.999%+).

DEPENDENT ON OTHER HALF OF FIX (now in core):
https://gerrit.wikimedia.org/r/3740

Change-Id: Ia951d5795c5cedb6c3876be89f8a08f110004102

											
										
										
											2012-03-27 20:42:49 +00:00
+													$imploded = implode( ' ', $matches[0] );
 													wfDebugLog( 'SpamBlacklistHit', "$ip caught submitting spam: $imploded\n" );
 													if( $retVal === false ){
 														$retVal = array();
 													}
 													$retVal = array_merge( $retVal, $matches[0] );
-												Split giant regexes so PCRE stops screaming about them.
Haven't tested cleanup.php

											
										
										
											2006-09-18 09:56:57 +00:00
+												}
-												from phase3/extensions

											
										
										
											2004-12-11 09:59:06 +00:00
+											}
 										} else {
 											$retVal = false;
 										}
 										wfProfileOut( $fname );
 										return $retVal;
 									}
-												(bug 16120) Prevent death on Spam Blacklist trigger using API. Patch by Brad Jorsch.

An API edit attempt with Spam Blacklist firing will now output something instead of crashing:

<?xml version="1.0"?><api><edit spamblacklist="http://blacklistme.example.com"
result="Failure" /></api>

											
										
										
											2008-11-02 22:40:02 +00:00
-												* (bug 1505) Limit spam blacklist checks to new URLs to reduce disruption of existing pages being legitimately edited by legitimate people which happen to already have some spam on them.

Steals the load-existing-links function out of ConfirmEdit.

											
										
										
											2008-05-13 23:31:33 +00:00
+									/**
 									 * Look up the links currently in the article, so we can
 									 * ignore them on a second run.
 									 *
 									 * WARNING: I can add more *of the same link* with no problem here.
-												Refactored SpamBlacklist to be extendable for other blacklist types

This is the groundwork for Bug 33761

											
										
										
											2012-01-17 06:13:46 +00:00
+									 * @param $title Title
 									 * @return array
-												* (bug 1505) Limit spam blacklist checks to new URLs to reduce disruption of existing pages being legitimately edited by legitimate people which happen to already have some spam on them.

Steals the load-existing-links function out of ConfirmEdit.

											
										
										
											2008-05-13 23:31:33 +00:00
+									 */
 									function getCurrentLinks( $title ) {
-												Get rid of the last (I think) php4-style calls to wfGetDB()

											
										
										
											2010-02-13 23:03:40 +00:00
+										$dbr = wfGetDB( DB_SLAVE );
-												Bug 35156 - Harmonise spelling of getArticleID() and getArticleId()


Mass change ->getArticleId() to ->getArticleID()

											
										
										
											2012-03-11 19:04:37 +00:00
+										$id = $title->getArticleID(); // should be zero queries
-												(bug 16120) Prevent death on Spam Blacklist trigger using API. Patch by Brad Jorsch.

An API edit attempt with Spam Blacklist firing will now output something instead of crashing:

<?xml version="1.0"?><api><edit spamblacklist="http://blacklistme.example.com"
result="Failure" /></api>

											
										
										
											2008-11-02 22:40:02 +00:00
+										$res = $dbr->select( 'externallinks', array( 'el_to' ),
-												* (bug 1505) Limit spam blacklist checks to new URLs to reduce disruption of existing pages being legitimately edited by legitimate people which happen to already have some spam on them.

Steals the load-existing-links function out of ConfirmEdit.

											
										
										
											2008-05-13 23:31:33 +00:00
+											array( 'el_from' => $id ), __METHOD__ );
 										$links = array();
-												Conditionals in loops to foreachs

											
										
										
											2010-10-29 21:30:20 +00:00
+										foreach ( $res as $row ) {
-												* (bug 1505) Limit spam blacklist checks to new URLs to reduce disruption of existing pages being legitimately edited by legitimate people which happen to already have some spam on them.

Steals the load-existing-links function out of ConfirmEdit.

											
										
										
											2008-05-13 23:31:33 +00:00
+											$links[] = $row->el_to;
 										}
 										return $links;
 									}
-												Adding Email blacklisting to the SpamBlacklist extension

This relies on r109111

											
										
										
											2012-01-18 23:29:37 +00:00
 									/**
 									 * Returns the start of the regex for matches
 									 *
 									 * @return string
 									 */
 									public function getRegexStart() {
-												(Bug 35023) The spam blacklist doesn't act on protocol-relative links.

Change-Id: Ibe15cdf62d0099f10fb73f56ce0dfee2abac7f35

											
										
										
											2012-07-14 17:40:05 +00:00
+										return '/(?:https?:)?\/\/+[a-z0-9_\-.]*(';
-												Adding Email blacklisting to the SpamBlacklist extension

This relies on r109111

											
										
										
											2012-01-18 23:29:37 +00:00
+									}
 									/**
 									 * Returns the end of the regex for matches
 									 *
 									 * @param $batchSize
 									 * @return string
 									 */
 									public function getRegexEnd( $batchSize ) {
 										return ')' . parent::getRegexEnd( $batchSize );
 									}
-												Manually apply r110682 to trunk

											
										
										
											2012-02-03 20:15:02 +00:00
+								}