mediawiki-extensions-SpamBl.../SpamBlacklist_body.php

117 lines
2.9 KiB
PHP
Raw Normal View History

2004-12-11 09:59:06 +00:00
<?php
if ( defined( 'MEDIAWIKI' ) ) {
/**
 * Edit filter that rejects text containing links matching a blacklist.
 *
 * The blacklist is assembled from the sources listed in $files, compiled
 * into one PCRE pattern, and cached both on this object and in $wgMemc
 * under the key "spam_blacklist_regex".
 */
class SpamBlacklist {
	# Compiled pattern (string), true when the blacklist is empty (match
	# nothing), or false when nothing has been loaded yet.
	var $regex = false;

	# Optional callable run before this filter; a true return from it makes
	# filter() return true immediately.
	var $previousFilter = false;

	# Blacklist sources. Each entry is either a path readable by file() or a
	# string of the form "DB: <database> <title>" naming a wiki page.
	var $files = array();

	/**
	 * PHP 4-style constructor: defaults the source list to the
	 * wikimedia_blacklist file under the extension directory in $IP.
	 */
	function SpamBlacklist() {
		global $IP;
		$this->files = array( "$IP/extensions/SpamBlacklist/wikimedia_blacklist" );
	}

	/**
	 * Check a piece of text against the blacklist.
	 *
	 * @param object $title   Must provide getPrefixedDBkey(); used to detect
	 *                        that the page being saved is itself a blacklist
	 *                        source.
	 * @param string $text    Text to check.
	 * @param mixed  $section Only forwarded to $previousFilter.
	 * @return bool True when $previousFilter returned true, or when $text
	 *              matched the blacklist (EditPage::spamPage() is then called
	 *              with the matched string); false otherwise.
	 */
	function filter( &$title, $text, $section ) {
		global $wgDBname, $wgMemc;

		$fname = 'wfSpamBlacklistFilter';
		wfProfileIn( $fname );

		# Call the rest of the hook chain first
		if ( $this->previousFilter ) {
			$f = $this->previousFilter;
			if ( $f( $title, $text, $section ) ) {
				wfProfileOut( $fname );
				return true;
			}
		}

		# Accept a single source given as a scalar
		if ( !is_array( $this->files ) ) {
			$this->files = array( $this->files );
		}
		if ( count( $this->files ) == 0 ) {
			# No lists
			wfProfileOut( $fname );
			return false;
		}

		# Refresh cache if we are saving the blacklist
		$recache = false;
		foreach ( $this->files as $fileName ) {
			if ( preg_match( '/^DB: (\w*) (.*)$/', $fileName, $matches ) ) {
				if ( $wgDBname == $matches[1] && $title->getPrefixedDBkey() == $matches[2] ) {
					$recache = true;
					break;
				}
			}
		}

		if ( $recache ) {
			# Drop any pattern already held by this object; otherwise the
			# rebuild below would be skipped and the stale list kept.
			$this->regex = false;
		} elseif ( $this->regex === false ) {
			$this->regex = $wgMemc->get( "spam_blacklist_regex" );
		}

		if ( !$this->regex ) {
			$this->regex = $this->buildRegex( $title, $text );
			$wgMemc->set( "spam_blacklist_regex", $this->regex, 3600 );
		}

		# Do the match; a regex of true means "empty blacklist"
		$retVal = false;
		if ( $this->regex !== true && preg_match( $this->regex, $text, $matches ) ) {
			EditPage::spamPage( $matches[0] );
			$retVal = true;
		}

		wfProfileOut( $fname );
		return $retVal;
	}

	/**
	 * Load every source in $files and compile the combined pattern.
	 *
	 * Helper for filter(). When a "DB:" source names the page currently being
	 * saved, $text is used in place of the stored page text.
	 *
	 * @param object $title Must provide getPrefixedDBkey().
	 * @param string $text  Text being saved.
	 * @return mixed Pattern string, or true when no blacklist lines remain
	 *               after stripping comments and blanks.
	 */
	function buildRegex( &$title, $text ) {
		global $wgDBname;

		# Load lists
		$lines = array();
		foreach ( $this->files as $fileName ) {
			if ( preg_match( '/^DB: (\w*) (.*)$/', $fileName, $matches ) ) {
				if ( $wgDBname == $matches[1] && $title->getPrefixedDBkey() == $matches[2] ) {
					$newLines = explode( "\n", $text );
				} else {
					$newLines = $this->getArticleLines( $matches[1], $matches[2] );
				}
			} else {
				$newLines = file( $fileName );
			}

			# file() yields false for an unreadable source; skip it rather
			# than aborting the whole filter.
			if ( is_array( $newLines ) ) {
				# array_merge appends. The + operator keeps only keys absent
				# from the left operand, which for numerically indexed lists
				# discards most of every source after the first.
				$lines = array_merge( $lines, $newLines );
			}
		}

		# Strip comments and whitespace, then remove blanks
		$lines = array_filter( array_map( 'trim', preg_replace( '/#.*$/', '', $lines ) ) );

		# No lines, don't make a regex which will match everything
		if ( count( $lines ) == 0 ) {
			return true;
		}

		# Make regex
		# It's faster using the S modifier even though it will usually only be run once
		$regex = 'http://[a-z0-9\-.]*(' . implode( '|', $lines ) . ')';
		# Normalise any existing escaping of "/" and then escape every "/",
		# since "/" is the pattern delimiter.
		return '/' . str_replace( '/', '\/', preg_replace( '|\\\*/|', '/', $regex ) ) . '/Si';
	}

	/**
	 * Fetch the text of a namespace-0 page from the cur table of another
	 * database, split into lines.
	 *
	 * @param string $db      Database name; callers in this class pass a
	 *                        value matched by \w*.
	 * @param string $article Value compared against cur_title.
	 * @return array Lines of the page text, or an empty array when no row
	 *               is found.
	 */
	function getArticleLines( $db, $article ) {
		$dbr = wfGetDB( DB_READ );
		$cur = $dbr->tableName( 'cur' );
		# Quote the title so an apostrophe in it cannot break the query
		$quotedTitle = $dbr->addQuotes( $article );
		$res = $dbr->query( "SELECT cur_text FROM $db.$cur WHERE cur_namespace=0 AND cur_title=$quotedTitle" );
		$row = $dbr->fetchObject( $res );
		if ( $row ) {
			return explode( "\n", $row->cur_text );
		}
		return array();
	}
}
} # End invocation guard
?>