mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/AbuseFilter.git
synced 2024-11-23 21:53:35 +00:00
Add ccnorm_contains_any function
Normalize and search a string for multiple substrings Bug: T65242 Change-Id: I4034c0054a6849babbf2d96ea13dc97d3660d5b4
This commit is contained in:
parent
f00de10b24
commit
2bc8873c30
|
@ -225,6 +225,7 @@
|
|||
"abusefilter-edit-builder-funcs-lcase": "To lower case (lcase)",
|
||||
"abusefilter-edit-builder-funcs-ucase": "To upper case (ucase)",
|
||||
"abusefilter-edit-builder-funcs-ccnorm": "Normalize confusable characters (ccnorm)",
|
||||
"abusefilter-edit-builder-funcs-ccnorm-contains-any": "Normalize and search a string for multiple substrings (ccnorm_contains_any)",
|
||||
"abusefilter-edit-builder-funcs-rmdoubles": "Remove double-characters (rmdoubles)",
|
||||
"abusefilter-edit-builder-funcs-specialratio": "Special characters / total characters (specialratio)",
|
||||
"abusefilter-edit-builder-funcs-norm": "Normalize (norm)",
|
||||
|
|
|
@ -256,6 +256,7 @@
|
|||
"abusefilter-edit-builder-funcs-lcase": "{{doc-important|Do not translate \"'''lcase'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",
|
||||
"abusefilter-edit-builder-funcs-ucase": "{{doc-important|Do not translate \"'''ucase'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",
|
||||
"abusefilter-edit-builder-funcs-ccnorm": "{{doc-important|Do not translate \"'''ccnorm'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",
|
||||
"abusefilter-edit-builder-funcs-ccnorm-contains-any": "{{doc-important|Do not translate \"'''ccnorm_contains_any'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",
|
||||
"abusefilter-edit-builder-funcs-rmdoubles": "{{doc-important|Do not translate \"'''rmdoubles'''\".}}\nAbuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.\n\nFunctional explanation: rmdoubles removes repeated characters in the argument, and returns the result. For example: \"foobybboo\" will return \"fobybo\".",
|
||||
"abusefilter-edit-builder-funcs-specialratio": "{{doc-important|Do not translate \"'''specialratio'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",
|
||||
"abusefilter-edit-builder-funcs-norm": "{{doc-important|Do not translate \"'''norm'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",
|
||||
|
|
|
@ -69,6 +69,7 @@ class AbuseFilter {
|
|||
'lcase(string)' => 'lcase',
|
||||
'ucase(string)' => 'ucase',
|
||||
'ccnorm(string)' => 'ccnorm',
|
||||
'ccnorm_contains_any(haystack,needle1,needle2,..)' => 'ccnorm-contains-any',
|
||||
'rmdoubles(string)' => 'rmdoubles',
|
||||
'specialratio(string)' => 'specialratio',
|
||||
'norm(string)' => 'norm',
|
||||
|
@ -77,7 +78,7 @@ class AbuseFilter {
|
|||
'rmwhitespace(text)' => 'rmwhitespace',
|
||||
'rmspecials(text)' => 'rmspecials',
|
||||
'ip_in_range(ip, range)' => 'ip_in_range',
|
||||
'contains_any(haystack,needle1,needle2,needle3)' => 'contains-any',
|
||||
'contains_any(haystack,needle1,needle2,...)' => 'contains-any',
|
||||
'substr(subject, offset, length)' => 'substr',
|
||||
'strpos(haystack, needle)' => 'strpos',
|
||||
'str_replace(subject, search, replace)' => 'str_replace',
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
<?php
|
||||
|
||||
use MediaWiki\Auth\AuthManager;
|
||||
use MediaWiki\MediaWikiServices;
|
||||
|
||||
class AbuseFilterHooks {
|
||||
|
|
|
@ -19,6 +19,7 @@ class AbuseFilterParser {
|
|||
'bool' => 'castBool',
|
||||
'norm' => 'funcNorm',
|
||||
'ccnorm' => 'funcCCNorm',
|
||||
'ccnorm_contains_any' => 'funcCCNormContainsAny',
|
||||
'specialratio' => 'funcSpecialRatio',
|
||||
'rmspecials' => 'funcRMSpecials',
|
||||
'rmdoubles' => 'funcRMDoubles',
|
||||
|
@ -48,7 +49,7 @@ class AbuseFilterParser {
|
|||
'contains' => 'keywordContains',
|
||||
'rlike' => 'keywordRegex',
|
||||
'irlike' => 'keywordRegexInsensitive',
|
||||
'regex' => 'keywordRegex'
|
||||
'regex' => 'keywordRegex',
|
||||
];
|
||||
|
||||
public static $funcCache = [];
|
||||
|
@ -1074,38 +1075,85 @@ class AbuseFilterParser {
|
|||
}
|
||||
|
||||
$s = array_shift( $args );
|
||||
$s = $s->toString();
|
||||
|
||||
$searchStrings = [];
|
||||
return new AFPData( AFPData::DBOOL, self::containsAny( $s, $args ) );
|
||||
}
|
||||
|
||||
foreach ( $args as $arg ) {
|
||||
$searchStrings[] = $arg->toString();
|
||||
/**
|
||||
* Normalize and search a string for multiple substrings
|
||||
*
|
||||
* @param $args array
|
||||
* @return AFPData
|
||||
* @throws AFPUserVisibleException
|
||||
*/
|
||||
protected function funcCCNormContainsAny( $args ) {
|
||||
if ( count( $args ) < 2 ) {
|
||||
throw new AFPUserVisibleException(
|
||||
'notenoughargs',
|
||||
$this->mCur->pos,
|
||||
[ 'ccnorm_contains_any', 2, count( $args ) ]
|
||||
);
|
||||
}
|
||||
|
||||
if ( function_exists( 'fss_prep_search' ) ) {
|
||||
$fss = fss_prep_search( $searchStrings );
|
||||
$result = fss_exec_search( $fss, $s );
|
||||
$s = array_shift( $args );
|
||||
|
||||
$ok = is_array( $result );
|
||||
return new AFPData( AFPData::DBOOL, self::containsAny( $s, $args, true ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Search for substrings in a string
|
||||
*
|
||||
* Use normalize = true to make use of ccnorm and
|
||||
* normalize both sides of the search.
|
||||
*
|
||||
* @param AFPData $string
|
||||
* @param AFPData[] $values
|
||||
* @param bool $normalize
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
protected static function containsAny( $string, $values, $normalize = false ) {
|
||||
$string = $string->toString();
|
||||
|
||||
if ( $string == '' ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( $normalize ) {
|
||||
$string = self::ccnorm( $string );
|
||||
}
|
||||
|
||||
$values = array_map( function ( $val ) use ( $normalize ) {
|
||||
$str = $val->toString();
|
||||
if ( $normalize ) {
|
||||
$str = AbuseFilterParser::ccnorm( $str );
|
||||
}
|
||||
|
||||
return $str;
|
||||
}, $values );
|
||||
|
||||
if ( function_exists( 'fss_prep_search' ) ) {
|
||||
$fss = fss_prep_search( $values );
|
||||
$result = fss_exec_search( $fss, $string );
|
||||
|
||||
return ( $result !== false );
|
||||
} else {
|
||||
$ok = false;
|
||||
foreach ( $searchStrings as $needle ) {
|
||||
foreach ( $values as $needle ) {
|
||||
// Bug #60203: Keep empty parameters from causing PHP warnings
|
||||
if ( $needle !== '' && strpos( $s, $needle ) !== false ) {
|
||||
$ok = true;
|
||||
break;
|
||||
if ( $needle !== '' && strpos( $string, $needle ) !== false ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new AFPData( AFPData::DBOOL, $ok );
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param $s
|
||||
* @return mixed
|
||||
*/
|
||||
protected function ccnorm( $s ) {
|
||||
protected static function ccnorm( $s ) {
|
||||
if ( is_callable( 'AntiSpoof::normalizeString' ) ) {
|
||||
$s = AntiSpoof::normalizeString( $s );
|
||||
} else {
|
||||
|
|
1
tests/parserTests/ccnorm-contains-any.r
Normal file
1
tests/parserTests/ccnorm-contains-any.r
Normal file
|
@ -0,0 +1 @@
|
|||
MATCH
|
1
tests/parserTests/ccnorm-contains-any.t
Normal file
1
tests/parserTests/ccnorm-contains-any.t
Normal file
|
@ -0,0 +1 @@
|
|||
ccnorm_contains_any("like 4ny0ne else", "foo", "aNyon3") & ccnorm_contains_any("street f1ghter","F1ght")
|
1
tests/parserTests/contains-any.r
Normal file
1
tests/parserTests/contains-any.r
Normal file
|
@ -0,0 +1 @@
|
|||
MATCH
|
1
tests/parserTests/contains-any.t
Normal file
1
tests/parserTests/contains-any.t
Normal file
|
@ -0,0 +1 @@
|
|||
contains_any("like anyone else", "else", "someone") & contains_any("street fighter", "fight")
|
|
@ -55,7 +55,7 @@ class AbuseFilterParserTest extends MediaWikiTestCase {
|
|||
* @dataProvider readTests
|
||||
*/
|
||||
public function testParser( $testName, $rule, $expected ) {
|
||||
if ( !class_exists( 'AntiSpoof' ) && preg_match( '/(cc)?norm\(/i', $rule ) ) {
|
||||
if ( !class_exists( 'AntiSpoof' ) && preg_match( '/(?:cc)?norm(?:\(|_)/i', $rule ) ) {
|
||||
// The norm and ccnorm parser functions aren't working correctly without AntiSpoof
|
||||
$this->markTestSkipped( 'Parser test ' . $testName . ' requires the AntiSpoof extension' );
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue