Add ccnorm_contains_any function

Normalize and search a string for multiple substrings

Bug: T65242
Change-Id: I4034c0054a6849babbf2d96ea13dc97d3660d5b4
This commit is contained in:
Dayllan Maza 2017-09-19 20:54:03 -03:00
parent f00de10b24
commit 2bc8873c30
10 changed files with 73 additions and 19 deletions

View file

@ -225,6 +225,7 @@
"abusefilter-edit-builder-funcs-lcase": "To lower case (lcase)",
"abusefilter-edit-builder-funcs-ucase": "To upper case (ucase)",
"abusefilter-edit-builder-funcs-ccnorm": "Normalize confusable characters (ccnorm)",
"abusefilter-edit-builder-funcs-ccnorm-contains-any": "Normalize and search a string for multiple substrings (ccnorm_contains_any)",
"abusefilter-edit-builder-funcs-rmdoubles": "Remove double-characters (rmdoubles)",
"abusefilter-edit-builder-funcs-specialratio": "Special characters / total characters (specialratio)",
"abusefilter-edit-builder-funcs-norm": "Normalize (norm)",

View file

@ -256,6 +256,7 @@
"abusefilter-edit-builder-funcs-lcase": "{{doc-important|Do not translate \"'''lcase'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",
"abusefilter-edit-builder-funcs-ucase": "{{doc-important|Do not translate \"'''ucase'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",
"abusefilter-edit-builder-funcs-ccnorm": "{{doc-important|Do not translate \"'''ccnorm'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",
"abusefilter-edit-builder-funcs-ccnorm-contains-any": "{{doc-important|Do not translate \"'''ccnorm_contains_any'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",
"abusefilter-edit-builder-funcs-rmdoubles": "{{doc-important|Do not translate \"'''rmdoubles'''\".}}\nAbuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.\n\nFunctional explanation: rmdoubles removes repeated characters in the argument, and returns the result. For example: \"foobybboo\" will return \"fobybo\".",
"abusefilter-edit-builder-funcs-specialratio": "{{doc-important|Do not translate \"'''specialratio'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",
"abusefilter-edit-builder-funcs-norm": "{{doc-important|Do not translate \"'''norm'''\".}} Abuse filter syntax option in a dropdown from the group {{msg-mw|abusefilter-edit-builder-group-funcs}}.",

View file

@ -69,6 +69,7 @@ class AbuseFilter {
'lcase(string)' => 'lcase',
'ucase(string)' => 'ucase',
'ccnorm(string)' => 'ccnorm',
'ccnorm_contains_any(haystack,needle1,needle2,..)' => 'ccnorm-contains-any',
'rmdoubles(string)' => 'rmdoubles',
'specialratio(string)' => 'specialratio',
'norm(string)' => 'norm',
@ -77,7 +78,7 @@ class AbuseFilter {
'rmwhitespace(text)' => 'rmwhitespace',
'rmspecials(text)' => 'rmspecials',
'ip_in_range(ip, range)' => 'ip_in_range',
'contains_any(haystack,needle1,needle2,needle3)' => 'contains-any',
'contains_any(haystack,needle1,needle2,...)' => 'contains-any',
'substr(subject, offset, length)' => 'substr',
'strpos(haystack, needle)' => 'strpos',
'str_replace(subject, search, replace)' => 'str_replace',

View file

@ -1,6 +1,5 @@
<?php
use MediaWiki\Auth\AuthManager;
use MediaWiki\MediaWikiServices;
class AbuseFilterHooks {

View file

@ -19,6 +19,7 @@ class AbuseFilterParser {
'bool' => 'castBool',
'norm' => 'funcNorm',
'ccnorm' => 'funcCCNorm',
'ccnorm_contains_any' => 'funcCCNormContainsAny',
'specialratio' => 'funcSpecialRatio',
'rmspecials' => 'funcRMSpecials',
'rmdoubles' => 'funcRMDoubles',
@ -48,7 +49,7 @@ class AbuseFilterParser {
'contains' => 'keywordContains',
'rlike' => 'keywordRegex',
'irlike' => 'keywordRegexInsensitive',
'regex' => 'keywordRegex'
'regex' => 'keywordRegex',
];
public static $funcCache = [];
@ -1074,38 +1075,85 @@ class AbuseFilterParser {
}
$s = array_shift( $args );
$s = $s->toString();
$searchStrings = [];
return new AFPData( AFPData::DBOOL, self::containsAny( $s, $args ) );
}
foreach ( $args as $arg ) {
$searchStrings[] = $arg->toString();
/**
* Normalize and search a string for multiple substrings
*
* @param AFPData[] $args Haystack first, followed by one or more needles
* @return AFPData
* @throws AFPUserVisibleException
*/
protected function funcCCNormContainsAny( $args ) {
if ( count( $args ) < 2 ) {
throw new AFPUserVisibleException(
'notenoughargs',
$this->mCur->pos,
[ 'ccnorm_contains_any', 2, count( $args ) ]
);
}
if ( function_exists( 'fss_prep_search' ) ) {
$fss = fss_prep_search( $searchStrings );
$result = fss_exec_search( $fss, $s );
$s = array_shift( $args );
$ok = is_array( $result );
return new AFPData( AFPData::DBOOL, self::containsAny( $s, $args, true ) );
}
/**
* Search for substrings in a string
*
* Use normalize = true to make use of ccnorm and
* normalize both sides of the search.
*
* @param AFPData $string
* @param AFPData[] $values
* @param bool $normalize
*
* @return bool
*/
protected static function containsAny( $string, $values, $normalize = false ) {
$string = $string->toString();
if ( $string == '' ) {
return false;
}
if ( $normalize ) {
$string = self::ccnorm( $string );
}
$values = array_map( function ( $val ) use ( $normalize ) {
$str = $val->toString();
if ( $normalize ) {
$str = AbuseFilterParser::ccnorm( $str );
}
return $str;
}, $values );
if ( function_exists( 'fss_prep_search' ) ) {
$fss = fss_prep_search( $values );
$result = fss_exec_search( $fss, $string );
return ( $result !== false );
} else {
$ok = false;
foreach ( $searchStrings as $needle ) {
foreach ( $values as $needle ) {
// Bug #60203: Keep empty parameters from causing PHP warnings
if ( $needle !== '' && strpos( $s, $needle ) !== false ) {
$ok = true;
break;
if ( $needle !== '' && strpos( $string, $needle ) !== false ) {
return true;
}
}
}
return new AFPData( AFPData::DBOOL, $ok );
return false;
}
/**
* @param $s
* @return mixed
*/
protected function ccnorm( $s ) {
protected static function ccnorm( $s ) {
if ( is_callable( 'AntiSpoof::normalizeString' ) ) {
$s = AntiSpoof::normalizeString( $s );
} else {

View file

@ -0,0 +1 @@
MATCH

View file

@ -0,0 +1 @@
ccnorm_contains_any("like 4ny0ne else", "foo", "aNyon3") & ccnorm_contains_any("street f1ghter","F1ght")

View file

@ -0,0 +1 @@
MATCH

View file

@ -0,0 +1 @@
contains_any("like anyone else", "else", "someone") & contains_any("street fighter", "fight")

View file

@ -55,7 +55,7 @@ class AbuseFilterParserTest extends MediaWikiTestCase {
* @dataProvider readTests
*/
public function testParser( $testName, $rule, $expected ) {
if ( !class_exists( 'AntiSpoof' ) && preg_match( '/(cc)?norm\(/i', $rule ) ) {
if ( !class_exists( 'AntiSpoof' ) && preg_match( '/(?:cc)?norm(?:\(|_)/i', $rule ) ) {
// The norm, ccnorm and ccnorm_contains_any parser functions don't work correctly without AntiSpoof
$this->markTestSkipped( 'Parser test ' . $testName . ' requires the AntiSpoof extension' );
}