Refactored generation of mention events.

To increase reusability in future changes.

Change-Id: Ia56f4c587361f0214d738cdf690cf9abf93a2021
This commit is contained in:
WMDE-Fisch 2016-07-05 16:45:14 +02:00
parent 0b1c0a8530
commit 86d3352464
2 changed files with 229 additions and 73 deletions

View file

@ -147,64 +147,18 @@ abstract class EchoDiscussionParser {
} }
$content = self::stripHeader( $content ); $content = self::stripHeader( $content );
$output = self::parseNonEditWikitext( $content, new Article( $title ) ); $userLinks = self::getUserLinks( $content, $title );
$links = $output->getLinks(); if ( !$userLinks ) {
if ( !isset( $links[NS_USER] ) || !is_array( $links[NS_USER] ) ) {
return; return;
} }
$mentionedUsers = array();
$unknownUsers = array();
$anonymousUsers = array();
$count = 0;
$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
foreach ( $links[NS_USER] as $dbk => $page_id ) { $userMentions = self::getUserMentions( $title, $revision->getUser( Revision::RAW ), $userLinks );
// we should not add user to 'mention' notification list if $overallMentionsCount = self::getOverallUserMentionsCount( $userMentions );
// 1. the user link links to a subpage if ( $overallMentionsCount === 0 ) {
if ( self::hasSubpage( $dbk ) ) { return;
continue;
} }
// 2. user is an anonymous IP if ( $overallMentionsCount > $wgEchoMaxMentionsCount ) {
if ( User::isIP( $dbk ) ) {
$anonymousUsers[] = $dbk;
$count++;
$stats->increment( 'echo.event.mention.error.anonUser' );
continue;
}
$user = User::newFromName( $dbk );
// 3. the user name is not valid
if ( !$user ) {
$unknownUsers[] = str_replace( '_', ' ', $dbk );
$count++;
$stats->increment( 'echo.event.mention.error.invalidUser' );
continue;
}
// 4. the user mentions themselves
if ( $user->getId() == $revision->getUser( Revision::RAW ) ) {
$stats->increment( 'echo.event.mention.error.sameUser' );
continue;
}
// 5. the user is the owner of the talk page
if ( $title->getNamespace() === NS_USER_TALK && $title->getDBkey() === $dbk ) {
$stats->increment( 'echo.event.mention.error.ownPage' );
continue;
}
// 6. user does not exist
if ( $user->getId() === 0 ) {
$unknownUsers[] = str_replace( '_', ' ', $dbk );
$count++;
$stats->increment( 'echo.event.mention.error.unknownUser' );
continue;
}
$mentionedUsers[$user->getId()] = $user->getId();
$count++;
// If more users are being pinged this is likely a spam/attack vector
// Don't send any mention notifications.
if ( $count > $wgEchoMaxMentionsCount ) {
if ( $wgEchoMentionStatusNotifications ) { if ( $wgEchoMentionStatusNotifications ) {
EchoEvent::create( array( EchoEvent::create( array(
'type' => 'mention-failure-too-many', 'type' => 'mention-failure-too-many',
@ -217,14 +171,12 @@ abstract class EchoDiscussionParser {
'agent' => $agent, 'agent' => $agent,
) ); ) );
} }
$stats->increment( 'echo.event.mention.error.tooMany' );
return; return;
} }
}
if ( $wgEchoMentionStatusNotifications ) { if ( $wgEchoMentionStatusNotifications ) {
// TODO batch? // TODO batch?
foreach ( $unknownUsers as $unknownUser ) { foreach ( $userMentions['unknownUsers'] as $unknownUser ) {
EchoEvent::create( array( EchoEvent::create( array(
'type' => 'mention-failure', 'type' => 'mention-failure',
'title' => $title, 'title' => $title,
@ -239,7 +191,7 @@ abstract class EchoDiscussionParser {
} }
// TODO batch? // TODO batch?
foreach ( $anonymousUsers as $anonymousUser ) { foreach ( $userMentions['anonymousUsers'] as $anonymousUser ) {
EchoEvent::create( array( EchoEvent::create( array(
'type' => 'mention-failure', 'type' => 'mention-failure',
'title' => $title, 'title' => $title,
@ -254,7 +206,7 @@ abstract class EchoDiscussionParser {
} }
} }
if ( !$mentionedUsers ) { if ( !$userMentions['validMentions'] ) {
return; return;
} }
@ -265,12 +217,107 @@ abstract class EchoDiscussionParser {
'content' => $content, 'content' => $content,
'section-title' => $header, 'section-title' => $header,
'revid' => $revision->getId(), 'revid' => $revision->getId(),
'mentioned-users' => $mentionedUsers, 'mentioned-users' => $userMentions['validMentions'],
), ),
'agent' => $agent, 'agent' => $agent,
) ); ) );
} }
private static function getOverallUserMentionsCount( $userMentions ) {
return count( $userMentions, COUNT_RECURSIVE ) - count( $userMentions );
}
/**
* @return array[]
* Set of arrays containing valid mentions and possible intended but failed mentions.
* - [validMentions]: An array of valid users to mention with ID => ID.
* - [unknownUsers]: An array of DBKey strings representing unknown users.
* - [validMentions]: An array of DBKey strings representing anonymous IP users.
*/
private static function getUserMentions( Title $title, $revisionUserId, array $userLinks ) {
global $wgEchoMaxMentionsCount;
$userMentions = array(
'validMentions' => array(),
'unknownUsers' => array(),
'anonymousUsers' => array(),
);
$count = 0;
$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
foreach ( $userLinks as $dbk => $page_id ) {
// If more users are being pinged this is likely a spam/attack vector
// Don't send any mention notifications.
if ( $count > $wgEchoMaxMentionsCount ) {
$stats->increment( 'echo.event.mention.error.tooMany' );
break;
}
// we should not add user to 'mention' notification list if
// 1. the user link links to a subpage
if ( self::hasSubpage( $dbk ) ) {
continue;
}
// 2. user is an anonymous IP
if ( User::isIP( $dbk ) ) {
$userMentions['anonymousUsers'][] = $dbk;
$count++;
$stats->increment( 'echo.event.mention.error.anonUser' );
continue;
}
$user = User::newFromName( $dbk );
// 3. the user name is not valid
if ( !$user ) {
$userMentions['unknownUsers'][] = str_replace( '_', ' ', $dbk );
$count++;
$stats->increment( 'echo.event.mention.error.invalidUser' );
continue;
}
// 4. the user mentions themselves
if ( $user->getId() === $revisionUserId ) {
$stats->increment( 'echo.event.mention.error.sameUser' );
continue;
}
// 5. the user is the owner of the talk page
if ( $title->getNamespace() === NS_USER_TALK && $title->getDBkey() === $dbk ) {
$stats->increment( 'echo.event.mention.error.ownPage' );
continue;
}
// 6. user does not exist
if ( $user->getId() === 0 ) {
$userMentions['unknownUsers'][] = str_replace( '_', ' ', $dbk );
$count++;
$stats->increment( 'echo.event.mention.error.unknownUser' );
continue;
}
$userMentions['validMentions'][$user->getId()] = $user->getId();
$count++;
}
return $userMentions;
}
/**
* @return bool|array
* Array of links in the user namespace with DBKey => ID.
*/
private static function getUserLinks( $content, Title $title ) {
$output = self::parseNonEditWikitext( $content, new Article( $title ) );
$links = $output->getLinks();
if ( !isset( $links[NS_USER] ) || !is_array( $links[NS_USER] ) ) {
return false;
}
return $links[NS_USER];
}
private static function hasSubpage( $dbk ) { private static function hasSubpage( $dbk ) {
return strpos( $dbk, '/' ) !== false; return strpos( $dbk, '/' ) !== false;
} }

View file

@ -174,6 +174,7 @@ class EchoDiscussionParserTest extends MediaWikiTestCase {
array( array(
'type' => 'mention', 'type' => 'mention',
'agent' => 'Cwobeel', 'agent' => 'Cwobeel',
'section-title' => 'Grand jury no bill reception',
/* /*
* I wish I could also compare EchoEvent::$extra data to * I wish I could also compare EchoEvent::$extra data to
* compare user ids of mentioned users. However, due to * compare user ids of mentioned users. However, due to
@ -196,6 +197,7 @@ class EchoDiscussionParserTest extends MediaWikiTestCase {
array( array(
'type' => 'mention', 'type' => 'mention',
'agent' => 'Schnark', 'agent' => 'Schnark',
'section-title' => 'Echo-Test',
), ),
), ),
), ),
@ -212,6 +214,7 @@ class EchoDiscussionParserTest extends MediaWikiTestCase {
array( array(
'type' => 'mention', 'type' => 'mention',
'agent' => 'PauloEduardo', 'agent' => 'PauloEduardo',
'section-title' => 'Notificações',
), ),
), ),
), ),
@ -247,10 +250,12 @@ class EchoDiscussionParserTest extends MediaWikiTestCase {
array( array(
'type' => 'mention', 'type' => 'mention',
'agent' => 'PatHadley', 'agent' => 'PatHadley',
'section-title' => 'Wizardry required',
), ),
array( array(
'type' => 'edit-user-talk', 'type' => 'edit-user-talk',
'agent' => 'PatHadley', 'agent' => 'PatHadley',
'section-title' => 'Wizardry required',
), ),
), ),
'precondition' => 'isParserFunctionsInstalled', 'precondition' => 'isParserFunctionsInstalled',
@ -268,10 +273,12 @@ class EchoDiscussionParserTest extends MediaWikiTestCase {
array( array(
'type' => 'mention', 'type' => 'mention',
'agent' => 'Kudpung', 'agent' => 'Kudpung',
'section-title' => 'Me?',
), ),
array( array(
'type' => 'edit-user-talk', 'type' => 'edit-user-talk',
'agent' => 'Kudpung', 'agent' => 'Kudpung',
'section-title' => 'Me?',
), ),
), ),
), ),
@ -287,10 +294,12 @@ class EchoDiscussionParserTest extends MediaWikiTestCase {
array( array(
'type' => 'mention', 'type' => 'mention',
'agent' => 'Admin', 'agent' => 'Admin',
'section-title' => 'Hi',
), ),
array( array(
'type' => 'edit-user-talk', 'type' => 'edit-user-talk',
'agent' => 'Admin', 'agent' => 'Admin',
'section-title' => 'Hi',
), ),
), ),
'precondition' => 'isParserFunctionsInstalled', 'precondition' => 'isParserFunctionsInstalled',
@ -306,13 +315,14 @@ class EchoDiscussionParserTest extends MediaWikiTestCase {
array( array(
'type' => 'mention-failure', 'type' => 'mention-failure',
'agent' => 'Admin', 'agent' => 'Admin',
'section-title' => 'Hello Users',
), ),
array( array(
'type' => 'mention-failure', 'type' => 'mention-failure',
'agent' => 'Admin', 'agent' => 'Admin',
'section-title' => 'Hello Users',
), ),
), ),
), ),
); );
} }
@ -338,6 +348,7 @@ class EchoDiscussionParserTest extends MediaWikiTestCase {
$events[] = array( $events[] = array(
'type' => $event->getType(), 'type' => $event->getType(),
'agent' => $event->getAgent()->getName(), 'agent' => $event->getAgent()->getName(),
'section-title' => $event->getExtraParam( 'section-title' ),
); );
return false; return false;
} }
@ -356,6 +367,7 @@ class EchoDiscussionParserTest extends MediaWikiTestCase {
array( array(
'type' => 'mention-failure-too-many', 'type' => 'mention-failure-too-many',
'agent' => 'Admin', 'agent' => 'Admin',
'section-title' => 'Hello Users',
'max-mentions' => 5, 'max-mentions' => 5,
), ),
); );
@ -368,6 +380,7 @@ class EchoDiscussionParserTest extends MediaWikiTestCase {
$events[] = array( $events[] = array(
'type' => $event->getType(), 'type' => $event->getType(),
'agent' => $event->getAgent()->getName(), 'agent' => $event->getAgent()->getName(),
'section-title' => $event->getExtraParam( 'section-title' ),
'max-mentions' => $event->getExtraParam( 'max-mentions' ), 'max-mentions' => $event->getExtraParam( 'max-mentions' ),
); );
return false; return false;
@ -1188,6 +1201,102 @@ TEXT
$this->assertEquals( 3, EchoDiscussionParser::getSectionCount( $one . $two . $three ) ); $this->assertEquals( 3, EchoDiscussionParser::getSectionCount( $one . $two . $three ) );
} }
public function testGetOverallUserMentionsCount() {
$userMentions = array(
'validMentions' => array( 1 => 1 ),
'unknownUsers' => array( 'NotKnown1', 'NotKnown2' ),
'anonymousUsers' => array( '127.0.0.1' ),
);
$discussionParser = TestingAccessWrapper::newFromClass( 'EchoDiscussionParser' );
$this->assertEquals( 4, $discussionParser->getOverallUserMentionsCount( $userMentions ) );
}
public function provider_getUserMentions() {
return array(
array(
array( 'NotKnown1' => 0 ),
array(
'validMentions' => array(),
'unknownUsers' => array( 'NotKnown1' ),
'anonymousUsers' => array(),
),
1
),
array(
array( '127.0.0.1' => 0 ),
array(
'validMentions' => array(),
'unknownUsers' => array(),
'anonymousUsers' => array( '127.0.0.1' ),
),
1
),
);
}
/**
* @dataProvider provider_getUserMentions
*/
public function testGetUserMentions( $userLinks, $expectedUserMentions, $agent ) {
$title = Title::newFromText( 'Test' );
$discussionParser = TestingAccessWrapper::newFromClass( 'EchoDiscussionParser' );
$this->assertEquals( $expectedUserMentions, $discussionParser->getUserMentions( $title, $agent, $userLinks ) );
}
public function testGetUserMentions_validMention() {
$userName = 'Admin';
$userId = User::newFromName( $userName )->getId();
$expectedUserMentions = array(
'validMentions' => array( $userId => $userId ),
'unknownUsers' => array(),
'anonymousUsers' => array(),
);
$userLinks = array( $userName => $userId );
$this->testGetUserMentions( $userLinks, $expectedUserMentions, 1 );
}
public function testGetUserMentions_ownMention() {
$userName = 'Admin';
$userId = User::newFromName( 'Admin' )->getId();
$expectedUserMentions = array(
'validMentions' => array(),
'unknownUsers' => array(),
'anonymousUsers' => array(),
);
$userLinks = array( $userName => $userId );
$this->testGetUserMentions( $userLinks, $expectedUserMentions, $userId );
}
public function testGetUserMentions_tooManyMentions() {
$userLinks = array(
'NotKnown1' => 0,
'NotKnown2' => 0,
'NotKnown3' => 0,
'127.0.0.1' => 0,
'127.0.0.2' => 0,
);
$this->setMwGlobals( array(
// lower limit for the mention-too-many notification
'wgEchoMaxMentionsCount' => 3
) );
$title = Title::newFromText( 'Test' );
$discussionParser = TestingAccessWrapper::newFromClass( 'EchoDiscussionParser' );
$this->assertEquals( 4, $discussionParser->getOverallUserMentionsCount( $discussionParser->getUserMentions( $title, 1, $userLinks ) ) );
}
private function generateUserIdsForValidUserMentions( $userMentions ) {
$validMentionsWithIds = array();
foreach ( $userMentions['validMentions'] as $userName ) {
$userId = User::newFromName( $userName )->getId();
$validMentionsWithIds[$userId] = $userId;
}
$userMentions['validMentions'] = $validMentionsWithIds;
return $userMentions;
}
protected function isParserFunctionsInstalled() { protected function isParserFunctionsInstalled() {
if ( class_exists( 'ExtParserFunctions' ) ) { if ( class_exists( 'ExtParserFunctions' ) ) {
return true; return true;