mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/Echo
synced 2024-11-27 17:20:40 +00:00
Echo: Implement advanced notifications for vanilla talk pages.
Uses the class EchoDiscussionParser to understand actions taken on vanilla MediaWiki discussion pages. Currently notifies on these occasions: * A new comment is added to a discussion on your talk page or that you have participated in. * A new topic is added to your talk page. There are vague plans to expand to these classes of events: * Your comment is edited or removed. * A large section is moved to your talk page. and these classes of users: * Users watching discussion pages. Change-Id: Ie6cae76ed2e0ecf607059e39ac1aa480a275ec89
This commit is contained in:
parent
4f2d7b933e
commit
6ff170cada
|
@ -28,6 +28,8 @@ $messages['en'] = array(
|
|||
// Notification
|
||||
'notification-edit-talk-page' => '$2 {{GENDER:$1|edited}} your talk page $3',
|
||||
'notification-edit' => '$2 {{GENDER:$1|edited}} $3 $4',
|
||||
'notification-add-comment' => '$2 {{GENDER:$1|posted}} a comment to the discussion "$3" on $4',
|
||||
'notification-add-talkpage-topic' => '$2 {{GENDER:$1|posted}} a new topic "$3" on $4',
|
||||
|
||||
'notification-edit-email-subject' => '{{SITENAME}} notification: $3 has been edited by $2',
|
||||
'notification-edit-email-body' => 'Hello $5,
|
||||
|
@ -90,6 +92,16 @@ $messages['qqq'] = array(
|
|||
* $2 is the username of the person who edited, formatted.
|
||||
* $3 is the page that was edited, formatted.
|
||||
* $4 is a diff link, possibly formatted.',
|
||||
'notification-add-comment' => 'Format for displaying notifications of a comment being added to an existing discussion.
|
||||
$1 is the username of the person who edited, plain text. Can be used for GENDER.
|
||||
$2 is the username of the person who edited, formatted.
|
||||
$3 is the section title of the discussion.
|
||||
$4 is the page on which the discussion exists.',
|
||||
'notification-add-talkpage-topic' => 'Format for displaying notifications of a new discussion being added
|
||||
$1 is the username of the person who edited, plain text. Can be used for GENDER.
|
||||
$2 is the username of the person who edited, formatted.
|
||||
$3 is the section title of the discussion.
|
||||
$4 is the page on which the discussion was added.',
|
||||
'echo-email-subject-default' => 'Default subject for Echo email notifications',
|
||||
'echo-email-body-default' => 'Default message content for Echo email notifications.
|
||||
* $1 is a plain text description of the notification.',
|
||||
|
|
12
Echo.php
12
Echo.php
|
@ -54,10 +54,12 @@ $wgAutoloadClasses['EchoNotification'] = "$dir/model/Notification.php";
|
|||
$wgAutoloadClasses['EchoNotificationFormatter'] = "$dir/formatters/NotificationFormatter.php";
|
||||
$wgAutoloadClasses['EchoBasicFormatter'] = "$dir/formatters/BasicFormatter.php";
|
||||
$wgAutoloadClasses['EchoEditFormatter'] = "$dir/formatters/EditFormatter.php";
|
||||
$wgAutoloadClasses['EchoCommentFormatter'] = "$dir/formatters/CommentFormatter.php";
|
||||
|
||||
// Internal stuff
|
||||
$wgAutoloadClasses['EchoNotifier'] = "$dir/Notifier.php";
|
||||
$wgAutoloadClasses['EchoNotificationController'] = "$dir/controller/NotificationController.php";
|
||||
$wgAutoloadClasses['EchoDiscussionParser'] = "$dir/includes/DiscussionParser.php";
|
||||
|
||||
// Job queue
|
||||
$wgAutoloadClasses['EchoNotificationJob'] = "$dir/jobs/NotificationJob.php";
|
||||
|
@ -162,4 +164,14 @@ $wgEchoNotificationFormatters = array(
|
|||
'email-subject-message' => 'notification-edit-email-subject',
|
||||
'email-body-message' => 'notification-edit-email-body',
|
||||
),
|
||||
'add-comment' => array(
|
||||
'type' => 'comment',
|
||||
'message-key' => 'notification-add-comment',
|
||||
'message-params' => array( 'agent', 'subject', 'title' ),
|
||||
),
|
||||
'add-talkpage-topic' => array(
|
||||
'type' => 'comment',
|
||||
'message-key' => 'notification-add-talkpage-topic',
|
||||
'message-params' => array( 'agent', 'subject', 'title' ),
|
||||
),
|
||||
);
|
||||
|
|
48
Hooks.php
48
Hooks.php
|
@ -12,6 +12,8 @@ class EchoHooks {
|
|||
"$dir/db_patches/patch-event_agent-split.sql", true );
|
||||
$updater->modifyField( 'echo_event', 'event_variant',
|
||||
"$dir/db_patches/patch-event_variant_nullability.sql", true );
|
||||
$updater->modifyField( 'echo_event', 'event_extra',
|
||||
"$dir/db_patches/patch-event_extra-size.sql", true );
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -29,6 +31,21 @@ class EchoHooks {
|
|||
$users[$user->getId()] = $user;
|
||||
}
|
||||
break;
|
||||
case 'add-comment':
|
||||
case 'add-talkpage-topic':
|
||||
$extraData = $event->getExtra();
|
||||
|
||||
if ( !isset( $extraData['revid'] ) || !$extraData['revid'] ) {
|
||||
break;
|
||||
}
|
||||
|
||||
$revision = Revision::newFromId( $extraData['revid'] );
|
||||
|
||||
$users = array_merge(
|
||||
$users,
|
||||
EchoDiscussionParser::getNotifiedUsersForComment($revision)
|
||||
);
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -90,27 +107,20 @@ class EchoHooks {
|
|||
}
|
||||
|
||||
public static function onArticleSaved( &$article, &$user, $text, $summary, $minoredit, $watchthis, $sectionanchor, &$flags, $revision, &$status ) {
|
||||
if ( $revision ) {
|
||||
$event = EchoEvent::create( array(
|
||||
'type' => 'edit',
|
||||
'title' => $article->getTitle(),
|
||||
'extra' => array('revid' => $revision->getID()),
|
||||
'agent' => $user,
|
||||
) );
|
||||
if ( !$revision ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
$possibleUser = $article->getTitle()->getText();
|
||||
$event = EchoEvent::create( array(
|
||||
'type' => 'edit',
|
||||
'title' => $article->getTitle(),
|
||||
'extra' => array('revid' => $revision->getID()),
|
||||
'agent' => $user,
|
||||
) );
|
||||
|
||||
if (
|
||||
$article->getTitle()->getNamespace() === NS_USER_TALK &&
|
||||
User::newFromName($possibleUser)->getID()
|
||||
) {
|
||||
$event = EchoEvent::create( array(
|
||||
'type' => 'edit-user-talk',
|
||||
'title' => $article->getTitle(),
|
||||
'extra' => array('revid' => $revision->getID()),
|
||||
'agent' => $user,
|
||||
) );
|
||||
}
|
||||
|
||||
if ( $article->getTitle()->isTalkPage() ) {
|
||||
EchoDiscussionParser::generateEventsForRevision( $revision );
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
2
db_patches/patch-event_extra-size.sql
Normal file
2
db_patches/patch-event_extra-size.sql
Normal file
|
@ -0,0 +1,2 @@
|
|||
-- Widens echo_event.event_extra from TINYBLOB to BLOB so that larger
-- serialized payloads (e.g. comment content) fit in the column.
ALTER TABLE /*_*/echo_event CHANGE COLUMN event_extra event_extra BLOB NULL;
|
2
echo.sql
2
echo.sql
|
@ -21,7 +21,7 @@ CREATE TABLE /*_*/echo_event (
|
|||
event_agent_ip varchar(255) binary null, -- IP address who triggered it, if any
|
||||
event_page_namespace int unsigned null,
|
||||
event_page_title varchar(255) binary null,
|
||||
event_extra TINYBLOB NULL
|
||||
event_extra BLOB NULL
|
||||
) /*$wgDBTableOptions*/;
|
||||
|
||||
CREATE INDEX /*i*/type_page ON /*_*/echo_event (event_type,event_page_namespace,event_page_title,event_timestamp);
|
||||
|
|
29
formatters/CommentFormatter.php
Normal file
29
formatters/CommentFormatter.php
Normal file
|
@ -0,0 +1,29 @@
|
|||
<?php
|
||||
|
||||
/**
 * Formatter for discussion notifications.
 *
 * Resolves the 'subject' and 'commentText' message parameters from the
 * event's extra data; every other parameter is delegated to the parent
 * EchoEditFormatter.
 */
class EchoCommentFormatter extends EchoEditFormatter {
	/**
	 * Adds the value of a single message parameter to $message.
	 *
	 * @param $event Event object; its getExtra() array may carry
	 *     'section-title' and 'content'.
	 * @param $param String, name of the parameter to resolve.
	 * @param $message Message object that receives the value via params().
	 * @param $user Passed through unchanged to the parent implementation.
	 */
	protected function processParam( $event, $param, $message, $user ) {
		$extra = $event->getExtra();

		if ( $param !== 'subject' && $param !== 'commentText' ) {
			// Not one of ours: let the edit formatter deal with it.
			parent::processParam( $event, $param, $message, $user );
			return;
		}

		if ( $param === 'subject' ) {
			// Section title, or an empty string when none was recorded.
			$sectionTitle = !empty( $extra['section-title'] ) ? $extra['section-title'] : '';
			$message->params( $sectionTitle );
			return;
		}

		// $param === 'commentText'
		global $wgLang; // Message::language is protected :(

		if ( empty( $extra['content'] ) ) {
			$message->params( '' );
			return;
		}

		// Drop any section header, then shorten the comment to 200 via $wgLang.
		$snippet = EchoDiscussionParser::stripHeader( $extra['content'] );
		$message->params( $wgLang->truncate( $snippet, 200 ) );
	}
}
|
|
@ -4,6 +4,7 @@ abstract class EchoNotificationFormatter {
|
|||
static $formatterClasses = array(
|
||||
'basic' => 'EchoBasicFormatter',
|
||||
'edit' => 'EchoEditFormatter',
|
||||
'comment' => 'EchoCommentFormatter',
|
||||
);
|
||||
protected $validOutputFormats = array('text', 'html', 'email');
|
||||
protected $outputFormat = 'text';
|
||||
|
|
725
includes/DiscussionParser.php
Normal file
725
includes/DiscussionParser.php
Normal file
|
@ -0,0 +1,725 @@
|
|||
<?php
|
||||
|
||||
abstract class EchoDiscussionParser {
|
||||
static $timestampRegex;
|
||||
static $headerRegex = '^\=\=\s*([^=].*)\s*\=\=$';
|
||||
static $revisionInterpretationCache = array();
|
||||
|
||||
/**
 * Given a Revision object, generates EchoEvent objects for
 * the discussion-related actions that occurred in that Revision.
 *
 * - 'add-comment' actions produce an 'add-comment' event.
 * - 'new-section-with-comment' actions produce an 'add-talkpage-topic' event.
 * - If neither was produced and the page is a user talk page belonging to a
 *   registered user, a generic 'edit-user-talk' event is produced instead.
 *
 * The agent of every event is the author of the revision.
 *
 * @param $revision Revision object
 * @return null
 */
static function generateEventsForRevision( $revision ) {
	$interpretation = self::getChangeInterpretationForRevision( $revision );
	$createdEvents = false;
	$title = $revision->getTitle();

	// Author of the revision; anonymous authors are built from their user text.
	$userID = $revision->getUser();
	$userName = $revision->getUserText();
	$user = $userID != 0 ? User::newFromId( $userID ) : User::newFromName( $userName, false );

	foreach ( $interpretation as $action ) {
		if ( $action['type'] == 'add-comment' ) {
			$fullSection = $action['full-section'];
			$header = self::extractHeader( $fullSection );

			EchoEvent::create( array(
				'type' => 'add-comment',
				'title' => $title,
				'extra' => array(
					'revid' => $revision->getID(),
					'section-title' => $header,
					'content' => $action['content'],
				),
				'agent' => $user,
			) );
			$createdEvents = true;
		} elseif ( $action['type'] == 'new-section-with-comment' ) {
			$content = $action['content'];
			$header = self::extractHeader( $content );

			EchoEvent::create( array(
				'type' => 'add-talkpage-topic',
				'title' => $title,
				'extra' => array(
					'revid' => $revision->getID(),
					'section-title' => $header,
					'content' => $content,
				),
				'agent' => $user,
			) );
			$createdEvents = true;
		}
	}

	if ( !$createdEvents && $title->getNamespace() == NS_USER_TALK ) {
		// The owner of the talk page is named by the page title, not by the
		// page content. Keep it in its own variable so that $user (the
		// editor) remains the agent of the event.
		$talkPageOwner = User::newFromName( $title->getText() );
		if ( $talkPageOwner && $talkPageOwner->getID() ) {
			EchoEvent::create( array(
				'type' => 'edit-user-talk',
				'title' => $title,
				'extra' => array( 'revid' => $revision->getID() ),
				'agent' => $user,
			) );
		}
	}
}
|
||||
|
||||
/**
 * Given a Revision object, determines which users are interested
 * in related EchoEvents.
 *
 * Interested users are everybody who has signed in a section that received
 * a new comment, plus the owner of the page when it is a user talk page.
 * Only registered users (non-zero user ID) are returned.
 *
 * @param $revision Revision object.
 * @return Array of User objects, keyed by user ID.
 */
static function getNotifiedUsersForComment( $revision ) {
	$interpretation = self::getChangeInterpretationForRevision( $revision );
	$users = array();

	foreach ( $interpretation as $action ) {
		if ( $action['type'] != 'add-comment' ) {
			continue;
		}

		$fullSection = $action['full-section'];
		$interestedUsers = array_keys( self::extractSignatures( $fullSection ) );

		foreach ( $interestedUsers as $userName ) {
			$user = User::newFromName( $userName );

			// Deliberately ignoring anonymous users
			if ( $user && $user->getID() ) {
				$users[$user->getID()] = $user;
			}
		}
	}

	if ( $revision->getTitle()->getNamespace() == NS_USER_TALK ) {
		$userName = $revision->getTitle()->getText();
		$user = User::newFromName( $userName );

		// Same rule as above: a talk page whose name does not map to a
		// registered account must not add an entry under ID 0.
		if ( $user && $user->getID() ) {
			$users[$user->getID()] = $user;
		}
	}

	return $users;
}
|
||||
|
||||
/**
 * Given a Revision object, returns a talk-page-centric interpretation
 * of the changes made in it.
 *
 * Results are memoised per revision ID in self::$revisionInterpretationCache.
 * A revision without a predecessor (page creation) is diffed against an
 * empty text.
 *
 * @param $revision Revision object
 * @see EchoDiscussionParser::interpretDiff
 * @return Array, see interpretDiff for details.
 */
static function getChangeInterpretationForRevision( $revision ) {
	$revId = $revision->getID();

	if ( $revId && isset( self::$revisionInterpretationCache[$revId] ) ) {
		return self::$revisionInterpretationCache[$revId];
	}

	// interpretDiff() compares the author against the user names found in
	// signatures, so hand it the name rather than a User object.
	$userName = $revision->getUserText();

	// The first revision of a page has no previous revision.
	$prevRevision = $revision->getPrevious();
	$prevText = $prevRevision ? $prevRevision->getText() : '';

	$changes = self::getMachineReadableDiff( $prevText, $revision->getText() );
	$output = self::interpretDiff( $changes, $userName );

	self::$revisionInterpretationCache[$revId] = $output;
	return $output;
}
|
||||
|
||||
/**
 * Given a machine-readable diff, interprets the changes
 * in terms of discussion page actions
 *
 * @todo Expand recognisable actions.
 * @param $changes Output of EchoDiscussionParser::getMachineReadableDiff
 * @param $user The author of the change: either a user name (string) or an
 *     object exposing getName(). Signatures are matched against this name.
 * @return Array of associative arrays.
 * Each entry represents an action, which is classified in the 'type' field.
 * All types contain a 'content' field except 'unknown'
 *  (which instead passes through the machine-readable diff in 'details')
 *  and 'unknown-change' (which provides 'new_content' and 'old_content')
 * type may be:
 * - add-comment: A comment signed by the user is added to an
 *    existing section. Also carries 'full-section', the text of the
 *    section the comment was added to.
 * - new-section-with-comment: A new section is added, containing
 *    a single comment signed by the user in question.
 * - unknown-signed-addition: Some signed content is added, but it
 *    includes section headers, is signed by another user or
 *    otherwise confuses the interpretation engine.
 * - unknown-multi-signed-addition: Some signed content is added,
 *    but it contains multiple signatures.
 * - unknown-unsigned-addition: Some content is added, but it is
 *    unsigned.
 * - unknown-subtraction: Some content was removed. These actions are
 *    not currently analysed.
 * - unknown-change: Some content was replaced with other content.
 *    These actions are not currently analysed.
 * - unknown: Unrecognised change type.
 */
static function interpretDiff( $changes, $user ) {
	// Signatures are extracted as plain user-name strings, so compare names
	// with names instead of relying on loose object/string comparison.
	$userName = is_object( $user ) ? $user->getName() : (string)$user;

	$actions = array();

	foreach ( $changes as $index => $change ) {
		// $changes carries one extra, non-numeric item: '_info'
		if ( !is_numeric( $index ) ) {
			continue;
		}

		if ( empty( $change['action'] ) ) {
			// Unknown action; skip
			continue;
		}

		if ( $change['action'] == 'add' ) {
			$content = trim( $change['content'] );
			$startSection = preg_match( "/\A" . self::$headerRegex . '/um', $content );
			$sectionCount = self::getSectionCount( $content );
			// PHP turns numeric-looking array keys into integers; cast them
			// back so that the strict comparison below works for such names.
			$signedUsers = array_map( 'strval', array_keys( self::extractSignatures( $content ) ) );

			if (
				count( $signedUsers ) == 1 &&
				in_array( $userName, $signedUsers, true )
			) {
				if ( $sectionCount === 0 ) {
					$fullSection = self::getFullSection( $changes['_info']['rhs'], $change['right-pos'] );
					$actions[] = array(
						'type' => 'add-comment',
						'content' => $content,
						'full-section' => $fullSection,
					);
				} elseif ( $startSection && $sectionCount === 1 ) {
					$actions[] = array(
						'type' => 'new-section-with-comment',
						'content' => $content,
					);
				} else {
					$actions[] = array(
						'type' => 'unknown-signed-addition',
						'content' => $content,
					);
				}
			} elseif ( count( $signedUsers ) >= 1 ) {
				$actions[] = array(
					'type' => 'unknown-multi-signed-addition',
					'content' => $content,
				);
			} else {
				$actions[] = array(
					'type' => 'unknown-unsigned-addition',
					'content' => $content,
				);
			}
		} elseif ( $change['action'] == 'subtract' ) {
			$actions[] = array(
				'type' => 'unknown-subtraction',
				'content' => $change['content'],
			);
		} elseif ( $change['action'] == 'change' ) {
			$actions[] = array(
				'type' => 'unknown-change',
				'old_content' => $change['old_content'],
				'new_content' => $change['new_content'],
			);
		} else {
			$actions[] = array(
				'type' => 'unknown',
				'details' => $change,
			);
		}
	}

	return $actions;
}
|
||||
|
||||
/**
 * Finds the section that a given line is in.
 *
 * The section runs from the closest header line at or above the given line
 * (or the top of the page when there is none) up to, but not including, the
 * next header line below it.
 *
 * @param $lines Array of lines in the page (zero-indexed list).
 * @param $offset The line to find the full section for, as a 1-based line
 *     number. This is what getMachineReadableDiff() stores in 'right-pos'.
 * @return Content of the section, as a string.
 */
static function getFullSection( $lines, $offset ) {
	$lineCount = count( $lines );
	if ( $lineCount === 0 ) {
		return '';
	}

	// Translate the 1-based line number into an index into $lines and keep
	// it inside the array.
	$start = max( 0, min( $offset - 1, $lineCount - 1 ) );

	$content = $lines[$start];
	$headerRegex = '/' . self::$headerRegex . '/um';

	// Expand backwards until a header line has been included...
	$continue = !preg_match( $headerRegex, $lines[$start] );
	$i = $start;
	while ( $continue && $i > 0 ) {
		--$i;
		$line = $lines[$i];
		$content = "$line\n$content";
		if ( preg_match( $headerRegex, $line ) ) {
			$continue = false;
		}
	}

	// And then forwards, stopping before the next header line.
	$continue = true;
	$i = $start;
	while ( $continue && $i < $lineCount - 1 ) {
		++$i;
		$line = $lines[$i];
		if ( preg_match( $headerRegex, $line ) ) {
			$continue = false;
		} else {
			$content .= "\n$line";
		}
	}

	return $content;
}
|
||||
|
||||
/**
 * Gets the number of section headers in a string.
 *
 * @param $text The text, as a string.
 * @return Number of section headers found.
 */
static function getSectionCount( $text ) {
	$text = trim( $text );

	$matches = array();
	// preg_match_all() already takes $matches by reference; call-time
	// pass-by-reference (&$matches) is a fatal error since PHP 5.4.
	preg_match_all( '/' . self::$headerRegex . '/um', $text, $matches );

	return count( $matches[0] );
}
|
||||
|
||||
/**
 * Gets the title of a section
 *
 * @param $text The text of the section, as a string.
 * @return The title of the first section header found in $text, as a
 *     trimmed string, or false if $text contains no section header.
 */
static function extractHeader( $text ) {
	$text = trim( $text );

	$matches = array();

	// preg_match() already takes $matches by reference; call-time
	// pass-by-reference (&$matches) is a fatal error since PHP 5.4.
	if ( !preg_match( '/' . self::$headerRegex . '/um', $text, $matches ) ) {
		return false;
	}

	return trim( $matches[1] );
}
|
||||
|
||||
/**
 * Removes every section header line matched by self::$headerRegex.
 *
 * @param $text The text to strip section headers from.
 * @return String: $text with the matched header lines replaced by ''.
 */
static function stripHeader( $text ) {
	$headerPattern = '/' . self::$headerRegex . '/um';

	return preg_replace( $headerPattern, '', $text );
}
|
||||
|
||||
/**
 * Determines whether the input is a signed comment.
 *
 * @param $text The text to check.
 * @param $user If set, will only return true if the comment is
 *     signed by this user (user name, as a string).
 * @return boolean: true or false.
 */
static function isSignedComment( $text, $user = false ) {
	$timestampRegex = self::getTimestampRegex();
	$endOfLine = self::getLineEndingRegex();

	$tsMatches = array();
	if ( !preg_match(
		"/$timestampRegex$endOfLine/mu",
		$text,
		$tsMatches,
		PREG_OFFSET_CAPTURE
	) ) {
		return false;
	}

	// With PREG_OFFSET_CAPTURE each match is array( text, offset );
	// getUserFromLine() wants the offset of the timestamp.
	$userData = self::getUserFromLine( $text, $tsMatches[0][1] );

	if ( $userData === false ) {
		return false;
	} elseif ( $user === false ) {
		return true;
	}

	$foundUser = $userData[1];

	return User::getCanonicalName( $foundUser, false ) === User::getCanonicalName( $user, false );
}
|
||||
|
||||
/**
 * Finds differences between $oldText and $newText
 * and returns the result in a machine-readable format.
 *
 * @param $oldText The "left hand side" of the diff.
 * @param $newText The "right hand side" of the diff.
 * @return Array of changes.
 * Each change consists of:
 * * An 'action', one of:
 *   - add
 *   - subtract
 *   - change
 * * 'content' that was added or removed, or in the case
 *    of a change, 'old_content' and 'new_content'
 * * 'left-pos' and 'right-pos' (1-based line numbers) of the change.
 * In addition, the non-numeric key '_info' holds 'lhs' and 'rhs' (the lines
 * of both texts) and 'lhs-length' / 'rhs-length' (their line counts).
 * @throws MWException when the diff does not line up with the input texts.
 */
static function getMachineReadableDiff( $oldText, $newText ) {
	$oldText = trim( $oldText ) . "\n";
	$newText = trim( $newText ) . "\n";
	$diff = wfDiff( $oldText, $newText, '-u -w' );

	$old_lines = explode( "\n", $oldText );
	$new_lines = explode( "\n", $newText );

	// First break down the diff into additions and subtractions
	$diff_lines = explode( "\n", $diff );
	$left_pos = 0;
	$right_pos = 0;
	$changes = array();
	// Index into $changes of the change currently being extended, or false.
	$change_run = false;
	$sub_lines = 0;

	foreach ( $diff_lines as $line ) {
		if ( strlen( $line ) == 0 ) {
			continue;
		}

		$line_type = $line[0];

		if ( $line_type == ' ' ) {
			// Context line.
			// NOTE(review): the current run is not closed here, so additions
			// separated only by context lines end up in one change. Left
			// as-is because it is unclear whether that is relied upon.
			++$left_pos;
			++$right_pos;
		} elseif ( $line_type == '@' ) {
			// Hunk header: "@@ -l,n +r,m @@"
			$parts = explode( ' ', $line );
			$lhs_pos = substr( $parts[1], 1 );
			$rhs_pos = substr( $parts[2], 1 );
			list( $left_pos ) = explode( ',', $lhs_pos );
			list( $right_pos ) = explode( ',', $rhs_pos );
			$change_run = false;
		} elseif ( $line_type == '-' ) {
			$subtracted_line = substr( $line, 1 );

			if ( trim( $subtracted_line ) === '' ) {
				++$left_pos;
				continue;
			}

			// Compare against false explicitly: the first change lives at
			// index 0, which a plain truthiness test would treat as "no run".
			if ( $change_run !== false && $changes[$change_run]['action'] == 'subtract' ) {
				++$sub_lines;
				$changes[$change_run]['content'] .= "\n" . $subtracted_line;
			} else {
				$sub_lines = 1;
				$changes[] = array(
					'action' => 'subtract',
					'left-pos' => $left_pos,
					'right-pos' => $right_pos,
					'content' => $subtracted_line,
				);
				$change_run = count( $changes ) - 1;
			}

			// Consistency check
			if ( $old_lines[$left_pos - 1] != $subtracted_line ) {
				throw new MWException( "Left offset consistency error.\nOffset: $left_pos\nExpected: {$old_lines[$left_pos-1]}\nActual: $subtracted_line" );
			}
			++$left_pos;
		} elseif ( $line_type == '+' ) {
			$added_line = substr( $line, 1 );

			if ( $change_run !== false && $changes[$change_run]['action'] == 'add' ) {
				$changes[$change_run]['content'] .= "\n" . $added_line;
			} elseif ( $change_run !== false && $changes[$change_run]['action'] == 'subtract' ) {
				// A removal directly followed by an addition is a change.
				$changes[$change_run]['action'] = 'change';
				$changes[$change_run]['old_content'] = $changes[$change_run]['content'];
				$changes[$change_run]['new_content'] = $added_line;
				unset( $changes[$change_run]['content'] );
			} elseif ( $change_run !== false && $changes[$change_run]['action'] == 'change' && $sub_lines > 0 ) {
				--$sub_lines;
				$changes[$change_run]['new_content'] .= "\n" . $added_line;
			} else {
				$changes[] = array(
					'action' => 'add',
					'left-pos' => $left_pos,
					'right-pos' => $right_pos,
					'content' => $added_line,
				);
				$change_run = count( $changes ) - 1;
			}

			// Consistency check
			if ( $new_lines[$right_pos - 1] != $added_line ) {
				throw new MWException( "Right offset consistency error.\nOffset: $right_pos\nExpected: {$new_lines[$right_pos-1]}\nActual: $added_line\n" );
			}
			++$right_pos;
		}
	}

	$changes['_info'] = array(
		'lhs-length' => count( $old_lines ),
		'rhs-length' => count( $new_lines ),
		'lhs' => $old_lines,
		'rhs' => $new_lines,
	);

	return $changes;
}
|
||||
|
||||
/**
 * Finds and extracts signatures in $text
 *
 * A line counts as signed when it matches the wiki's timestamp pattern
 * followed by the optional line-ending pattern, and getUserFromLine() finds
 * a user link for it.
 *
 * @param $text The text in which to look for signed comments.
 * @return Associative array. The key is the username, the value
 *     is the last signature that was found. Note that PHP stores
 *     numeric-looking user names as integer keys.
 */
static function extractSignatures( $text ) {
	$lines = explode( "\n", $text );
	$timestampRegex = self::getTimestampRegex();
	$endOfLine = self::getLineEndingRegex();

	$output = array();

	foreach ( $lines as $line ) {
		$tsMatches = array();
		if ( !preg_match(
			"/$timestampRegex$endOfLine/mu",
			$line,
			$tsMatches,
			PREG_OFFSET_CAPTURE
		) ) {
			// Ignore lines without a timestamp
			continue;
		}

		// Now that we know we have a timestamp, look for
		// the last user link on the line.
		// With PREG_OFFSET_CAPTURE each match is array( text, offset );
		// getUserFromLine() wants the offset, not the matched text.
		$userData = self::getUserFromLine( $line, $tsMatches[0][1] );
		if ( $userData === false ) {
			continue;
		}

		list( $signaturePos, $user ) = $userData;

		// The signature is everything from the user link to the end of the line.
		$output[$user] = substr( $line, $signaturePos );
	}

	return $output;
}
|
||||
|
||||
/**
 * Works out which user, if any, signed a line of wikitext.
 *
 * Looks for links into the user namespace, the user talk namespace and
 * Special:Contributions, and picks one of them as the signature link.
 *
 * @param $line The line, as a string.
 * @param $timestampPos The offset of the start of the timestamp.
 * @return false for none, Array for success.
 * - First element is the position of the signature.
 * - Second element is the normalised user name.
 */
static function getUserFromLine( $line, $timestampPos ) {
	global $wgContLang;

	// Later entries have a higher precedence
	$prefixes = array(
		'[[' . $wgContLang->getNsText( NS_USER ) . ':',
		'[[' . $wgContLang->getNsText( NS_USER_TALK ) . ':',
		'[[' . SpecialPage::getTitleFor( 'Contributions' )->getPrefixedText() . '/',
	);

	// Prefixes containing underscores are also tried with spaces instead;
	// those variants go after the original entries.
	$spacedVariants = array();
	foreach ( $prefixes as $candidate ) {
		if ( strpos( $candidate, '_' ) !== false ) {
			$spacedVariants[] = str_replace( '_', ' ', $candidate );
		}
	}
	$prefixes = array_merge( $prefixes, $spacedVariants );

	$bestUser = false;
	$bestPos = false;

	// Look for the leftmost link to the rightmost user
	foreach ( $prefixes as $candidate ) {
		$link = self::getLinkFromLine( $line, $candidate );
		if ( $link === false ) {
			continue;
		}

		list( $pos, $user ) = $link;

		// Too far from the timestamp: couldn't be a signature
		if ( ( $timestampPos - $pos ) > 255 ) {
			continue;
		}

		$nothingYet = ( $bestPos === false );
		$laterOtherUser = ( $pos > $bestPos && $user !== $bestUser );
		$earlierSameUser = ( $pos < $bestPos && $user === $bestUser );

		if ( $nothingYet || $laterOtherUser || $earlierSameUser ) {
			$bestPos = $pos;
			$bestUser = ucfirst( trim( $user ) );
		}
	}

	return $bestUser === false ? false : array( $bestPos, $bestUser );
}
|
||||
|
||||
/**
 * Find the last link beginning with a given prefix on a line.
 *
 * The prefix is matched case-insensitively. When the user cannot be
 * extracted from the link that was found, the search is retried to the left
 * of that link.
 *
 * @param $line The line to search, as a string.
 * @param $linkPrefix The prefix to search for.
 * @param $failureOffset Position of a link at which extraction already
 *     failed; only links starting before it are considered. False (the
 *     default) searches the whole line.
 * @return false for failure, array for success.
 * - First element is the string offset of the link.
 * - Second element is the user the link refers to.
 */
static function getLinkFromLine( $line, $linkPrefix, $failureOffset = false ) {
	$offset = 0;

	// If extraction failed at another offset, try again.
	if ( $failureOffset !== false ) {
		if ( $failureOffset <= 0 ) {
			// Nothing is left of position 0; the offset computed below would
			// fall outside the string, which strripos() rejects.
			return false;
		}

		// A negative offset makes strripos() only report matches that start
		// at least that many characters from the end, i.e. before the
		// position that failed.
		$offset = $failureOffset - strlen( $line ) - 1;
	}

	$linkPos = strripos( $line, $linkPrefix, $offset );

	if ( $linkPos === false ) {
		return false;
	}

	$linkUser = self::extractUserFromLink( $line, $linkPrefix, $linkPos );

	if ( $linkUser === false ) {
		// Look for another place.
		return self::getLinkFromLine( $line, $linkPrefix, $linkPos );
	}

	return array( $linkPos, $linkUser );
}
|
||||
|
||||
/**
 * Given text including a link, gives the user that that link refers to
 *
 * @param $text The text to extract from.
 * @param $prefix The link prefix that was used to find the link.
 * @param $offset Optionally, the offset of the start of the link.
 * @return The result of User::getCanonicalName() (without validation) for
 *     the link target, or false when the target is empty or is neither an
 *     IP address nor a name accepted by User::getCanonicalName().
 */
static function extractUserFromLink( $text, $prefix, $offset = 0 ) {
	// Everything after the prefix...
	$userPart = substr( $text, strlen( $prefix ) + $offset );

	// ...up to the first pipe, closing bracket or fragment marker.
	$userMatches = array();
	// preg_match() already takes $userMatches by reference; call-time
	// pass-by-reference (&$userMatches) is a fatal error since PHP 5.4.
	if ( !preg_match( '/^[^\|\]\#]+/u', $userPart, $userMatches ) ) {
		// user link is invalid
		return false;
	}

	$user = $userMatches[0];

	if (
		!User::isIP( $user ) &&
		User::getCanonicalName( $user ) === false
	) {
		// Not a real username
		return false;
	}

	return User::getCanonicalName( $user, false );
}
|
||||
|
||||
/**
 * Builds a regular expression fragment matching the characters that are
 * allowed to follow a signature on a line: whitespace, single curly braces,
 * tags in angle brackets and {{...}} runs, repeated any number of times.
 *
 * @return String regular expression fragment (no delimiters, no anchors).
 */
static function getLineEndingRegex() {
	$closingBrace = preg_quote( '}' );
	$openingBrace = preg_quote( '{' );
	$angleTag = '\<[^\>]+\>';
	$doubleBraced = preg_quote( '{{' ) . '[^}]+' . preg_quote( '}}' );

	$alternatives = '\s*'
		. '|' . $closingBrace
		. '|' . $openingBrace
		. '|' . $angleTag
		. '|' . $doubleBraced;

	return '(?:' . $alternatives . ')*';
}
|
||||
|
||||
/**
 * Gets a regular expression that will match this wiki's
 * timestamps as given by ~~~~.
 *
 * The expression is derived from an exemplar timestamp produced by the
 * parser and cached in self::$timestampRegex.
 *
 * @return String regular expression fragment (escaped for the '/' delimiter).
 * @throws MWException if the generated expression does not match the
 *     exemplar it was derived from.
 */
static function getTimestampRegex() {
	if ( self::$timestampRegex !== null ) {
		return self::$timestampRegex;
	}

	// Step 1: Get an exemplar timestamp
	$title = Title::newMainPage();
	$user = User::newFromName( 'Test' );
	$options = new ParserOptions;

	global $wgParser;
	$exemplarTimestamp =
		$wgParser->preSaveTransform( '~~~~~', $title, $user, $options );

	// Step 2: Generalise it
	// Trim off the timezone to replace at the end
	$output = $exemplarTimestamp;
	$tzRegex = '/\s*\(\w+\)\s*$/';
	$tzMatches = array();
	// preg_match() already takes $tzMatches by reference (call-time
	// pass-by-reference is a fatal error since PHP 5.4). An exemplar
	// without a "(TZ)" suffix simply contributes nothing.
	$timezoneSuffix = preg_match( $tzRegex, $output, $tzMatches ) ? $tzMatches[0] : '';
	$output = preg_replace( $tzRegex, '', $output );
	$output = preg_quote( $output, '/' );
	// Runs of word characters other than digits, and runs of digits,
	// become generic placeholders.
	$output = preg_replace( '/[^\d\W]+/u', '[^\d\W]+', $output );
	$output = preg_replace( '/\d+/u', '\d+', $output );

	// Callers wrap the fragment in '/' delimiters, so quote for that here too.
	$output .= preg_quote( $timezoneSuffix, '/' );

	if ( !preg_match( "/$output/u", $exemplarTimestamp ) ) {
		throw new MWException( "Timestamp regex does not match exemplar" );
	}

	self::$timestampRegex = $output;

	return $output;
}
|
||||
}
|
87
testDiscussionParser.php
Normal file
87
testDiscussionParser.php
Normal file
|
@ -0,0 +1,87 @@
|
|||
<?php
|
||||
|
||||
require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
|
||||
? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
|
||||
: dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
|
||||
|
||||
/**
 * Maintenance script that fetches revisions from the English Wikipedia API
 * and runs EchoDiscussionParser over the diff each of them introduced,
 * printing the detected actions and, for added comments, the users who
 * signed in the affected section.
 */
class TestDiscussionParser extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Takes enwiki revision IDs and attempts to identify interested users";

		$this->addArg( 'revisions', 'Revision IDs, separated by commas', true /*required*/ );
	}

	/**
	 * Sends a query to the API and returns the decoded 'pages' part of the reply.
	 *
	 * @param $apiURL String, URL of api.php.
	 * @param $request Array of API parameters (must use 'format' => 'php').
	 * @return Array of page data, or false if the request failed or the
	 *     reply does not have the expected shape.
	 */
	private function fetchPages( $apiURL, $request ) {
		$response = Http::post( $apiURL, array( 'postData' => $request ) );

		if ( !is_string( $response ) ) {
			return false;
		}

		// NOTE(review): unserialize() on data received over HTTP is unsafe
		// against a hostile or spoofed server; consider switching the API
		// format to JSON. Kept as-is for this developer-only test script.
		$data = unserialize( $response );

		if ( !is_array( $data ) || !isset( $data['query']['pages'] ) || !is_array( $data['query']['pages'] ) ) {
			return false;
		}

		return $data['query']['pages'];
	}

	public function execute() {
		$apiURL = 'http://en.wikipedia.org/w/api.php';

		$revisions = explode( ',', $this->getArg( 0 ) );

		// Retrieve original revisions and their predecessors
		$pages = $this->fetchPages( $apiURL, array(
			'format' => 'php',
			'action' => 'query',
			'prop' => 'revisions',
			'revids' => implode( '|', $revisions ),
		) );

		if ( $pages === false ) {
			$this->error( 'Could not retrieve the requested revisions.' );
			return;
		}

		foreach ( $pages as $page ) {
			if ( !isset( $page['revisions'] ) || count( $page['revisions'] ) != 1 ) {
				continue;
			}

			$revid = $page['revisions'][0]['revid'];

			// The revision itself plus the one before it.
			$allData = $this->fetchPages( $apiURL, array(
				'format' => 'php',
				'action' => 'query',
				'prop' => 'revisions',
				'titles' => $page['title'],
				'rvstartid' => $revid,
				'rvlimit' => 2,
				'rvprop' => 'ids|content|user',
			) );

			if ( $allData === false ) {
				$this->error( "Could not retrieve the content of revision $revid." );
				continue;
			}

			$pageData = array_shift( $allData );
			$revisionCount = isset( $pageData['revisions'] ) ? count( $pageData['revisions'] ) : 0;

			if ( $revisionCount == 2 ) {
				$revision1 = $pageData['revisions'][0];
				$revision2 = $pageData['revisions'][1];
				$oldText = trim( $revision2['*'] ) . "\n";
				$newText = trim( $revision1['*'] ) . "\n";
			} elseif ( $revisionCount == 1 ) {
				// First revision of the page: everything is new.
				$revision1 = $pageData['revisions'][0];
				$newText = trim( $revision1['*'] ) . "\n";
				$oldText = '';
			} else {
				// Unexpected reply; do not reuse texts from a previous page.
				continue;
			}

			$user = $pageData['revisions'][0]['user'];

			print "http://en.wikipedia.org/w/index.php?diff=prev&oldid=$revid\n";

			$changes = EchoDiscussionParser::getMachineReadableDiff( $oldText, $newText );
			$actions = EchoDiscussionParser::interpretDiff( $changes, $user );

			foreach ( $actions as $action ) {
				print "\t{$action['type']}\n";

				if ( $action['type'] === 'add-comment' ) {
					// Everybody who signed in the section the comment went into.
					$interested = array_keys(
						EchoDiscussionParser::extractSignatures( $action['full-section'] )
					);
					print "\t\tInterested users: " . implode( ', ', $interested ) . "\n";
				}
			}
		}
	}
}
|
||||
|
||||
$maintClass = "TestDiscussionParser";
|
||||
require_once( RUN_MAINTENANCE_IF_MAIN );
|
Loading…
Reference in a new issue