Echo: Implement advanced notifications for vanilla talk pages.

Uses the class EchoDiscussionParser to understand actions taken on vanilla MediaWiki discussion pages.

Currently notifies on these occasions:
* A new comment is added to a discussion on your talk page or that you have participated in.
* A new topic is added to your talk page.

There are vague plans to expand to these classes of events:
* Your comment is edited or removed.
* A large section is moved to your talk page.

and these classes of users:
* Users watching discussion pages.

Change-Id: Ie6cae76ed2e0ecf607059e39ac1aa480a275ec89
2024-11-27 17:20:40 +00:00 · 2012-07-27 15:16:19 -07:00 · 2012-07-27 15:16:19 -07:00 · 6ff170cada
parent 4f2d7b933e
commit 6ff170cada
9 changed files with 898 additions and 20 deletions
--- a/Echo.i18n.php
+++ b/Echo.i18n.php
@ -28,6 +28,8 @@ $messages['en'] = array(
 	// Notification
 	'notification-edit-talk-page' => '$2 {{GENDER:$1|edited}} your talk page $3',
 	'notification-edit' => '$2 {{GENDER:$1|edited}} $3 $4',
+	'notification-add-comment' => '$2 {{GENDER:$1|posted}} a comment to the discussion "$3" on $4',
+	'notification-add-talkpage-topic' => '$2 {{GENDER:$1|posted}} a new topic "$3" on $4',

 	'notification-edit-email-subject' => '{{SITENAME}} notification: $3 has been edited by $2',
 	'notification-edit-email-body' => 'Hello $5,
@ -90,6 +92,16 @@ $messages['qqq'] = array(
 * $2 is the username of the person who edited, formatted.
 * $3 is the page that was edited, formatted.
 * $4 is a diff link, possibly formatted.',
+	'notification-add-comment' => 'Format for displaying notifications of a comment being added to an existing discussion.
+$1 is the username of the person who edited, plain text. Can be used for GENDER.
+$2 is the username of the person who edited, formatted.
+$3 is the section title of the discussion.
+$4 is the page on which the discussion exists.',
+	'notification-add-talkpage-topic' => 'Format for displaying notifications of a new discussion being added
+$1 is the username of the person who edited, plain text. Can be used for GENDER.
+$2 is the username of the person who edited, formatted.
+$3 is the section title of the discussion.
+$4 is the page on which the discussion was added.',
 	'echo-email-subject-default' => 'Default subject for Echo email notifications',
 	'echo-email-body-default' => 'Default message content for Echo email notifications.
 * $1 is a plain text description of the notification.',
--- a/Echo.php
+++ b/Echo.php
@ -54,10 +54,12 @@ $wgAutoloadClasses['EchoNotification'] = "$dir/model/Notification.php";
 $wgAutoloadClasses['EchoNotificationFormatter'] = "$dir/formatters/NotificationFormatter.php";
 $wgAutoloadClasses['EchoBasicFormatter'] = "$dir/formatters/BasicFormatter.php";
 $wgAutoloadClasses['EchoEditFormatter'] = "$dir/formatters/EditFormatter.php";
+$wgAutoloadClasses['EchoCommentFormatter'] = "$dir/formatters/CommentFormatter.php";

 // Internal stuff
 $wgAutoloadClasses['EchoNotifier'] = "$dir/Notifier.php";
 $wgAutoloadClasses['EchoNotificationController'] = "$dir/controller/NotificationController.php";
+$wgAutoloadClasses['EchoDiscussionParser'] = "$dir/includes/DiscussionParser.php";

 // Job queue
 $wgAutoloadClasses['EchoNotificationJob'] = "$dir/jobs/NotificationJob.php";
@ -162,4 +164,14 @@ $wgEchoNotificationFormatters = array(
 		'email-subject-message' => 'notification-edit-email-subject',
 		'email-body-message' => 'notification-edit-email-body',
 	),
+	'add-comment' => array(
+		'type' => 'comment',
+		'message-key' => 'notification-add-comment',
+		'message-params' => array( 'agent', 'subject', 'title' ),
+	),
+	'add-talkpage-topic' => array(
+		'type' => 'comment',
+		'message-key' => 'notification-add-talkpage-topic',
+		'message-params' => array( 'agent', 'subject', 'title' ),
+	),
 );
--- a/Hooks.php
+++ b/Hooks.php
@ -12,6 +12,8 @@ class EchoHooks {
 			"$dir/db_patches/patch-event_agent-split.sql", true );
 		$updater->modifyField( 'echo_event', 'event_variant',
 			"$dir/db_patches/patch-event_variant_nullability.sql", true );
+		$updater->modifyField( 'echo_event', 'event_extra',
+			"$dir/db_patches/patch-event_extra-size.sql", true );
 		return true;
 	}

@ -29,6 +31,21 @@ class EchoHooks {
 					$users[$user->getId()] = $user;
 				}
 			break;
+			case 'add-comment':
+			case 'add-talkpage-topic':
+				$extraData = $event->getExtra();
+
+				if ( !isset( $extraData['revid'] ) || !$extraData['revid'] ) {
+					break;
+				}
+
+				$revision = Revision::newFromId( $extraData['revid'] );
+
+				$users = array_merge(
+					$users,
+					EchoDiscussionParser::getNotifiedUsersForComment($revision)
+				);
+			break;
 		}

 		return true;
@ -90,27 +107,20 @@ class EchoHooks {
 	}

 	public static function onArticleSaved( &$article, &$user, $text, $summary, $minoredit, $watchthis, $sectionanchor, &$flags, $revision, &$status ) {	
-		if ( $revision ) {
-			$event = EchoEvent::create( array(
-				'type' => 'edit',
-				'title' => $article->getTitle(),
-				'extra' => array('revid' => $revision->getID()),
-				'agent' => $user,
-			) );
+		if ( !$revision ) {
+			return true;
+		}

-			$possibleUser = $article->getTitle()->getText();
+		$event = EchoEvent::create( array(
+			'type' => 'edit',
+			'title' => $article->getTitle(),
+			'extra' => array('revid' => $revision->getID()),
+			'agent' => $user,
+		) );

-			if (
-				$article->getTitle()->getNamespace() === NS_USER_TALK &&
-				User::newFromName($possibleUser)->getID()
-			) {
-				$event = EchoEvent::create( array(
-					'type' => 'edit-user-talk',
-					'title' => $article->getTitle(),
-					'extra' => array('revid' => $revision->getID()),
-					'agent' => $user,
-				) );
-			}
+
+		if ( $article->getTitle()->isTalkPage() ) {
+			EchoDiscussionParser::generateEventsForRevision( $revision );
 		}

 		return true;
--- a/db_patches/patch-event_extra-size.sql
+++ b/db_patches/patch-event_extra-size.sql
@ -0,0 +1,2 @@
+-- Patch to add extra space to event_extra
+alter table /*_*/echo_event change column event_extra event_extra BLOB NULL;
--- a/echo.sql
+++ b/echo.sql
@ -21,7 +21,7 @@ CREATE TABLE /*_*/echo_event (
 	event_agent_ip varchar(255) binary null, -- IP address who triggered it, if any
 	event_page_namespace int unsigned null,
 	event_page_title varchar(255) binary null,
-	event_extra TINYBLOB NULL
+	event_extra BLOB NULL
 ) /*$wgDBTableOptions*/;

 CREATE INDEX /*i*/type_page ON /*_*/echo_event (event_type,event_page_namespace,event_page_title,event_timestamp);
--- a/formatters/CommentFormatter.php
+++ b/formatters/CommentFormatter.php
@ -0,0 +1,29 @@
+<?php
+
+/**
+ * Formatter for comment-related notifications. Adds the 'subject' and
+ * 'commentText' message parameters on top of those handled by
+ * EchoEditFormatter.
+ */
+class EchoCommentFormatter extends EchoEditFormatter {
+	/**
+	 * Appends the value for one message parameter to $message.
+	 *
+	 * - 'subject': the 'section-title' entry of the event's extra data,
+	 *   or an empty string when it is missing or empty.
+	 * - 'commentText': the 'content' entry of the event's extra data with
+	 *   section headers stripped and truncated to 200 via the user
+	 *   language, or an empty string when it is missing or empty.
+	 * - anything else is delegated to the parent formatter.
+	 *
+	 * @param $event The event being formatted.
+	 * @param $param Name of the parameter to process.
+	 * @param $message Message object to add the parameter to.
+	 * @param $user User the notification is formatted for.
+	 */
+	protected function processParam( $event, $param, $message, $user ) {
+		$extra = $event->getExtra();
+
+		switch ( $param ) {
+			case 'subject':
+				$sectionTitle = !empty( $extra['section-title'] )
+					? $extra['section-title']
+					: '';
+				$message->params( $sectionTitle );
+				break;
+
+			case 'commentText':
+				global $wgLang; // Message::language is protected :(
+
+				if ( empty( $extra['content'] ) ) {
+					$message->params( '' );
+					break;
+				}
+
+				$snippet = EchoDiscussionParser::stripHeader( $extra['content'] );
+				$message->params( $wgLang->truncate( $snippet, 200 ) );
+				break;
+
+			default:
+				parent::processParam( $event, $param, $message, $user );
+		}
+	}
+}
--- a/formatters/NotificationFormatter.php
+++ b/formatters/NotificationFormatter.php
@ -4,6 +4,7 @@ abstract class EchoNotificationFormatter {
 	static $formatterClasses = array(
 		'basic' => 'EchoBasicFormatter',
 		'edit' => 'EchoEditFormatter',
+		'comment' => 'EchoCommentFormatter',
 	);
 	protected $validOutputFormats = array('text', 'html', 'email');
 	protected $outputFormat = 'text';
--- a/includes/DiscussionParser.php
+++ b/includes/DiscussionParser.php
@ -0,0 +1,725 @@
+<?php
+
+abstract class EchoDiscussionParser {
+	static $timestampRegex;
+	static $headerRegex = '^\=\=\s*([^=].*)\s*\=\=$';
+	static $revisionInterpretationCache = array();
+
+	/**
+	 * Given a Revision object, generates EchoEvent objects for
+	 * the discussion-related actions that occurred in that Revision.
+	 *
+	 * An 'add-comment' event is created for every comment added to an
+	 * existing section, and an 'add-talkpage-topic' event for every new
+	 * section. If neither was created and the page is the talk page of a
+	 * registered user, a generic 'edit-user-talk' event is created instead.
+	 *
+	 * @param $revision Revision object
+	 * @return null
+	 */
+	static function generateEventsForRevision( $revision ) {
+		$interpretation = self::getChangeInterpretationForRevision( $revision );
+		$createdEvents = false;
+		$title = $revision->getTitle();
+
+		// The agent of every event is the author of the revision.
+		$userID = $revision->getUser();
+		$userName = $revision->getUserText();
+		$user = $userID != 0 ? User::newFromId( $userID ) : User::newFromName( $userName, false );
+
+		foreach( $interpretation as $action ) {
+			if ( $action['type'] == 'add-comment' ) {
+				$fullSection = $action['full-section'];
+				$header = self::extractHeader( $fullSection );
+
+				EchoEvent::create( array(
+					'type' => 'add-comment',
+					'title' => $title,
+					'extra' => array(
+						'revid' => $revision->getID(),
+						'section-title' => $header,
+						'content' => $action['content'],
+					),
+					'agent' => $user,
+				) );
+				$createdEvents = true;
+			} elseif ( $action['type'] == 'new-section-with-comment' ) {
+				$content = $action['content'];
+				$header = self::extractHeader( $content );
+				EchoEvent::create( array(
+					'type' => 'add-talkpage-topic',
+					'title' => $title,
+					'extra' => array(
+						'revid' => $revision->getID(),
+						'section-title' => $header,
+						'content' => $content,
+					),
+					'agent' => $user,
+				) );
+				$createdEvents = true;
+			}
+		}
+
+		if ( !$createdEvents && $title->getNamespace() == NS_USER_TALK ) {
+			// The page owner is derived from the page title and is only
+			// looked up to confirm the page belongs to a registered
+			// account; the agent of the event remains the editor.
+			$pageOwner = User::newFromName( $title->getText() );
+			if ( $pageOwner && $pageOwner->getID() ) {
+				EchoEvent::create( array(
+					'type' => 'edit-user-talk',
+					'title' => $title,
+					'extra' => array( 'revid' => $revision->getID() ),
+					'agent' => $user,
+				) );
+			}
+		}
+	}
+
+	/**
+	 * Given a Revision object, determines which users are interested
+	 * in related EchoEvents.
+	 *
+	 * These are the registered users who have signed a comment in a
+	 * section that received a new comment, plus the owner of the page
+	 * when it is a user talk page belonging to a registered user.
+	 *
+	 * @param $revision Revision object.
+	 * @return Array of User objects, keyed by user ID.
+	 */
+	static function getNotifiedUsersForComment( $revision ) {
+		$users = array();
+
+		// The hook caller passes the result of Revision::newFromId()
+		// unchecked; presumably that is not a Revision for unknown IDs.
+		if ( !$revision ) {
+			return $users;
+		}
+
+		$interpretation = self::getChangeInterpretationForRevision( $revision );
+
+		foreach( $interpretation as $action ) {
+			if ( $action['type'] == 'add-comment' ) {
+				$fullSection = $action['full-section'];
+				$interestedUsers = array_keys( self::extractSignatures( $fullSection ) );
+
+				foreach( $interestedUsers as $userName ) {
+					$user = User::newFromName( $userName );
+
+					// Deliberately ignoring anonymous users
+					if ( $user && $user->getID() ) {
+						$users[$user->getID()] = $user;
+					}
+				}
+			}
+		}
+
+		if ( $revision->getTitle()->getNamespace() == NS_USER_TALK ) {
+			$userName = $revision->getTitle()->getText();
+			$user = User::newFromName( $userName );
+
+			// Same rule as above: an owner without a user ID would
+			// otherwise be stored under key 0.
+			if ( $user && $user->getID() ) {
+				$users[$user->getID()] = $user;
+			}
+		}
+
+		return $users;
+	}
+
+	/**
+	 * Given a Revision object, returns a talk-page-centric interpretation
+	 * of the changes made in it.
+	 *
+	 * Results are cached per revision ID for the duration of the request.
+	 *
+	 * @param $revision Revision object
+	 * @see EchoDiscussionParser::interpretDiff
+	 * @return Array, see interpretDiff for details.
+	 */
+	static function getChangeInterpretationForRevision( $revision ) {
+		if ( $revision->getID() && isset( self::$revisionInterpretationCache[$revision->getID()] ) ) {
+			return self::$revisionInterpretationCache[$revision->getID()];
+		}
+
+		$userID = $revision->getUser();
+		$userName = $revision->getUserText();
+		$user = $userID != 0 ? User::newFromId( $userID ) : User::newFromName( $userName, false );
+
+		// A revision without a predecessor (page creation) is diffed
+		// against empty text, so its whole content counts as added.
+		$prevRevision = $revision->getPrevious();
+		$oldText = $prevRevision ? $prevRevision->getText() : '';
+
+		$changes = self::getMachineReadableDiff( $oldText, $revision->getText() );
+		$output = self::interpretDiff( $changes, $user );
+
+		self::$revisionInterpretationCache[$revision->getID()] = $output;
+		return $output;
+	}
+
+	/**
+	 * Given a machine-readable diff, interprets the changes
+	 * in terms of discussion page actions
+	 *
+	 * @todo Expand recognisable actions.
+	 * @param $changes Output of EchoDiscussionParser::getMachineReadableDiff
+	 * @param $user The author of the change, either as a User object
+	 *  or as a user name string.
+	 * @return Array of associative arrays.
+	 * Each entry represents an action, which is classified in the 'type' field.
+	 * All types contain a 'content' field except 'unknown'
+	 *  (which instead passes through the machine-readable diff in 'details')
+	 *  and 'unknown-change' (which provides 'new_content' and 'old_content')
+	 * type may be:
+	 * - add-comment: A comment signed by the user is added to an
+	 *    existing section. Also carries the 'full-section' the comment
+	 *    was added to.
+	 * - new-section-with-comment: A new section is added, containing
+	 *    a single comment signed by the user in question.
+	 * - unknown-signed-addition: Content signed only by the user is
+	 *    added, but it includes section headers in a way that confuses
+	 *    the interpretation engine.
+	 * - unknown-multi-signed-addition: Some signed content is added,
+	 *    but it contains multiple signatures or is signed by another user.
+	 * - unknown-unsigned-addition: Some content is added, but it is
+	 *    unsigned.
+	 * - unknown-subtraction: Some content was removed. These actions are
+	 *    not currently analysed.
+	 * - unknown-change: Some content was replaced with other content.
+	 *    These actions are not currently analysed.
+	 * - unknown: Unrecognised change type.
+	 */
+	static function interpretDiff( $changes, $user ) {
+		$actions = array();
+
+		// Signatures are extracted as user name strings, so compare
+		// names explicitly instead of relying on object-to-string
+		// conversion inside in_array().
+		$userName = is_object( $user ) ? $user->getName() : (string)$user;
+
+		foreach( $changes as $index => $change ) {
+			// One extra item in $changes for _info
+			if ( !is_numeric( $index ) ) {
+				continue;
+			}
+
+			if ( ! $change['action'] ) {
+				// Unknown action; skip
+				continue;
+			}
+			if ( $change['action'] == 'add' ) {
+				$content = trim( $change['content'] );
+				$startSection = preg_match( "/\A" . self::$headerRegex.'/um', $content );
+				$sectionCount = self::getSectionCount( $content );
+				// PHP turns numeric-looking array keys into integers;
+				// cast back so that the strict comparison below works.
+				$signedUsers = array_map(
+					'strval',
+					array_keys( self::extractSignatures( $content ) )
+				);
+
+				if (
+					count( $signedUsers ) == 1 &&
+					in_array( $userName, $signedUsers, true )
+				) {
+					if ( $sectionCount === 0 ) {
+						$fullSection = self::getFullSection( $changes['_info']['rhs'], $change['right-pos'] );
+						$actions[] = array(
+							'type' => 'add-comment',
+							'content' => $content,
+							'full-section' => $fullSection,
+						);
+					} elseif ( $startSection && $sectionCount === 1 ) {
+						$actions[] = array(
+							'type' => 'new-section-with-comment',
+							'content' => $content,
+						);
+					} else {
+						$actions[] = array(
+							'type' => 'unknown-signed-addition',
+							'content' => $content,
+						);
+					}
+				} elseif ( count( $signedUsers ) >= 1 ) {
+					$actions[] = array(
+						'type' => 'unknown-multi-signed-addition',
+						'content' => $content,
+					);
+				} else {
+					$actions[] = array(
+						'type' => 'unknown-unsigned-addition',
+						'content' => $content,
+					);
+				}
+			} elseif ( $change['action'] == 'subtract' ) {
+				$actions[] = array(
+					'type' => 'unknown-subtraction',
+					'content' => $change['content'],
+				);
+			} elseif ( $change['action'] == 'change' ) {
+				$actions[] = array(
+					'type' => 'unknown-change',
+					'old_content' => $change['old_content'],
+					'new_content' => $change['new_content'],
+				);
+			} else {
+				$actions[] = array(
+					'type' => 'unknown',
+					'details' => $change,
+				);
+			}
+		}
+
+		return $actions;
+	}
+
+	/**
+	 * Finds the section that a given line is in.
+	 *
+	 * @param $lines Array of lines in the page (0-indexed).
+	 * @param $offset The 1-based line number of the line to find the
+	 *  full section for, as reported in 'right-pos' by
+	 *  getMachineReadableDiff.
+	 * @return Content of the section, as a string. Empty string if
+	 *  $offset does not refer to a line in $lines.
+	 */
+	static function getFullSection( $lines, $offset ) {
+		$headerRegex = '/' . self::$headerRegex . '/um';
+
+		// Diff positions count lines from 1, the line array from 0.
+		// Without this adjustment the search would start on the line
+		// after the change, which may already belong to the next section.
+		$start = $offset - 1;
+
+		if ( !isset( $lines[$start] ) ) {
+			return '';
+		}
+
+		$content = $lines[$start];
+
+		// Expand backwards until (and including) the section header...
+		$continue = ! preg_match( $headerRegex, $lines[$start] );
+		$i = $start;
+		while ( $continue && $i > 0 ) {
+			--$i;
+			$line = $lines[$i];
+			$content = "$line\n$content";
+			if ( preg_match( $headerRegex, $line ) ) {
+				$continue = false;
+			}
+		}
+
+		// And then forwards until (but excluding) the next header.
+		$continue = true;
+		$i = $start;
+		while ( $continue && $i < count($lines) - 1 ) {
+			++$i;
+			$line = $lines[$i];
+			if ( preg_match( $headerRegex, $line ) ) {
+				$continue = false;
+			} else {
+				$content .= "\n$line";
+			}
+		}
+
+		return $content;
+	}
+
+	/**
+	 * Gets the number of section headers in a string.
+	 *
+	 * @param $text The text, as a string.
+	 * @return Number of section headers found.
+	 */
+	static function getSectionCount( $text ) {
+		$text = trim($text);
+
+		// preg_match_all() already takes $matches by reference; marking
+		// the argument with & at the call site is call-time
+		// pass-by-reference, which newer PHP versions reject.
+		$matches = array();
+		preg_match_all( '/'.self::$headerRegex.'/um', $text, $matches );
+
+		return count( $matches[0] );
+	}
+
+	/**
+	 * Gets the title of a section
+	 *
+	 * @param $text The text of the section, as a string.
+	 * @return The title of the first section header found in $text,
+	 *  as a string, or false if $text contains no section header.
+	 */
+	static function extractHeader( $text ) {
+		$text = trim($text);
+
+		$matches = array();
+
+		// No & on $matches: preg_match() declares the parameter as a
+		// reference itself, and call-time pass-by-reference is rejected
+		// by newer PHP versions.
+		if ( !preg_match( '/'.self::$headerRegex.'/um', $text, $matches ) ) {
+			return false;
+		}
+
+		return trim( $matches[1] );
+	}
+
+	/**
+	 * Removes section headers from a piece of wikitext.
+	 *
+	 * @param $text The wikitext to process.
+	 * @return String: $text with everything that matches the header
+	 *  regex replaced by an empty string.
+	 */
+	static function stripHeader( $text ) {
+		$pattern = '/' . self::$headerRegex . '/um';
+
+		return preg_replace( $pattern, '', $text );
+	}
+
+	/**
+	 * Determines whether the input is a signed comment.
+	 *
+	 * @param $text The text to check.
+	 * @param $user If set, will only return true if the comment is
+	 *  signed by this user (given as a user name).
+	 * @return boolean: true or false.
+	 */
+	static function isSignedComment( $text, $user = false ) {
+		$timestampRegex = self::getTimestampRegex();
+		$endOfLine = self::getLineEndingRegex();
+		$tsMatches = array();
+		if ( ! preg_match(
+				"/$timestampRegex$endOfLine/mu",
+				$text,
+				$tsMatches,
+				PREG_OFFSET_CAPTURE
+		) ) {
+			return false;
+		}
+
+		// With PREG_OFFSET_CAPTURE every match is array( text, offset );
+		// getUserFromLine() expects the offset of the timestamp.
+		$userData = self::getUserFromLine( $text, $tsMatches[0][1] );
+
+		if ( $userData === false ) {
+			return false;
+		} elseif ( $user === false ) {
+			return true;
+		}
+
+		$foundUser = $userData[1];
+
+		return User::getCanonicalName( $foundUser, false ) === User::getCanonicalName( $user, false );
+	}
+
+	/**
+	 * Finds differences between $oldText and $newText
+	 * and returns the result in a machine-readable format.
+	 *
+	 * @param $oldText The "left hand side" of the diff.
+	 * @param $newText The "right hand side" of the diff.
+	 * @return Array of changes.
+	 * Each numerically indexed change consists of:
+	 * * An 'action', one of:
+	 *   - add
+	 *   - subtract
+	 *   - change
+	 * * 'content' that was added or removed, or in the case
+	 *    of a change, 'old_content' and 'new_content'
+	 * * 'left-pos' and 'right-pos', the 1-based line numbers at which
+	 *    the change starts in the old and the new text.
+	 * The additional '_info' entry holds the line arrays of both texts
+	 * ('lhs', 'rhs') and their lengths ('lhs-length', 'rhs-length').
+	 * @throws MWException if the positions tracked while walking the diff
+	 *  do not agree with the lines of the input texts.
+	 */
+	static function getMachineReadableDiff( $oldText, $newText ) {
+		$oldText = trim($oldText)."\n";
+		$newText = trim($newText)."\n";
+		$diff = wfDiff( $oldText, $newText, '-u -w' );
+
+		$old_lines = explode("\n", $oldText);
+		$new_lines = explode("\n", $newText);
+
+		// First break down the diff into additions and subtractions
+		$diff_lines = explode( "\n", $diff );
+		$left_pos = 0;
+		$right_pos = 0;
+		$changes = array();
+		// Index into $changes of the change currently being extended,
+		// or false if there is none. Index 0 is a valid value, so this
+		// must always be compared against false explicitly.
+		$change_run = false;
+		$sub_lines = 0;
+
+		foreach( $diff_lines as $line ) {
+			if ( strlen($line) == 0 ) {
+				continue;
+			}
+
+			$line_type = $line[0];
+
+			if ( $line_type == ' ' ) {
+				++$left_pos;
+				++$right_pos;
+			} elseif ( $line_type == '@' ) {
+				// Hunk header: "@@ -<left>,<count> +<right>,<count> @@"
+				list($at, $lhs_pos, $rhs_pos, $at) = explode( ' ', $line );
+				$lhs_pos = substr( $lhs_pos, 1 );
+				$rhs_pos = substr( $rhs_pos, 1 );
+				list( $left_pos ) = explode( ',', $lhs_pos );
+				list( $right_pos ) = explode( ',', $rhs_pos );
+				$change_run = false;
+			} elseif ( $line_type == '-' ) {
+				$subtracted_line = substr( $line, 1 );
+
+				// Removed blank lines are not reported as changes
+				if ( trim( $subtracted_line ) === '' ) {
+					++$left_pos;
+					continue;
+				}
+
+				if ( $change_run !== false && $changes[$change_run]['action'] == 'subtract' ) {
+					++$sub_lines;
+					$changes[$change_run]['content'] .= "\n" . $subtracted_line;
+				} else {
+					$sub_lines = 1;
+					$changes[] = array(
+						'action' => 'subtract',
+						'left-pos' => $left_pos,
+						'right-pos' => $right_pos,
+						'content' => $subtracted_line,
+					);
+					$change_run = count($changes)-1;
+				}
+
+				// Consistency check
+				if ( $old_lines[$left_pos-1] != $subtracted_line ) {
+					throw new MWException( "Left offset consistency error.\nOffset: $left_pos\nExpected: {$old_lines[$left_pos-1]}\nActual: $subtracted_line" );
+				}
+				++$left_pos;
+			} elseif ( $line_type == '+' ) {
+				$added_line = substr( $line, 1 );
+
+				if ( $change_run !== false && $changes[$change_run]['action'] == 'add' ) {
+					$changes[$change_run]['content'] .= "\n" . $added_line;
+				} elseif ( $change_run !== false && $changes[$change_run]['action'] == 'subtract' ) {
+					// An addition directly after a subtraction turns
+					// the pair into a single 'change'.
+					$changes[$change_run]['action'] = 'change';
+					$changes[$change_run]['old_content'] = $changes[$change_run]['content'];
+					$changes[$change_run]['new_content'] = $added_line;
+					unset( $changes[$change_run]['content'] );
+				} elseif ( $change_run !== false && $changes[$change_run]['action'] == 'change' && $sub_lines > 0 ) {
+					--$sub_lines;
+					$changes[$change_run]['new_content'] .= "\n" . $added_line;
+				} else {
+					$changes[] = array(
+						'action' => 'add',
+						'left-pos' => $left_pos,
+						'right-pos' => $right_pos,
+						'content' => $added_line,
+					);
+					$change_run = count($changes)-1;
+				}
+
+				// Consistency check
+				if ( $new_lines[$right_pos-1] != $added_line ) {
+					throw new MWException( "Right offset consistency error.\nOffset: $right_pos\nExpected: {$new_lines[$right_pos-1]}\nActual: $added_line\n" );
+				}
+				++$right_pos;
+			}
+		}
+
+		$changes['_info'] = array(
+			'lhs-length' => count($old_lines),
+			'rhs-length' => count($new_lines),
+			'lhs' => $old_lines,
+			'rhs' => $new_lines,
+		);
+
+		return $changes;
+	}
+
+	/**
+	 * Finds and extracts signatures in $text
+	 *
+	 * @param $text The text in which to look for signed comments.
+	 * @return Associative array. The key is the username, the value
+	 *  is the last signature that was found.
+	 */
+	static function extractSignatures( $text ) {
+		$lines = explode( "\n", $text );
+		$timestampRegex = self::getTimestampRegex();
+		$endOfLine = self::getLineEndingRegex();
+
+		$output = array();
+
+		foreach( $lines as $line ) {
+			$tsMatches = array();
+			if ( ! preg_match(
+					"/$timestampRegex$endOfLine/mu",
+					$line,
+					$tsMatches,
+					PREG_OFFSET_CAPTURE
+			) ) {
+				// Ignore lines in which no timestamp is found
+				continue;
+			}
+
+			// Now that we know we have a timestamp, look for
+			// the last user link on the line. With PREG_OFFSET_CAPTURE
+			// every match is array( text, offset ), and getUserFromLine()
+			// expects the offset of the timestamp.
+			$userData = self::getUserFromLine( $line, $tsMatches[0][1] );
+			if ( $userData === false ) {
+				continue;
+			}
+
+			list( $signaturePos, $user ) = $userData;
+
+			// The signature runs from the user link to the end of the line
+			$output[$user] = substr( $line, $signaturePos );
+		}
+
+		return $output;
+	}
+
+	/**
+	 * From a line in a wiki page, determine which user, if any,
+	 *  has signed it.
+	 *
+	 * Links to a user page, a user talk page and a contributions page
+	 * are considered. For each of these link types the last valid link
+	 * on the line is taken as a candidate (see getLinkFromLine).
+	 *
+	 * @param $line The line, as a string.
+	 * @param $timestampPos The offset of the start of the timestamp.
+	 * @return false for none, Array for success.
+	 * - First element is the position of the signature.
+	 * - Second element is the normalised user name.
+	 */
+	static function getUserFromLine( $line, $timestampPos ) {
+		global $wgContLang;
+		$possiblePrefixes = array( // Later entries have a higher precedence
+			'[[' . $wgContLang->getNsText( NS_USER ) . ':',
+			'[[' . $wgContLang->getNsText( NS_USER_TALK ) . ':',
+			'[[' . SpecialPage::getTitleFor('Contributions')->getPrefixedText() . '/',
+		);
+
+		// Also accept the variant written with spaces instead of underscores
+		foreach( $possiblePrefixes as $prefix ) {
+			if ( strpos( $prefix, '_' ) !== false ) {
+				$possiblePrefixes[] = str_replace( '_', ' ', $prefix );
+			}
+		}
+
+		$winningUser = false;
+		$winningPos = false;
+
+		// Look for the leftmost link to the rightmost user
+		foreach( $possiblePrefixes as $prefix ) {
+			$output = self::getLinkFromLine( $line, $prefix );
+
+			if ( $output === false ) {
+				continue;
+			} else {
+				list( $pos, $user ) = $output;
+			}
+
+			// Couldn't be a signature
+			// (the link starts more than 255 before the timestamp)
+			if ( ( $timestampPos - $pos ) > 255 ) {
+				continue;
+			}
+
+			// A candidate replaces the current winner if it is the first
+			// one, if it names a different user further right, or if it
+			// names the same user further left.
+			if (
+				$winningPos === false ||
+				( $pos > $winningPos && $user !== $winningUser ) ||
+				(
+					$pos < $winningPos &&
+					$user === $winningUser
+				)
+			) {
+				$winningPos = $pos;
+				$winningUser = ucfirst( trim( $user ) );
+			}
+		}
+
+		if ( $winningUser === false ) {
+			// print "E\tNo winning user\n";
+			return false;
+		}
+
+		return array( $winningPos, $winningUser );
+	}
+
+	/**
+	 * Find the last link beginning with a given prefix on a line.
+	 *
+	 * If no user can be extracted from the last link, the search is
+	 * repeated for links further to the left.
+	 *
+	 * @param $line The line to search, as a string.
+	 * @param $linkPrefix The prefix to search for.
+	 * @param $failureOffset Offset of a link from which extraction has
+	 *  already failed; only links to the left of it are considered.
+	 *  False to search the whole line.
+	 * @return false for failure, array for success.
+	 * - First element is the string offset of the link.
+	 * - Second element is the user the link refers to.
+	 */
+	static function getLinkFromLine( $line, $linkPrefix, $failureOffset = false ) {
+		$offset = 0;
+
+		// If extraction failed at another offset, try again.
+		if ( $failureOffset !== false ) {
+			if ( $failureOffset <= 0 ) {
+				// The failed link started the line, so there is nothing
+				// left of it to search. The negative offset computed below
+				// would also lie outside of the string.
+				return false;
+			}
+
+			// Negative offset: search backwards, starting just
+			// before the link that failed.
+			$offset = $failureOffset - strlen( $line ) - 1;
+		}
+
+		$linkPos = strripos( $line, $linkPrefix, $offset );
+
+		if ( $linkPos === false ) {
+			return false;
+		}
+
+		$linkUser = self::extractUserFromLink( $line, $linkPrefix, $linkPos );
+
+		if ( $linkUser === false ) {
+			// Look for another place.
+			return self::getLinkFromLine( $line, $linkPrefix, $linkPos );
+		} else {
+			return array( $linkPos, $linkUser );
+		}
+	}
+
+	/**
+	 * Given text including a link, gives the user that that link refers to
+	 *
+	 * @param $text The text to extract from.
+	 * @param $prefix The link prefix that was used to find the link.
+	 * @param $offset Optionally, the offset of the start of the link.
+	 * @return The user name as returned by User::getCanonicalName
+	 *  without validation, or false if the link target is empty or is
+	 *  neither an IP address nor a valid user name.
+	 */
+	static function extractUserFromLink( $text, $prefix, $offset = 0 ) {
+		$userPart = substr( $text, strlen($prefix) + $offset );
+
+		// The user name runs up to the first "|", "]" or "#".
+		// $userMatches is passed without &: preg_match() declares the
+		// parameter as a reference itself, and call-time
+		// pass-by-reference is rejected by newer PHP versions.
+		$userMatches = array();
+		if ( ! preg_match(
+			'/^[^\|\]\#]+/u',
+			$userPart,
+			$userMatches
+		) ) {
+			// user link is invalid
+			return false;
+		}
+
+		$user = $userMatches[0];
+
+		if (
+			! User::isIP($user) &&
+			User::getCanonicalName($user) === false
+		) {
+			// Not a real username
+			return false;
+		}
+
+		return User::getCanonicalName( $user, false );
+	}
+
+	/**
+	 * Gets a regular expression fragment matching characters that
+	 * can appear in a line after the signature.
+	 *
+	 * The fragment allows any number of repetitions of: whitespace,
+	 * single braces, tags in angle brackets, and double-brace
+	 * constructs without a closing brace inside.
+	 *
+	 * @return String regular expression fragment.
+	 */
+	static function getLineEndingRegex() {
+		$doubleBraces = preg_quote('{{').'[^}]+'.preg_quote('}}');
+
+		$alternatives = implode( '|', array(
+			'\s*',
+			preg_quote('}'),
+			preg_quote('{'),
+			'\<[^\>]+\>',
+			$doubleBraces,
+		) );
+
+		return '(?:' . $alternatives . ')*';
+	}
+
+	/**
+	 * Gets a regular expression that will match this wiki's
+	 * timestamps as given by ~~~~.
+	 *
+	 * The expression is derived from an exemplar timestamp produced by
+	 * the parser and is cached in self::$timestampRegex.
+	 *
+	 * @return String regular expression fragment.
+	 * @throws MWException if the generated expression does not match
+	 *  the exemplar it was derived from.
+	 */
+	static function getTimestampRegex() {
+		if ( self::$timestampRegex !== null ) {
+			return self::$timestampRegex;
+		}
+
+		// Step 1: Get an exemplar timestamp
+		$title = Title::newMainPage();
+		$user = User::newFromName('Test');
+		$options = new ParserOptions;
+
+		global $wgParser;
+		$exemplarTimestamp =
+			$wgParser->preSaveTransform( '~~~~~', $title, $user, $options );
+
+		// Step 2: Generalise it
+		// Trim off the timezone to replace at the end
+		$output = $exemplarTimestamp;
+		$tzRegex = '/\s*\(\w+\)\s*$/';
+		$tzMatches = array();
+		// No & on $tzMatches: preg_match() takes it by reference itself,
+		// and call-time pass-by-reference is rejected by newer PHP versions.
+		preg_match( $tzRegex, $output, $tzMatches );
+		$output = preg_replace( $tzRegex, '', $output );
+		$output = preg_quote( $output, '/' );
+		// Runs of letters and runs of digits become generic patterns
+		$output = preg_replace( '/[^\d\W]+/u', '[^\d\W]+', $output );
+		$output = preg_replace( '/\d+/u', '\d+', $output );
+
+		// The timezone is kept literally, if the exemplar has one
+		if ( isset( $tzMatches[0] ) ) {
+			$output .= preg_quote( $tzMatches[0], '/' );
+		}
+
+		if ( ! preg_match( "/$output/u", $exemplarTimestamp ) ) {
+			throw new MWException( "Timestamp regex does not match exemplar" );
+		}
+
+		self::$timestampRegex = $output;
+
+		return $output;
+	}
+}
--- a/testDiscussionParser.php
+++ b/testDiscussionParser.php
@ -0,0 +1,87 @@
+<?php
+
+require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
+	? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
+	: dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
+
+/**
+ * Maintenance script that fetches revisions from the English Wikipedia
+ * API and runs EchoDiscussionParser over the change each of them made.
+ */
+class TestDiscussionParser extends Maintenance {
+	public function __construct() {
+		parent::__construct();
+		$this->mDescription = "Takes enwiki revision IDs and attempts to identify interested users";
+
+		$this->addArg( 'revisions', 'Revision IDs, separated by commas', true /*required*/ );
+	}
+
+	/**
+	 * For every requested revision, retrieves its text and the text of
+	 * its predecessor, prints a link to the diff, and prints the actions
+	 * EchoDiscussionParser recognises in it. For comments added to an
+	 * existing section, the users who signed in that section are
+	 * printed as well.
+	 */
+	public function execute() {
+		$apiURL = 'http://en.wikipedia.org/w/api.php';
+
+		$revisions = explode( ',', $this->getArg(0) );
+
+		// Retrieve original revisions and their predecessors
+		$requestData = array(
+			'format' => 'php',
+			'action' => 'query',
+			'prop' => 'revisions',
+			'revids' => implode( '|', $revisions ),
+		);
+
+		$originalData = Http::post(
+			$apiURL,
+			array(
+				'postData' => $requestData,
+			)
+		);
+
+		// NOTE(review): unserialize() is applied to data received over
+		// the network here and below. Consider requesting format=json
+		// and decoding that instead.
+		$data = $originalData !== false ? unserialize( $originalData ) : false;
+
+		if ( !isset( $data['query']['pages'] ) ) {
+			print "Could not retrieve revision data from $apiURL\n";
+			return;
+		}
+
+		$pages = $data['query']['pages'];
+
+		foreach( $pages as $page ) {
+			if ( !isset( $page['revisions'] ) || count($page['revisions']) != 1 ) {
+				continue;
+			}
+
+			$revid = $page['revisions'][0]['revid'];
+
+			$newRequest = array(
+				'format' => 'php',
+				'action' => 'query',
+				'prop' => 'revisions',
+				'titles' => $page['title'],
+				'rvstartid' => $revid,
+				'rvlimit' => 2,
+				'rvprop' => 'ids|content|user',
+			);
+
+			$newData = Http::post(
+				$apiURL,
+				array(
+					'postData' => $newRequest,
+				)
+			);
+
+			$newData = $newData !== false ? unserialize( $newData ) : false;
+
+			if ( !isset( $newData['query']['pages'] ) ) {
+				print "Could not retrieve content of revision $revid\n";
+				continue;
+			}
+
+			$allData = $newData['query']['pages'];
+			$pageData = array_shift( $allData );
+
+			if ( !isset( $pageData['revisions'] ) ) {
+				continue;
+			}
+
+			if ( count( $pageData['revisions'] ) == 2 ) {
+				$revision1 = $pageData['revisions'][0];
+				$revision2 = $pageData['revisions'][1];
+				$oldText = trim($revision2['*']) . "\n";
+				$newText = trim($revision1['*']) . "\n";
+			} elseif ( count( $pageData['revisions'] ) == 1 ) {
+				$revision1 = $pageData['revisions'][0];
+				$newText = trim($revision1['*']) . "\n";
+				$oldText = '';
+			} else {
+				// Nothing to compare
+				continue;
+			}
+
+			$user = $pageData['revisions'][0]['user'];
+
+			print "http://en.wikipedia.org/w/index.php?diff=prev&oldid=$revid\n";
+
+			$changes = EchoDiscussionParser::getMachineReadableDiff( $oldText, $newText );
+			$actions = EchoDiscussionParser::interpretDiff( $changes, $user );
+
+			foreach( $actions as $action ) {
+				print "{$action['type']}\n";
+
+				if ( $action['type'] == 'add-comment' ) {
+					$interestedUsers = array_keys(
+						EchoDiscussionParser::extractSignatures( $action['full-section'] )
+					);
+					print "\tInterested users: " . implode( ', ', $interestedUsers ) . "\n";
+				}
+			}
+		}
+	}
+}
+
+$maintClass = "TestDiscussionParser";
+require_once( RUN_MAINTENANCE_IF_MAIN );