mediawiki-extensions-Discus.../includes/Notifications/EventDispatcher.php

514 lines
18 KiB
PHP
Raw Normal View History

<?php
/**
* DiscussionTools event dispatcher
*
* @file
* @ingroup Extensions
* @license MIT
*/
namespace MediaWiki\Extension\DiscussionTools\Notifications;
use ChangeTags;
use DateInterval;
use DateTimeImmutable;
use DeferredUpdates;
use EchoEvent;
use Error;
use ExtensionRegistry;
use IDBAccessObject;
use Iterator;
use MediaWiki\Extension\DiscussionTools\ContentThreadItemSet;
use MediaWiki\Extension\DiscussionTools\Hooks\HookUtils;
use MediaWiki\Extension\DiscussionTools\SubscriptionItem;
use MediaWiki\Extension\DiscussionTools\SubscriptionStore;
use MediaWiki\Extension\DiscussionTools\ThreadItem\CommentItem;
use MediaWiki\Extension\DiscussionTools\ThreadItem\ContentCommentItem;
use MediaWiki\Extension\DiscussionTools\ThreadItem\ContentHeadingItem;
use MediaWiki\Extension\DiscussionTools\ThreadItem\ContentThreadItem;
use MediaWiki\Extension\DiscussionTools\ThreadItem\HeadingItem;
use MediaWiki\Extension\EventLogging\EventLogging;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\User\UserIdentity;
use ParserOptions;
use RequestContext;
use Title;
use TitleValue;
use Wikimedia\Assert\Assert;
use Wikimedia\Parsoid\Utils\DOMCompat;
use Wikimedia\Parsoid\Utils\DOMUtils;
class EventDispatcher {
/**
* @param RevisionRecord $revRecord
* @return ContentThreadItemSet
*/
private static function getParsedRevision( RevisionRecord $revRecord ): ContentThreadItemSet {
$services = MediaWikiServices::getInstance();
$pageRecord = $services->getPageStore()->getPageById( $revRecord->getPageId() ) ?:
$services->getPageStore()->getPageById( $revRecord->getPageId(), IDBAccessObject::READ_LATEST );
Assert::postcondition( $pageRecord !== null, 'Revision had no page' );
// If the $revRecord was fetched from the primary database, this will also fetch the content
// from the primary database (using the same query flags)
$status = $services->getParserOutputAccess()->getParserOutput(
$pageRecord,
ParserOptions::newFromAnon(),
$revRecord
);
if ( !$status->isOK() ) {
throw new Error( 'Could not load revision for notifications' );
}
$title = TitleValue::newFromPage( $revRecord->getPage() );
$parserOutput = $status->getValue();
$html = $parserOutput->getText();
$doc = DOMUtils::parseHTML( $html );
$container = DOMCompat::getBody( $doc );
Change CommentParser into a service Goal: ----- To have a method like CommentParser::parse(), which just takes a node to parse and a title and returns plain data, so that we don't need to keep track of the config to construct a CommentParser object (the required config like content language is provided by services) and we don't need to keep that object around after parsing. Changes: -------- CommentParser.php: * …is now a service. Constructor only takes services as arguments. The node and title are passed to a new parse() method. * parse() should return plain data, but I split this part to a separate patch for ease of review: I49bfe019aa460651447fd383f73eafa9d7180a92. * CommentParser still cheats and accesses global state in a few places, e.g. calling Title::makeTitleSafe or CommentUtils::getTitleFromUrl, so we can't turn its tests into true unit tests. This work is left for future commits. LanguageData.php: * …is now a service, instead of a static class. Parser.js: * …is not a real service, but it's changed to behave in a similar way. Constructor takes only the required config as argument, and node and title are instead passed to a new parse() method. CommentParserTest.php: parser.test.js: * Can be simplified, now that we don't need a useless node and title to test internal methods that don't use them. testUtils.js: * Can be simplified, now that we don't need to override internal ResourceLoader stuff just to change the parser config. Change-Id: Iadb7757debe000025e52770ca51ebcf24ca8ee66
2022-02-19 02:43:21 +00:00
$parser = $services->getService( 'DiscussionTools.CommentParser' );
return $parser->parse( $container, $title );
}
/**
* @param array &$events
* @param RevisionRecord $newRevRecord
*/
public static function generateEventsForRevision( array &$events, RevisionRecord $newRevRecord ): void {
$services = MediaWikiServices::getInstance();
$revisionStore = $services->getRevisionStore();
$oldRevRecord = $revisionStore->getPreviousRevision( $newRevRecord, IDBAccessObject::READ_LATEST );
$title = Title::newFromLinkTarget(
$newRevRecord->getPageAsLinkTarget()
);
if ( !HookUtils::isAvailableForTitle( $title ) ) {
// Not a talk page
return;
}
$user = $newRevRecord->getUser();
if ( !$user ) {
// User can be null if the user is deleted, but this is unlikely
// to be the case if the user just made an edit
return;
}
if ( $oldRevRecord !== null ) {
$oldItemSet = static::getParsedRevision( $oldRevRecord );
} else {
// Page creation
$doc = DOMUtils::parseHTML( '' );
$container = DOMCompat::getBody( $doc );
$oldItemSet = $services->getService( 'DiscussionTools.CommentParser' )
->parse( $container, $title->getTitleValue() );
}
$newItemSet = static::getParsedRevision( $newRevRecord );
static::generateEventsFromItemSets( $events, $oldItemSet, $newItemSet, $newRevRecord, $title, $user );
}
/**
* For each level 2 heading, get a list of comments in the thread grouped by names, then IDs.
* (Compare by name first, as ID could be changed by a parent comment being moved/deleted.)
* Comments in level 3+ sub-threads are grouped together with the parent thread.
*
* For any other headings (including level 3+ before the first level 2 heading, level 1, and
* section zero placeholder headings), ignore comments in those threads.
*
* @param ContentThreadItem[] $items
* @return ContentCommentItem[][][]
*/
private static function groupCommentsByThreadAndName( array $items ): array {
$comments = [];
$threadName = null;
foreach ( $items as $item ) {
if ( $item instanceof HeadingItem && ( $item->getHeadingLevel() < 2 || $item->isPlaceholderHeading() ) ) {
$threadName = null;
} elseif ( $item instanceof HeadingItem && $item->getHeadingLevel() === 2 ) {
$threadName = $item->getName();
} elseif ( $item instanceof CommentItem && $threadName !== null ) {
$comments[ $threadName ][ $item->getName() ][ $item->getId() ] = $item;
}
}
return $comments;
}
/**
* Get a list of all subscribable headings, grouped by name in case there are duplicates.
*
* @param ContentHeadingItem[] $items
* @return ContentHeadingItem[][]
*/
private static function groupSubscribableHeadings( array $items ): array {
$headings = [];
foreach ( $items as $item ) {
if ( $item->isSubscribable() ) {
$headings[ $item->getName() ][ $item->getId() ] = $item;
}
}
return $headings;
}
/**
* Compare two lists of thread items, return those in $new but not $old.
*
* @param ContentThreadItem[][] $old
* @param ContentThreadItem[][] $new
* @return iterable<ContentThreadItem>
*/
private static function findAddedItems( array $old, array $new ) {
foreach ( $new as $itemName => $nameNewItems ) {
// Usually, there will be 0 or 1 $nameNewItems, and 0 $nameOldItems,
// and $addedCount will be 0 or 1.
//
// But when multiple replies are added in one edit, or in multiple edits within the same
// minute, there may be more, and the complex logic below tries to make the best guess
// as to which items are actually new. See the 'multiple' and 'sametime' test cases.
//
$nameOldItems = $old[ $itemName ] ?? [];
$addedCount = count( $nameNewItems ) - count( $nameOldItems );
if ( $addedCount > 0 ) {
// For any name that occurs more times in new than old, report that many new items,
// preferring IDs that did not occur in old, then preferring items lower on the page.
foreach ( array_reverse( $nameNewItems ) as $itemId => $newItem ) {
if ( $addedCount > 0 && !isset( $nameOldItems[ $itemId ] ) ) {
yield $newItem;
$addedCount--;
}
}
foreach ( array_reverse( $nameNewItems ) as $itemId => $newItem ) {
if ( $addedCount > 0 ) {
yield $newItem;
$addedCount--;
}
}
Assert::postcondition( $addedCount === 0, 'Reported expected number of items' );
}
}
}
/**
* Helper for generateEventsForRevision(), separated out for easier testing.
*
* @param array &$events
* @param ContentThreadItemSet $oldItemSet
* @param ContentThreadItemSet $newItemSet
* @param RevisionRecord $newRevRecord
* @param PageIdentity $title
* @param UserIdentity $user
*/
protected static function generateEventsFromItemSets(
array &$events,
ContentThreadItemSet $oldItemSet,
ContentThreadItemSet $newItemSet,
RevisionRecord $newRevRecord,
PageIdentity $title,
UserIdentity $user
): void {
$newComments = static::groupCommentsByThreadAndName( $newItemSet->getThreadItems() );
$oldComments = static::groupCommentsByThreadAndName( $oldItemSet->getThreadItems() );
$addedComments = [];
foreach ( $newComments as $threadName => $threadNewComments ) {
$threadOldComments = $oldComments[ $threadName ] ?? [];
foreach ( static::findAddedItems( $threadOldComments, $threadNewComments ) as $newComment ) {
Assert::precondition( $newComment instanceof ContentCommentItem, 'Must be ContentCommentItem' );
$addedComments[] = $newComment;
}
}
$newHeadings = static::groupSubscribableHeadings( $newItemSet->getThreads() );
$oldHeadings = static::groupSubscribableHeadings( $oldItemSet->getThreads() );
$removedHeadings = [];
// Pass swapped parameters to findAddedItems() to find *removed* items
foreach ( static::findAddedItems( $newHeadings, $oldHeadings ) as $oldHeading ) {
Assert::precondition( $oldHeading instanceof ContentHeadingItem, 'Must be ContentHeadingItem' );
$removedHeadings[] = $oldHeading;
}
$mentionedUsers = [];
foreach ( $events as &$event ) {
if ( $event['type'] === 'mention' || $event['type'] === 'mention-summary' ) {
// Save mentioned users in our events, so that we can exclude them from our notification,
// to avoid duplicate notifications for a single comment.
// Array is keyed by user id so we can do a simple array merge.
$mentionedUsers += $event['extra']['mentioned-users'];
}
if ( count( $addedComments ) === 1 ) {
// If this edit was a new user talk message according to Echo,
// and we also found exactly one new comment,
// add some extra information to the edit-user-talk event.
if ( $event['type'] === 'edit-user-talk' ) {
$event['extra'] += [
'comment-id' => $addedComments[0]->getId(),
'comment-name' => $addedComments[0]->getName(),
'content' => $addedComments[0]->getBodyText( true ),
];
}
// Similarly for mentions.
// We don't handle 'content' in this case, as Echo makes its own snippets.
if ( $event['type'] === 'mention' ) {
$event['extra'] += [
'comment-id' => $addedComments[0]->getId(),
'comment-name' => $addedComments[0]->getName(),
];
}
}
}
if ( $addedComments ) {
// It's a bit weird to do this here, in the middle of the hook handler for Echo. However:
// * Echo calls this from a PageSaveComplete hook handler as a DeferredUpdate,
// which is exactly how we would do this otherwise
// * It allows us to reuse the generated comment trees without any annoying caching
static::addCommentChangeTag( $newRevRecord );
// For very similar reasons, we do logging here
static::logAddedComments( $addedComments, $newRevRecord, $title, $user );
}
foreach ( $addedComments as $newComment ) {
// Ignore comments by other users, e.g. in case of reverts or a discussion being moved.
// TODO: But what about someone signing another's comment?
if ( $newComment->getAuthor() !== $user->getName() ) {
continue;
}
// Ignore comments which are more than 10 minutes old, as this may be a user archiving
// their own comment. (T290803)
$revTimestamp = new DateTimeImmutable( $newRevRecord->getTimestamp() );
$threshold = $revTimestamp->sub( new DateInterval( 'PT10M' ) );
if ( $newComment->getTimestamp() <= $threshold ) {
continue;
}
$heading = $newComment->getSubscribableHeading();
if ( !$heading ) {
continue;
}
$events[] = [
'type' => 'dt-subscribed-new-comment',
'title' => $title,
'extra' => [
'subscribed-comment-name' => $heading->getName(),
'comment-id' => $newComment->getId(),
'comment-name' => $newComment->getName(),
'content' => $newComment->getBodyText( true ),
'section-title' => $heading->getLinkableTitle(),
'revid' => $newRevRecord->getId(),
'mentioned-users' => $mentionedUsers,
],
'agent' => $user,
];
$titleForSubscriptions = Title::castFromPageIdentity( $title )->createFragmentTarget( $heading->getText() );
static::addAutoSubscription( $user, $titleForSubscriptions, $heading->getName() );
}
foreach ( $removedHeadings as $oldHeading ) {
$events[] = [
'type' => 'dt-removed-topic',
'title' => $title,
'extra' => [
'subscribed-comment-name' => $oldHeading->getName(),
'heading-id' => $oldHeading->getId(),
'heading-name' => $oldHeading->getName(),
'section-title' => $oldHeading->getLinkableTitle(),
'revid' => $newRevRecord->getId(),
],
'agent' => $user,
];
}
}
/**
* Add our change tag for a revision that adds new comments.
*
* @param RevisionRecord $newRevRecord
*/
protected static function addCommentChangeTag( RevisionRecord $newRevRecord ): void {
// Unclear if DeferredUpdates::addCallableUpdate() is needed,
// but every extension does it that way.
DeferredUpdates::addCallableUpdate( static function () use ( $newRevRecord ) {
ChangeTags::addTags( [ 'discussiontools-added-comment' ], null, $newRevRecord->getId() );
} );
}
/**
* Add an automatic subscription to the given item, assuming the user has automatic subscriptions
* enabled.
*
* @param UserIdentity $user
* @param Title $title
* @param string $itemName
*/
protected static function addAutoSubscription( UserIdentity $user, Title $title, string $itemName ): void {
$dtConfig = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'discussiontools' );
if (
$dtConfig->get( 'DiscussionToolsAutoTopicSubEditor' ) === 'any' &&
HookUtils::shouldAddAutoSubscription( $user, $title )
) {
$subscriptionStore = MediaWikiServices::getInstance()->getService( 'DiscussionTools.SubscriptionStore' );
$subscriptionStore->addAutoSubscriptionForUser( $user, $title, $itemName );
}
}
/**
* Return all users subscribed to a comment
*
* @param EchoEvent $event
* @param int $batchSize
* @return UserIdentity[]|Iterator<UserIdentity>
*/
public static function locateSubscribedUsers( EchoEvent $event, $batchSize = 500 ) {
$commentName = $event->getExtraParam( 'subscribed-comment-name' );
$subscriptionStore = MediaWikiServices::getInstance()->getService( 'DiscussionTools.SubscriptionStore' );
$subscriptionItems = $subscriptionStore->getSubscriptionItemsForTopic(
$commentName,
[ SubscriptionStore::STATE_SUBSCRIBED, SubscriptionStore::STATE_AUTOSUBSCRIBED ]
);
// Update notified timestamps
$subscriptionStore->updateSubscriptionNotifiedTimestamp(
null,
$commentName
);
// TODD: Have this return an Iterator instead?
$users = array_map( static function ( SubscriptionItem $item ) {
return $item->getUserIdentity();
}, $subscriptionItems );
return $users;
}
/**
* Log stuff to EventLogging's Schema:TalkPageEvent
* If you don't have EventLogging installed, does nothing.
*
* @param array $addedComments
* @param RevisionRecord $newRevRecord
* @param PageIdentity $title
* @param UserIdentity $identity
* @return bool Whether events were logged
*/
protected static function logAddedComments(
array $addedComments,
RevisionRecord $newRevRecord,
PageIdentity $title,
UserIdentity $identity
): bool {
global $wgDTSchemaEditAttemptStepOversample, $wgDBname;
$context = RequestContext::getMain();
$request = $context->getRequest();
// We've reached here through Echo's post-save deferredupdate, which
// might either be after an API request from DiscussionTools or a
// regular POST from WikiEditor. Both should have this value snuck
// into their request if their session is being logged.
if ( !$request->getCheck( 'editingStatsId' ) ) {
return false;
}
$editingStatsId = $request->getVal( 'editingStatsId' );
$isDiscussionTools = $request->getCheck( 'dttags' );
$extensionRegistry = ExtensionRegistry::getInstance();
if ( !$extensionRegistry->isLoaded( 'EventLogging' ) ) {
return false;
}
$inSample = static::inEventSample( $editingStatsId );
$shouldOversample = ( $isDiscussionTools && $wgDTSchemaEditAttemptStepOversample ) || (
$extensionRegistry->isLoaded( 'WikimediaEvents' ) &&
// @phan-suppress-next-line PhanUndeclaredClassMethod
\WikimediaEvents\WikimediaEventsHooks::shouldSchemaEditAttemptStepOversample( $context )
);
if ( !$inSample && !$shouldOversample ) {
return false;
}
$editTracker = MediaWikiServices::getInstance()
->getUserEditTracker();
$commonData = [
'$schema' => '/analytics/mediawiki/talk_page_edit/1.1.0',
'action' => 'publish',
'session_id' => $editingStatsId,
'page_id' => $newRevRecord->getPageId(),
'page_namespace' => $title->getNamespace(),
'revision_id' => $newRevRecord->getId() ?: 0,
'performer' => [
// Note: we're logging the user who made the edit, not the user who's signed on the comment
'user_id' => $identity->getId(),
'user_edit_count' => $editTracker->getUserEditCount( $identity ) ?: 0,
// Retention-safe values:
'user_is_anonymous' => !$identity->isRegistered(),
'user_edit_count_bucket' => \UserBucketProvider::getUserEditCountBucket( $identity ) ?: 'N/A',
],
'database' => $wgDBname,
// This is unreliable, but sufficient for our purposes; we
// mostly just want to see the difference between DT and
// everything-else:
'integration' => $isDiscussionTools ? 'discussiontools' : 'page',
];
foreach ( $addedComments as $comment ) {
$heading = $comment->getSubscribableHeading();
$parent = $comment->getParent();
if ( !$heading || !$parent ) {
continue;
}
if ( $parent->getType() === 'heading' ) {
if ( count( $heading->getReplies() ) === 1 ) {
// A new heading was added when this comment was created
$component_type = 'topic';
} else {
$component_type = 'comment';
}
} else {
$component_type = 'response';
}
EventLogging::submit( 'mediawiki.talk_page_edit', array_merge( $commonData, [
'component_type' => $component_type,
'topic_id' => $heading->getId(),
'comment_id' => $comment->getId(),
'comment_parent_id' => $parent->getId(),
] ) );
}
return true;
}
/**
* Should the current session be sampled for EventLogging?
*
* @param string $sessionId
* @return bool Whether to sample the session
*/
protected static function inEventSample( string $sessionId ): bool {
global $wgDTSchemaEditAttemptStepSamplingRate, $wgWMESchemaEditAttemptStepSamplingRate;
// Sample 6.25%
$samplingRate = 0.0625;
if ( isset( $wgDTSchemaEditAttemptStepSamplingRate ) ) {
$samplingRate = $wgDTSchemaEditAttemptStepSamplingRate;
}
if ( isset( $wgWMESchemaEditAttemptStepSamplingRate ) ) {
$samplingRate = $wgWMESchemaEditAttemptStepSamplingRate;
}
if ( $samplingRate === 0 ) {
return false;
}
$inSample = EventLogging::sessionInSample(
(int)( 1 / $samplingRate ), $sessionId
);
return $inSample;
}
}