mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/DiscussionTools
synced 2024-11-12 01:16:19 +00:00
50853a3db6
At the time of writing, $wgWMESchemaEditAttemptStepOversample is not a global variable in PHP [0]. [0] https://codesearch.wmcloud.org/search/?q=WMESchemaEditAttemptStepOversample Bug: T312016 Change-Id: I6a8abceddc9fce2f28f7a154fa2fc566538ed5e8
514 lines
18 KiB
PHP
514 lines
18 KiB
PHP
<?php
|
|
/**
|
|
* DiscussionTools event dispatcher
|
|
*
|
|
* @file
|
|
* @ingroup Extensions
|
|
* @license MIT
|
|
*/
|
|
|
|
namespace MediaWiki\Extension\DiscussionTools\Notifications;
|
|
|
|
use ChangeTags;
|
|
use DateInterval;
|
|
use DateTimeImmutable;
|
|
use DeferredUpdates;
|
|
use EchoEvent;
|
|
use Error;
|
|
use ExtensionRegistry;
|
|
use IDBAccessObject;
|
|
use Iterator;
|
|
use MediaWiki\Extension\DiscussionTools\ContentThreadItemSet;
|
|
use MediaWiki\Extension\DiscussionTools\Hooks\HookUtils;
|
|
use MediaWiki\Extension\DiscussionTools\SubscriptionItem;
|
|
use MediaWiki\Extension\DiscussionTools\SubscriptionStore;
|
|
use MediaWiki\Extension\DiscussionTools\ThreadItem\CommentItem;
|
|
use MediaWiki\Extension\DiscussionTools\ThreadItem\ContentCommentItem;
|
|
use MediaWiki\Extension\DiscussionTools\ThreadItem\ContentHeadingItem;
|
|
use MediaWiki\Extension\DiscussionTools\ThreadItem\ContentThreadItem;
|
|
use MediaWiki\Extension\DiscussionTools\ThreadItem\HeadingItem;
|
|
use MediaWiki\Extension\EventLogging\EventLogging;
|
|
use MediaWiki\MediaWikiServices;
|
|
use MediaWiki\Page\PageIdentity;
|
|
use MediaWiki\Revision\RevisionRecord;
|
|
use MediaWiki\User\UserIdentity;
|
|
use ParserOptions;
|
|
use RequestContext;
|
|
use Title;
|
|
use TitleValue;
|
|
use Wikimedia\Assert\Assert;
|
|
use Wikimedia\Parsoid\Utils\DOMCompat;
|
|
use Wikimedia\Parsoid\Utils\DOMUtils;
|
|
|
|
class EventDispatcher {
|
|
/**
|
|
* @param RevisionRecord $revRecord
|
|
* @return ContentThreadItemSet
|
|
*/
|
|
private static function getParsedRevision( RevisionRecord $revRecord ): ContentThreadItemSet {
|
|
$services = MediaWikiServices::getInstance();
|
|
|
|
$pageRecord = $services->getPageStore()->getPageById( $revRecord->getPageId() ) ?:
|
|
$services->getPageStore()->getPageById( $revRecord->getPageId(), IDBAccessObject::READ_LATEST );
|
|
|
|
Assert::postcondition( $pageRecord !== null, 'Revision had no page' );
|
|
|
|
// If the $revRecord was fetched from the primary database, this will also fetch the content
|
|
// from the primary database (using the same query flags)
|
|
$status = $services->getParserOutputAccess()->getParserOutput(
|
|
$pageRecord,
|
|
ParserOptions::newFromAnon(),
|
|
$revRecord
|
|
);
|
|
if ( !$status->isOK() ) {
|
|
throw new Error( 'Could not load revision for notifications' );
|
|
}
|
|
|
|
$title = TitleValue::newFromPage( $revRecord->getPage() );
|
|
|
|
$parserOutput = $status->getValue();
|
|
$html = $parserOutput->getText();
|
|
|
|
$doc = DOMUtils::parseHTML( $html );
|
|
$container = DOMCompat::getBody( $doc );
|
|
$parser = $services->getService( 'DiscussionTools.CommentParser' );
|
|
return $parser->parse( $container, $title );
|
|
}
|
|
|
|
/**
|
|
* @param array &$events
|
|
* @param RevisionRecord $newRevRecord
|
|
*/
|
|
public static function generateEventsForRevision( array &$events, RevisionRecord $newRevRecord ): void {
|
|
$services = MediaWikiServices::getInstance();
|
|
|
|
$revisionStore = $services->getRevisionStore();
|
|
$oldRevRecord = $revisionStore->getPreviousRevision( $newRevRecord, IDBAccessObject::READ_LATEST );
|
|
|
|
$title = Title::newFromLinkTarget(
|
|
$newRevRecord->getPageAsLinkTarget()
|
|
);
|
|
if ( !HookUtils::isAvailableForTitle( $title ) ) {
|
|
// Not a talk page
|
|
return;
|
|
}
|
|
|
|
$user = $newRevRecord->getUser();
|
|
if ( !$user ) {
|
|
// User can be null if the user is deleted, but this is unlikely
|
|
// to be the case if the user just made an edit
|
|
return;
|
|
}
|
|
|
|
if ( $oldRevRecord !== null ) {
|
|
$oldItemSet = static::getParsedRevision( $oldRevRecord );
|
|
} else {
|
|
// Page creation
|
|
$doc = DOMUtils::parseHTML( '' );
|
|
$container = DOMCompat::getBody( $doc );
|
|
$oldItemSet = $services->getService( 'DiscussionTools.CommentParser' )
|
|
->parse( $container, $title->getTitleValue() );
|
|
}
|
|
$newItemSet = static::getParsedRevision( $newRevRecord );
|
|
|
|
static::generateEventsFromItemSets( $events, $oldItemSet, $newItemSet, $newRevRecord, $title, $user );
|
|
}
|
|
|
|
/**
|
|
* For each level 2 heading, get a list of comments in the thread grouped by names, then IDs.
|
|
* (Compare by name first, as ID could be changed by a parent comment being moved/deleted.)
|
|
* Comments in level 3+ sub-threads are grouped together with the parent thread.
|
|
*
|
|
* For any other headings (including level 3+ before the first level 2 heading, level 1, and
|
|
* section zero placeholder headings), ignore comments in those threads.
|
|
*
|
|
* @param ContentThreadItem[] $items
|
|
* @return ContentCommentItem[][][]
|
|
*/
|
|
private static function groupCommentsByThreadAndName( array $items ): array {
|
|
$comments = [];
|
|
$threadName = null;
|
|
foreach ( $items as $item ) {
|
|
if ( $item instanceof HeadingItem && ( $item->getHeadingLevel() < 2 || $item->isPlaceholderHeading() ) ) {
|
|
$threadName = null;
|
|
} elseif ( $item instanceof HeadingItem && $item->getHeadingLevel() === 2 ) {
|
|
$threadName = $item->getName();
|
|
} elseif ( $item instanceof CommentItem && $threadName !== null ) {
|
|
$comments[ $threadName ][ $item->getName() ][ $item->getId() ] = $item;
|
|
}
|
|
}
|
|
return $comments;
|
|
}
|
|
|
|
/**
|
|
* Get a list of all subscribable headings, grouped by name in case there are duplicates.
|
|
*
|
|
* @param ContentHeadingItem[] $items
|
|
* @return ContentHeadingItem[][]
|
|
*/
|
|
private static function groupSubscribableHeadings( array $items ): array {
|
|
$headings = [];
|
|
foreach ( $items as $item ) {
|
|
if ( $item->isSubscribable() ) {
|
|
$headings[ $item->getName() ][ $item->getId() ] = $item;
|
|
}
|
|
}
|
|
return $headings;
|
|
}
|
|
|
|
/**
|
|
* Compare two lists of thread items, return those in $new but not $old.
|
|
*
|
|
* @param ContentThreadItem[][] $old
|
|
* @param ContentThreadItem[][] $new
|
|
* @return iterable<ContentThreadItem>
|
|
*/
|
|
private static function findAddedItems( array $old, array $new ) {
|
|
foreach ( $new as $itemName => $nameNewItems ) {
|
|
// Usually, there will be 0 or 1 $nameNewItems, and 0 $nameOldItems,
|
|
// and $addedCount will be 0 or 1.
|
|
//
|
|
// But when multiple replies are added in one edit, or in multiple edits within the same
|
|
// minute, there may be more, and the complex logic below tries to make the best guess
|
|
// as to which items are actually new. See the 'multiple' and 'sametime' test cases.
|
|
//
|
|
$nameOldItems = $old[ $itemName ] ?? [];
|
|
$addedCount = count( $nameNewItems ) - count( $nameOldItems );
|
|
|
|
if ( $addedCount > 0 ) {
|
|
// For any name that occurs more times in new than old, report that many new items,
|
|
// preferring IDs that did not occur in old, then preferring items lower on the page.
|
|
foreach ( array_reverse( $nameNewItems ) as $itemId => $newItem ) {
|
|
if ( $addedCount > 0 && !isset( $nameOldItems[ $itemId ] ) ) {
|
|
yield $newItem;
|
|
$addedCount--;
|
|
}
|
|
}
|
|
foreach ( array_reverse( $nameNewItems ) as $itemId => $newItem ) {
|
|
if ( $addedCount > 0 ) {
|
|
yield $newItem;
|
|
$addedCount--;
|
|
}
|
|
}
|
|
Assert::postcondition( $addedCount === 0, 'Reported expected number of items' );
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Helper for generateEventsForRevision(), separated out for easier testing.
|
|
*
|
|
* @param array &$events
|
|
* @param ContentThreadItemSet $oldItemSet
|
|
* @param ContentThreadItemSet $newItemSet
|
|
* @param RevisionRecord $newRevRecord
|
|
* @param PageIdentity $title
|
|
* @param UserIdentity $user
|
|
*/
|
|
protected static function generateEventsFromItemSets(
|
|
array &$events,
|
|
ContentThreadItemSet $oldItemSet,
|
|
ContentThreadItemSet $newItemSet,
|
|
RevisionRecord $newRevRecord,
|
|
PageIdentity $title,
|
|
UserIdentity $user
|
|
): void {
|
|
$newComments = static::groupCommentsByThreadAndName( $newItemSet->getThreadItems() );
|
|
$oldComments = static::groupCommentsByThreadAndName( $oldItemSet->getThreadItems() );
|
|
$addedComments = [];
|
|
foreach ( $newComments as $threadName => $threadNewComments ) {
|
|
$threadOldComments = $oldComments[ $threadName ] ?? [];
|
|
foreach ( static::findAddedItems( $threadOldComments, $threadNewComments ) as $newComment ) {
|
|
Assert::precondition( $newComment instanceof ContentCommentItem, 'Must be ContentCommentItem' );
|
|
$addedComments[] = $newComment;
|
|
}
|
|
}
|
|
|
|
$newHeadings = static::groupSubscribableHeadings( $newItemSet->getThreads() );
|
|
$oldHeadings = static::groupSubscribableHeadings( $oldItemSet->getThreads() );
|
|
$removedHeadings = [];
|
|
// Pass swapped parameters to findAddedItems() to find *removed* items
|
|
foreach ( static::findAddedItems( $newHeadings, $oldHeadings ) as $oldHeading ) {
|
|
Assert::precondition( $oldHeading instanceof ContentHeadingItem, 'Must be ContentHeadingItem' );
|
|
$removedHeadings[] = $oldHeading;
|
|
}
|
|
|
|
$mentionedUsers = [];
|
|
foreach ( $events as &$event ) {
|
|
if ( $event['type'] === 'mention' || $event['type'] === 'mention-summary' ) {
|
|
// Save mentioned users in our events, so that we can exclude them from our notification,
|
|
// to avoid duplicate notifications for a single comment.
|
|
// Array is keyed by user id so we can do a simple array merge.
|
|
$mentionedUsers += $event['extra']['mentioned-users'];
|
|
}
|
|
|
|
if ( count( $addedComments ) === 1 ) {
|
|
// If this edit was a new user talk message according to Echo,
|
|
// and we also found exactly one new comment,
|
|
// add some extra information to the edit-user-talk event.
|
|
if ( $event['type'] === 'edit-user-talk' ) {
|
|
$event['extra'] += [
|
|
'comment-id' => $addedComments[0]->getId(),
|
|
'comment-name' => $addedComments[0]->getName(),
|
|
'content' => $addedComments[0]->getBodyText( true ),
|
|
];
|
|
}
|
|
|
|
// Similarly for mentions.
|
|
// We don't handle 'content' in this case, as Echo makes its own snippets.
|
|
if ( $event['type'] === 'mention' ) {
|
|
$event['extra'] += [
|
|
'comment-id' => $addedComments[0]->getId(),
|
|
'comment-name' => $addedComments[0]->getName(),
|
|
];
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( $addedComments ) {
|
|
// It's a bit weird to do this here, in the middle of the hook handler for Echo. However:
|
|
// * Echo calls this from a PageSaveComplete hook handler as a DeferredUpdate,
|
|
// which is exactly how we would do this otherwise
|
|
// * It allows us to reuse the generated comment trees without any annoying caching
|
|
static::addCommentChangeTag( $newRevRecord );
|
|
// For very similar reasons, we do logging here
|
|
static::logAddedComments( $addedComments, $newRevRecord, $title, $user );
|
|
}
|
|
|
|
foreach ( $addedComments as $newComment ) {
|
|
// Ignore comments by other users, e.g. in case of reverts or a discussion being moved.
|
|
// TODO: But what about someone signing another's comment?
|
|
if ( $newComment->getAuthor() !== $user->getName() ) {
|
|
continue;
|
|
}
|
|
// Ignore comments which are more than 10 minutes old, as this may be a user archiving
|
|
// their own comment. (T290803)
|
|
$revTimestamp = new DateTimeImmutable( $newRevRecord->getTimestamp() );
|
|
$threshold = $revTimestamp->sub( new DateInterval( 'PT10M' ) );
|
|
if ( $newComment->getTimestamp() <= $threshold ) {
|
|
continue;
|
|
}
|
|
$heading = $newComment->getSubscribableHeading();
|
|
if ( !$heading ) {
|
|
continue;
|
|
}
|
|
$events[] = [
|
|
'type' => 'dt-subscribed-new-comment',
|
|
'title' => $title,
|
|
'extra' => [
|
|
'subscribed-comment-name' => $heading->getName(),
|
|
'comment-id' => $newComment->getId(),
|
|
'comment-name' => $newComment->getName(),
|
|
'content' => $newComment->getBodyText( true ),
|
|
'section-title' => $heading->getLinkableTitle(),
|
|
'revid' => $newRevRecord->getId(),
|
|
'mentioned-users' => $mentionedUsers,
|
|
],
|
|
'agent' => $user,
|
|
];
|
|
|
|
$titleForSubscriptions = Title::castFromPageIdentity( $title )->createFragmentTarget( $heading->getText() );
|
|
static::addAutoSubscription( $user, $titleForSubscriptions, $heading->getName() );
|
|
}
|
|
|
|
foreach ( $removedHeadings as $oldHeading ) {
|
|
$events[] = [
|
|
'type' => 'dt-removed-topic',
|
|
'title' => $title,
|
|
'extra' => [
|
|
'subscribed-comment-name' => $oldHeading->getName(),
|
|
'heading-id' => $oldHeading->getId(),
|
|
'heading-name' => $oldHeading->getName(),
|
|
'section-title' => $oldHeading->getLinkableTitle(),
|
|
'revid' => $newRevRecord->getId(),
|
|
],
|
|
'agent' => $user,
|
|
];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Add our change tag for a revision that adds new comments.
|
|
*
|
|
* @param RevisionRecord $newRevRecord
|
|
*/
|
|
protected static function addCommentChangeTag( RevisionRecord $newRevRecord ): void {
|
|
// Unclear if DeferredUpdates::addCallableUpdate() is needed,
|
|
// but every extension does it that way.
|
|
DeferredUpdates::addCallableUpdate( static function () use ( $newRevRecord ) {
|
|
ChangeTags::addTags( [ 'discussiontools-added-comment' ], null, $newRevRecord->getId() );
|
|
} );
|
|
}
|
|
|
|
/**
|
|
* Add an automatic subscription to the given item, assuming the user has automatic subscriptions
|
|
* enabled.
|
|
*
|
|
* @param UserIdentity $user
|
|
* @param Title $title
|
|
* @param string $itemName
|
|
*/
|
|
protected static function addAutoSubscription( UserIdentity $user, Title $title, string $itemName ): void {
|
|
$dtConfig = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'discussiontools' );
|
|
|
|
if (
|
|
$dtConfig->get( 'DiscussionToolsAutoTopicSubEditor' ) === 'any' &&
|
|
HookUtils::shouldAddAutoSubscription( $user, $title )
|
|
) {
|
|
$subscriptionStore = MediaWikiServices::getInstance()->getService( 'DiscussionTools.SubscriptionStore' );
|
|
$subscriptionStore->addAutoSubscriptionForUser( $user, $title, $itemName );
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Return all users subscribed to a comment
|
|
*
|
|
* @param EchoEvent $event
|
|
* @param int $batchSize
|
|
* @return UserIdentity[]|Iterator<UserIdentity>
|
|
*/
|
|
public static function locateSubscribedUsers( EchoEvent $event, $batchSize = 500 ) {
|
|
$commentName = $event->getExtraParam( 'subscribed-comment-name' );
|
|
|
|
$subscriptionStore = MediaWikiServices::getInstance()->getService( 'DiscussionTools.SubscriptionStore' );
|
|
$subscriptionItems = $subscriptionStore->getSubscriptionItemsForTopic(
|
|
$commentName,
|
|
[ SubscriptionStore::STATE_SUBSCRIBED, SubscriptionStore::STATE_AUTOSUBSCRIBED ]
|
|
);
|
|
|
|
// Update notified timestamps
|
|
$subscriptionStore->updateSubscriptionNotifiedTimestamp(
|
|
null,
|
|
$commentName
|
|
);
|
|
|
|
// TODD: Have this return an Iterator instead?
|
|
$users = array_map( static function ( SubscriptionItem $item ) {
|
|
return $item->getUserIdentity();
|
|
}, $subscriptionItems );
|
|
|
|
return $users;
|
|
}
|
|
|
|
/**
|
|
* Log stuff to EventLogging's Schema:TalkPageEvent
|
|
* If you don't have EventLogging installed, does nothing.
|
|
*
|
|
* @param array $addedComments
|
|
* @param RevisionRecord $newRevRecord
|
|
* @param PageIdentity $title
|
|
* @param UserIdentity $identity
|
|
* @return bool Whether events were logged
|
|
*/
|
|
protected static function logAddedComments(
|
|
array $addedComments,
|
|
RevisionRecord $newRevRecord,
|
|
PageIdentity $title,
|
|
UserIdentity $identity
|
|
): bool {
|
|
global $wgDTSchemaEditAttemptStepOversample, $wgDBname;
|
|
$context = RequestContext::getMain();
|
|
$request = $context->getRequest();
|
|
// We've reached here through Echo's post-save deferredupdate, which
|
|
// might either be after an API request from DiscussionTools or a
|
|
// regular POST from WikiEditor. Both should have this value snuck
|
|
// into their request if their session is being logged.
|
|
if ( !$request->getCheck( 'editingStatsId' ) ) {
|
|
return false;
|
|
}
|
|
$editingStatsId = $request->getVal( 'editingStatsId' );
|
|
$isDiscussionTools = $request->getCheck( 'dttags' );
|
|
|
|
$extensionRegistry = ExtensionRegistry::getInstance();
|
|
if ( !$extensionRegistry->isLoaded( 'EventLogging' ) ) {
|
|
return false;
|
|
}
|
|
$inSample = static::inEventSample( $editingStatsId );
|
|
$shouldOversample = ( $isDiscussionTools && $wgDTSchemaEditAttemptStepOversample ) || (
|
|
$extensionRegistry->isLoaded( 'WikimediaEvents' ) &&
|
|
// @phan-suppress-next-line PhanUndeclaredClassMethod
|
|
\WikimediaEvents\WikimediaEventsHooks::shouldSchemaEditAttemptStepOversample( $context )
|
|
);
|
|
if ( !$inSample && !$shouldOversample ) {
|
|
return false;
|
|
}
|
|
|
|
$editTracker = MediaWikiServices::getInstance()
|
|
->getUserEditTracker();
|
|
|
|
$commonData = [
|
|
'$schema' => '/analytics/mediawiki/talk_page_edit/1.1.0',
|
|
'action' => 'publish',
|
|
'session_id' => $editingStatsId,
|
|
'page_id' => $newRevRecord->getPageId(),
|
|
'page_namespace' => $title->getNamespace(),
|
|
'revision_id' => $newRevRecord->getId() ?: 0,
|
|
'performer' => [
|
|
// Note: we're logging the user who made the edit, not the user who's signed on the comment
|
|
'user_id' => $identity->getId(),
|
|
'user_edit_count' => $editTracker->getUserEditCount( $identity ) ?: 0,
|
|
// Retention-safe values:
|
|
'user_is_anonymous' => !$identity->isRegistered(),
|
|
'user_edit_count_bucket' => \UserBucketProvider::getUserEditCountBucket( $identity ) ?: 'N/A',
|
|
],
|
|
'database' => $wgDBname,
|
|
// This is unreliable, but sufficient for our purposes; we
|
|
// mostly just want to see the difference between DT and
|
|
// everything-else:
|
|
'integration' => $isDiscussionTools ? 'discussiontools' : 'page',
|
|
];
|
|
|
|
foreach ( $addedComments as $comment ) {
|
|
$heading = $comment->getSubscribableHeading();
|
|
$parent = $comment->getParent();
|
|
if ( !$heading || !$parent ) {
|
|
continue;
|
|
}
|
|
if ( $parent->getType() === 'heading' ) {
|
|
if ( count( $heading->getReplies() ) === 1 ) {
|
|
// A new heading was added when this comment was created
|
|
$component_type = 'topic';
|
|
} else {
|
|
$component_type = 'comment';
|
|
}
|
|
} else {
|
|
$component_type = 'response';
|
|
}
|
|
EventLogging::submit( 'mediawiki.talk_page_edit', array_merge( $commonData, [
|
|
'component_type' => $component_type,
|
|
'topic_id' => $heading->getId(),
|
|
'comment_id' => $comment->getId(),
|
|
'comment_parent_id' => $parent->getId(),
|
|
] ) );
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Should the current session be sampled for EventLogging?
|
|
*
|
|
* @param string $sessionId
|
|
* @return bool Whether to sample the session
|
|
*/
|
|
protected static function inEventSample( string $sessionId ): bool {
|
|
global $wgDTSchemaEditAttemptStepSamplingRate, $wgWMESchemaEditAttemptStepSamplingRate;
|
|
// Sample 6.25%
|
|
$samplingRate = 0.0625;
|
|
if ( isset( $wgDTSchemaEditAttemptStepSamplingRate ) ) {
|
|
$samplingRate = $wgDTSchemaEditAttemptStepSamplingRate;
|
|
}
|
|
if ( isset( $wgWMESchemaEditAttemptStepSamplingRate ) ) {
|
|
$samplingRate = $wgWMESchemaEditAttemptStepSamplingRate;
|
|
}
|
|
if ( $samplingRate === 0 ) {
|
|
return false;
|
|
}
|
|
$inSample = EventLogging::sessionInSample(
|
|
(int)( 1 / $samplingRate ), $sessionId
|
|
);
|
|
return $inSample;
|
|
}
|
|
|
|
}
|