mediawiki-extensions-Discus.../includes/ThreadItemSet.php
Bartosz Dziewoński 0ecc8a4c05 Improve detecting already signed comments
Previously, we required a signature at the end of the comment.
This was a pretty rough heuristic that did not correctly handle
many comments that we would consider entirely properly signed
in CommentParser (e.g. comments wrapped in formatting like
<small>…</small>, comments with a post-scriptum or in parentheses,
or comments generated by various templates).

Now we process the user input using the same code that adds reply
links, and only add a signature when we detect that there really
isn't a signature (including template-generated), or if the signature
is in the wrong place and would result in the reply link showing up
in the wrong place as well (not at the end of the comment).

Bug: T278442
Bug: T268558
Bug: T278355
Bug: T291421
Bug: T282983
Change-Id: I46b6110af328ebdf93b7dfc2bd941e04391a1599
2022-02-21 21:21:26 +00:00

176 lines
5 KiB
PHP

<?php
namespace MediaWiki\Extension\DiscussionTools;
/**
 * Groups thread items (headings and comments) generated by parsing a discussion page.
 *
 * The set keeps several views of the same items:
 *  - a flat list of every item, in the order they were added;
 *  - a flat list of only the comments;
 *  - a list of only the headings, which act as the roots of the threads;
 *  - lookup maps by name and by ID, filled separately via updateIdAndNameMaps().
 */
class ThreadItemSet {

	/** @var ThreadItem[] All items (headings and comments), in insertion order */
	private $threadItems = [];

	/** @var CommentItem[] Only the comments, in insertion order */
	private $commentItems = [];

	/** @var ThreadItem[][] Items grouped by name; several items may share a name */
	private $threadItemsByName = [];

	/** @var ThreadItem[] Items keyed by ID (and by legacy ID, when they have one) */
	private $threadItemsById = [];

	/** @var HeadingItem[] Only the headings, in insertion order */
	private $threads = [];

	/**
	 * Append an item to the flat list, and to the comment or heading list matching its type.
	 *
	 * This does not touch the name/ID lookup maps; call updateIdAndNameMaps() for that.
	 *
	 * @internal Only used by CommentParser
	 * @param ThreadItem $item
	 */
	public function addThreadItem( ThreadItem $item ): void {
		$this->threadItems[] = $item;
		if ( $item instanceof CommentItem ) {
			$this->commentItems[] = $item;
		}
		if ( $item instanceof HeadingItem ) {
			$this->threads[] = $item;
		}
	}

	/**
	 * Check whether no items have been added yet.
	 *
	 * @internal Only used by CommentParser
	 * @return bool
	 */
	public function isEmpty(): bool {
		return $this->threadItems === [];
	}

	/**
	 * Register an item in the lookup maps used by findCommentsByName() and findCommentById().
	 *
	 * The item is appended to the list for its name, and stored under its ID. When it also has
	 * a legacy ID, it is stored under that as well, so that both IDs resolve to the same item.
	 * A later item with the same ID replaces the earlier one in the ID map.
	 *
	 * @internal Only used by CommentParser
	 * @param ThreadItem $item
	 */
	public function updateIdAndNameMaps( ThreadItem $item ): void {
		$this->threadItemsByName[ $item->getName() ][] = $item;

		$this->threadItemsById[ $item->getId() ] = $item;

		// NOTE(review): relies on getLegacyId() returning a falsy value when there is no
		// legacy ID; the exact return type is not visible from this class.
		$legacyId = $item->getLegacyId();
		if ( $legacyId ) {
			$this->threadItemsById[ $legacyId ] = $item;
		}
	}

	/**
	 * Get all thread items (headings and comments) in this set, in the order they were added.
	 *
	 * This returns a flat list, use getThreads() to get a tree structure starting at section headings.
	 *
	 * For example, for a MediaWiki discussion like this (we're dealing with HTML DOM here,
	 * the wikitext syntax is just for illustration):
	 *
	 *     == A ==
	 *     B. ~~~~
	 *     : C.
	 *     : C. ~~~~
	 *     :: D. ~~~~
	 *     ::: E. ~~~~
	 *     ::: F. ~~~~
	 *     : G. ~~~~
	 *     H. ~~~~
	 *     : I. ~~~~
	 *
	 * This function would return a structure like:
	 *
	 *     [
	 *       HeadingItem( { level: 0, range: (h2: A) } ),
	 *       CommentItem( { level: 1, range: (p: B) } ),
	 *       CommentItem( { level: 2, range: (li: C, li: C) } ),
	 *       CommentItem( { level: 3, range: (li: D) } ),
	 *       CommentItem( { level: 4, range: (li: E) } ),
	 *       CommentItem( { level: 4, range: (li: F) } ),
	 *       CommentItem( { level: 2, range: (li: G) } ),
	 *       CommentItem( { level: 1, range: (p: H) } ),
	 *       CommentItem( { level: 2, range: (li: I) } )
	 *     ]
	 *
	 * @return ThreadItem[] Thread items
	 */
	public function getThreadItems(): array {
		return $this->threadItems;
	}

	/**
	 * Same as getThreadItems(), but only returns the CommentItems
	 *
	 * @return CommentItem[] Comment items
	 */
	public function getCommentItems(): array {
		return $this->commentItems;
	}

	/**
	 * Find ThreadItems by their name
	 *
	 * This will usually return a single-element array, but it may return multiple comments if they're
	 * indistinguishable by name. In that case, use their IDs to disambiguate.
	 *
	 * Only items previously passed to updateIdAndNameMaps() can be found.
	 *
	 * @param string $name Name
	 * @return ThreadItem[] Thread items, empty array if not found
	 */
	public function findCommentsByName( string $name ): array {
		return $this->threadItemsByName[$name] ?? [];
	}

	/**
	 * Find a ThreadItem by its ID
	 *
	 * Both the current ID and the legacy ID (when the item has one) are accepted.
	 * Only items previously passed to updateIdAndNameMaps() can be found.
	 *
	 * @param string $id ID
	 * @return ThreadItem|null Thread item, null if not found
	 */
	public function findCommentById( string $id ): ?ThreadItem {
		return $this->threadItemsById[$id] ?? null;
	}

	/**
	 * Get the headings in this set, which are the top-level items of the discussion threads.
	 *
	 * Each thread must begin with a heading. Original messages in the thread are treated as replies to
	 * its heading. Other replies are associated based on the order and indentation level.
	 *
	 * This method only returns the headings collected by addThreadItem(); it does not build the
	 * tree itself. The replies are presumably attached to each item by CommentParser before the
	 * items are added here (not visible from this class).
	 *
	 * For example, for a MediaWiki discussion like this (we're dealing with HTML DOM here,
	 * the wikitext syntax is just for illustration):
	 *
	 *     == A ==
	 *     B. ~~~~
	 *     : C.
	 *     : C. ~~~~
	 *     :: D. ~~~~
	 *     ::: E. ~~~~
	 *     ::: F. ~~~~
	 *     : G. ~~~~
	 *     H. ~~~~
	 *     : I. ~~~~
	 *
	 * This function would return a structure like:
	 *
	 *     [
	 *       HeadingItem( { level: 0, range: (h2: A), replies: [
	 *         CommentItem( { level: 1, range: (p: B), replies: [
	 *           CommentItem( { level: 2, range: (li: C, li: C), replies: [
	 *             CommentItem( { level: 3, range: (li: D), replies: [
	 *               CommentItem( { level: 4, range: (li: E), replies: [] } ),
	 *               CommentItem( { level: 4, range: (li: F), replies: [] } ),
	 *             ] } ),
	 *           ] } ),
	 *           CommentItem( { level: 2, range: (li: G), replies: [] } ),
	 *         ] } ),
	 *         CommentItem( { level: 1, range: (p: H), replies: [
	 *           CommentItem( { level: 2, range: (li: I), replies: [] } ),
	 *         ] } ),
	 *       ] } )
	 *     ]
	 *
	 * @return HeadingItem[] Tree structure of comments, top-level items are the headings.
	 */
	public function getThreads(): array {
		return $this->threads;
	}
}