Parser tweaks

Follow-up to Ic1438d516e223db462cb227f6668e856672f538c.
Minor corrections and comment improvements in PHP parser,
and "backporting" some changes to JS parser that I like.

Change-Id: I5e54121914ec6b323e556dd133bcb71b3aefbb61
This commit is contained in:
Bartosz Dziewoński 2020-05-13 01:21:02 +02:00
parent b1427163af
commit c848d8a90e
2 changed files with 17 additions and 20 deletions

View file

@ -296,7 +296,7 @@ class CommentParser {
$p = $endQuote;
}
} else {
# Quote at end of string, assume literal "
// Quote at end of string, assume literal "
$s .= '"';
}
break;
@ -733,8 +733,13 @@ class CommentParser {
* - 'range' (array): The extent of the comment, including the signature and timestamp.
* Comments can start or end in the middle of a DOM node.
* Keys: 'startContainer', 'startOffset', 'endContainer' and 'endOffset'
* - 'signatureRanges' (array): The extents of the comment's signatures (plus timestamps).
* There is always at least one signature, but there may be multiple.
* The author and timestamp of the comment are determined from the
* first signature. The last node in every signature range is the
* text node containing the timestamp.
* - 'level' (int): Indentation level of the comment. Headings are 0, comments start at 1.
* - 'timestamp' (string): Timestamp (TODO in what format?). Not set for headings.
* - 'timestamp' (string): ISO 8601 timestamp in UTC (ending in 'Z'). Not set for headings.
* - 'author' (string|null): Comment author's username, null for unsigned comments.
* Not set for headings.
*
@ -851,7 +856,8 @@ class CommentParser {
$curComment = (object)[
'type' => 'comment',
// Almost DateTimeInterface::RFC3339_EXTENDED
// ISO 8601 date. Almost DateTimeInterface::RFC3339_EXTENDED, but ending with 'Z' instead
// of '+00:00', like Date#toISOString in JavaScript.
'timestamp' => $dfParser( $match )->format( 'Y-m-d\TH:i:s.v\Z' ),
'author' => $author,
'range' => $range,
@ -943,6 +949,7 @@ class CommentParser {
}
$id = "$id|$number";
}
if ( $id !== null ) {
$commentsById[$id] = $comment;
}
@ -975,8 +982,7 @@ class CommentParser {
$replies[ $comment->level ] = $comment;
// Cut off more deeply nested replies
// TODO look up if there's a more convenient function to truncate arrays
array_splice( $replies, $comment->level + 1, count( $replies ) - $comment->level - 1 );
array_splice( $replies, $comment->level + 1 );
}
return $threads;

View file

@ -545,7 +545,7 @@ function findSignature( timestampNode, until ) {
title.getMainText().split( '/' )[ 0 ] === data.specialContributionsName
) {
username = title.getMainText().split( '/' )[ 1 ];
// Users may link to their contributions with non-standard name
// Normalize the username: users may link to their contributions with an unnormalized name
username = mw.Title.makeTitle( mw.config.get( 'wgNamespaceIds' ).user, username ).getMainText();
}
if ( !username ) {
@ -556,7 +556,7 @@ function findSignature( timestampNode, until ) {
username = username.toUpperCase();
}
// Check that every link points to the same user
// Accept the first link to the user namespace, then only accept links to that user
if ( !sigUsername ) {
sigUsername = username;
}
@ -912,33 +912,24 @@ function groupThreads( comments ) {
}
if ( comment.level === 0 ) {
// new root (thread)
// New root (thread)
threads.push( comment );
} else if ( replies[ comment.level - 1 ] ) {
// add as a reply to closest less nested comment
replies[ comment.level - 1 ].replies.push( comment );
// Add as a reply to the closest less-nested comment
comment.parent = replies[ comment.level - 1 ];
comment.parent.replies.push( comment );
} else {
console.log( 'Comment could not be connected to a thread', comment.range );
}
replies[ comment.level ] = comment;
// cut off more deeply nested replies
// Cut off more deeply nested replies
replies.length = comment.level + 1;
}
return threads;
}
/**
* Get the list of authors involved in a comment and its replies.
*
* You probably want to pass a thread root here (a heading).
*
* @param {Object} comment Comment object, as returned by #groupThreads
* @return {Object} Object with comment author usernames as keys
*/
/**
* Get the list of authors involved in a comment and its replies.
*