Match handling of "signature scan limit" between JS and PHP

PHP was counting UTF-8 bytes, JS was counting UTF-16 code units.
Both should have been counting codepoints (although it doesn't
really matter as long as they both count the same things).

I noticed the issue after adding some tests using the Cyrillic
script, when one case had different results in PHP and JS:
Id25b537fecd789640c209ff7f30e777455a3aece.

Change-Id: Ic31240678f71ba48e6ec202126bf490cea12bb66
This commit is contained in:
Bartosz Dziewoński 2020-09-08 02:23:53 +02:00
parent 2ce305fcd0
commit 14fb013515
3 changed files with 4 additions and 2 deletions

View file

@ -68,6 +68,7 @@
"mediawiki.Uri",
"mediawiki.util",
"mediawiki.storage",
"mediawiki.String",
"ext.visualEditor.core.utils.parsing"
],
"messages": [

View file

@ -511,7 +511,7 @@ class CommentParser {
( $node = $node->previousSibling ) && $length < self::SIGNATURE_SCAN_LIMIT && $node !== $until
) {
$sigNodes[] = $node;
$length += $node->textContent ? strlen( $node->textContent ) : 0;
$length += $node->textContent ? mb_strlen( $node->textContent ) : 0;
if ( !( $node instanceof DOMElement ) ) {
continue;
}

View file

@ -7,6 +7,7 @@
var
utils = require( './utils.js' ),
codePointLength = require( 'mediawiki.String' ).codePointLength,
CommentItem = require( './CommentItem.js' ),
HeadingItem = require( './HeadingItem.js' ),
// Hooks::getLocalData()
@ -530,7 +531,7 @@ Parser.prototype.findSignature = function ( timestampNode, until ) {
while ( ( node = node.previousSibling ) && length < data.signatureScanLimit && node !== until ) {
sigNodes.push( node );
length += ( node.textContent || '' ).length;
length += node.textContent ? codePointLength( node.textContent ) : 0;
if ( node.nodeType !== Node.ELEMENT_NODE ) {
continue;
}