Match handling of "signature scan limit" between JS and PHP

PHP was counting UTF-8 bytes (strlen), while JS was counting UTF-16 code units (String.prototype.length). Both should have been counting code points (although it doesn't really matter which unit is used, as long as both implementations count the same thing). I noticed the issue after adding some tests using the Cyrillic script (Id25b537fecd789640c209ff7f30e777455a3aece), where one case produced different results in PHP and JS.

Change-Id: Ic31240678f71ba48e6ec202126bf490cea12bb66
2024-11-24 00:13:36 +00:00 · 2020-09-08 02:23:53 +02:00 · 2020-09-08 02:23:53 +02:00 · 14fb013515
parent 2ce305fcd0
commit 14fb013515
3 changed files with 4 additions and 2 deletions
--- a/extension.json
+++ b/extension.json
@@ -68,6 +68,7 @@
 				"mediawiki.Uri",
 				"mediawiki.util",
 				"mediawiki.storage",
+				"mediawiki.String",
 				"ext.visualEditor.core.utils.parsing"
 			],
 			"messages": [
--- a/includes/CommentParser.php
+++ b/includes/CommentParser.php
@@ -511,7 +511,7 @@ class CommentParser {
 			( $node = $node->previousSibling ) && $length < self::SIGNATURE_SCAN_LIMIT && $node !== $until
 		) {
 			$sigNodes[] = $node;
-			$length += $node->textContent ? strlen( $node->textContent ) : 0;
+			$length += $node->textContent ? mb_strlen( $node->textContent ) : 0;
 			if ( !( $node instanceof DOMElement ) ) {
 				continue;
 			}
--- a/modules/Parser.js
+++ b/modules/Parser.js
@@ -7,6 +7,7 @@

 var
 	utils = require( './utils.js' ),
+	codePointLength = require( 'mediawiki.String' ).codePointLength,
 	CommentItem = require( './CommentItem.js' ),
 	HeadingItem = require( './HeadingItem.js' ),
 	// Hooks::getLocalData()
@@ -530,7 +531,7 @@ Parser.prototype.findSignature = function ( timestampNode, until ) {

 	while ( ( node = node.previousSibling ) && length < data.signatureScanLimit && node !== until ) {
 		sigNodes.push( node );
-		length += ( node.textContent || '' ).length;
+		length += node.textContent ? codePointLength( node.textContent ) : 0;
 		if ( node.nodeType !== Node.ELEMENT_NODE ) {
 			continue;
 		}