diff --git a/includes/DiscussionToolsCommentParser.php b/includes/DiscussionToolsCommentParser.php
index 7876267df..8ebd2fe9f 100644
--- a/includes/DiscussionToolsCommentParser.php
+++ b/includes/DiscussionToolsCommentParser.php
@@ -843,13 +843,13 @@ class DiscussionToolsCommentParser {
 					'startContainer' => $startNode->parentNode,
 					'startOffset' => self::childIndexOf( $startNode ),
 					'endContainer' => $node,
-					'endOffset' => $match[0][1] + mb_strlen( $match[0][0] )
+					'endOffset' => $match[0][1] + strlen( $match[0][0] )
 				];
 				$sigRange = (object)[
 					'startContainer' => $firstSigNode->parentNode,
 					'startOffset' => self::childIndexOf( $firstSigNode ),
 					'endContainer' => $node,
-					'endOffset' => $match[0][1] + mb_strlen( $match[0][0] )
+					'endOffset' => $match[0][1] + strlen( $match[0][0] )
 				];
 
 				$startLevel = $this->getIndentLevel( $startNode, $rootNode ) + 1;
diff --git a/tests/phpunit/DiscussionToolsCommentParserTest.php b/tests/phpunit/DiscussionToolsCommentParserTest.php
index 4f8be01f4..60b4fb9c3 100644
--- a/tests/phpunit/DiscussionToolsCommentParserTest.php
+++ b/tests/phpunit/DiscussionToolsCommentParserTest.php
@@ -7,7 +7,55 @@ use Wikimedia\TestingAccessWrapper;
  */
 class DiscussionToolsCommentParserTest extends DiscussionToolsTestCase {
 
-	private static function getOffsetPath( $ancestor, $node, $nodeOffset ) {
+	/**
+	 * Get the offset path of a position within $ancestor, converting text node UTF-8 byte offsets to UTF-16 code units.
+	 *
+	 * @param DOMElement $ancestor
+	 * @param DOMNode $node
+	 * @param int $nodeOffset
+	 * @return int
+	 */
+	private static function getOffsetPath( DOMElement $ancestor, DOMNode $node, $nodeOffset ) {
+		if ( $node->nodeType === XML_TEXT_NODE ) {
+			$startNode = $node;
+			$nodeText = '';
+
+			while ( $node ) {
+				$nodeText .= $node->nodeValue;
+
+				// In Parsoid HTML, entities are represented as a 'mw:Entity' node, rather than normal HTML
+				// entities. On Arabic Wikipedia, the "UTC" timezone name contains some non-breaking spaces,
+				// which apparently are often turned into &nbsp; entities by buggy editing tools. To handle
+				// this, we must piece together the text, so that our regexp can match those timestamps.
+				if (
+					$node->nextSibling &&
+					$node->nextSibling->nodeType === XML_ELEMENT_NODE &&
+					$node->nextSibling->getAttribute( 'typeof' ) === 'mw:Entity'
+				) {
+					$nodeText .= $node->nextSibling->firstChild->nodeValue;
+
+					// If the entity is followed by more text, do this again
+					if (
+						$node->nextSibling->nextSibling &&
+						$node->nextSibling->nextSibling->nodeType === XML_TEXT_NODE
+					) {
+						$node = $node->nextSibling->nextSibling;
+					} else {
+						$node = null;
+					}
+				} else {
+					$node = null;
+				}
+			}
+
+			$str = substr( $nodeText, 0, $nodeOffset );
+			// Count characters that require two code units to encode in UTF-16
+			$count = preg_match_all( '/[\x{010000}-\x{10FFFF}]/u', $str );
+			$nodeOffset = mb_strlen( $str ) + $count;
+
+			$node = $startNode;
+		}
+
 		$path = [ $nodeOffset ];
 		while ( $node !== $ancestor ) {
 			if ( !$node->parentNode ) {
@@ -181,18 +229,6 @@ class DiscussionToolsCommentParserTest extends DiscussionToolsTestCase {
 	}
 
 	public function provideComments() {
-		return [
-			self::getJson( './cases/comments.json' )[0],
-			self::getJson( './cases/comments.json' )[1],
-			// self::getJson( './cases/comments.json' )[2],
-			// self::getJson( './cases/comments.json' )[3],
-			self::getJson( './cases/comments.json' )[4],
-			self::getJson( './cases/comments.json' )[5],
-			self::getJson( './cases/comments.json' )[6],
-			self::getJson( './cases/comments.json' )[7],
-			self::getJson( './cases/comments.json' )[8],
-			self::getJson( './cases/comments.json' )[9],
-			self::getJson( './cases/comments.json' )[10]
-		];
+		return self::getJson( './cases/comments.json' );
 	}
 }