Use instanceof for checking for text/element nodes in PHP

Using `instanceof` is friendlier for static analysis tools like Phan, which
can't infer anything from the `->nodeType === …` checks, and we were already
using `instanceof` in most places.

Fix the Phan failures this newly reveals (and remove one suppression that is no longer needed).

Change-Id: Id789f05e16a210f7ba22ca7514587c392fac0741
This commit is contained in:
Bartosz Dziewoński 2022-02-21 19:42:36 +01:00
parent 1f7ff387a7
commit 063174e71c
4 changed files with 12 additions and 11 deletions

View file

@ -57,11 +57,11 @@ class CommentModifier {
// visible on the page. But if we insert an inline element (the reply link) after it, it becomes
// meaningful and gets rendered, which results in additional spacing before some reply links.
// Split the text node, so that we can insert the link before the trailing whitespace.
if ( $target->nodeType === XML_TEXT_NODE ) {
preg_match( '/\s*$/', $target->nodeValue, $matches, PREG_OFFSET_CAPTURE );
if ( $target instanceof Text ) {
preg_match( '/\s*$/', $target->nodeValue ?? '', $matches, PREG_OFFSET_CAPTURE );
$byteOffset = $matches[0][1];
$charOffset = mb_strlen(
substr( $target->nodeValue, 0, $byteOffset )
substr( $target->nodeValue ?? '', 0, $byteOffset )
);
$target->splitText( $charOffset );
}
@ -290,7 +290,7 @@ class CommentModifier {
private static function allOfType( array $nodes, string $type ): bool {
$hasElements = false;
foreach ( $nodes as $node ) {
if ( $node->nodeType === XML_ELEMENT_NODE ) {
if ( $node instanceof Element ) {
if ( strtolower( $node->nodeName ) !== strtolower( $type ) ) {
return false;
}
@ -371,7 +371,7 @@ class CommentModifier {
}
while ( $list->firstChild ) {
if ( $list->firstChild->nodeType === XML_ELEMENT_NODE ) {
if ( $list->firstChild instanceof Element ) {
// Move <dd> contents to <p>
$p = $doc->createElement( 'p' );
while ( $list->firstChild->firstChild ) {

View file

@ -849,8 +849,8 @@ class CommentParser {
}
);
$length = ( $endNode->nodeType === XML_TEXT_NODE ) ?
mb_strlen( rtrim( $endNode->nodeValue, "\t\n\f\r " ) ) :
$length = ( $endNode instanceof Text ) ?
mb_strlen( rtrim( $endNode->nodeValue ?? '', "\t\n\f\r " ) ) :
// PHP bug: childNodes can be null for comment nodes
// (it should always be a NodeList, even if the node can't have children)
( $endNode->childNodes ? $endNode->childNodes->length : 0 );

View file

@ -9,6 +9,7 @@ use Wikimedia\Assert\Assert;
use Wikimedia\Parsoid\DOM\Comment;
use Wikimedia\Parsoid\DOM\Element;
use Wikimedia\Parsoid\DOM\Node;
use Wikimedia\Parsoid\DOM\Text;
use Wikimedia\Parsoid\Utils\DOMCompat;
class CommentUtils {
@ -163,7 +164,7 @@ class CommentUtils {
*/
public static function isCommentContent( Node $node ) {
return (
$node->nodeType === XML_TEXT_NODE &&
$node instanceof Text &&
self::htmlTrim( $node->nodeValue ?? '' ) !== ''
) ||
(
@ -211,10 +212,9 @@ class CommentUtils {
public static function closestElement( Node $node, array $tagNames ): ?Element {
do {
if (
$node->nodeType === XML_ELEMENT_NODE &&
$node instanceof Element &&
in_array( strtolower( $node->nodeName ), $tagNames )
) {
// @phan-suppress-next-line PhanTypeMismatchReturn
return $node;
}
$node = $node->parentNode;

View file

@ -14,6 +14,7 @@ use stdClass;
use Title;
use Wikimedia\Parsoid\DOM\Element;
use Wikimedia\Parsoid\DOM\Node;
use Wikimedia\Parsoid\DOM\Text;
use Wikimedia\Parsoid\Utils\DOMCompat;
use Wikimedia\TestingAccessWrapper;
@ -37,7 +38,7 @@ class CommentParserTest extends IntegrationTestCase {
private static function getOffsetPath(
Element $ancestor, Node $node, int $nodeOffset
): string {
if ( $node->nodeType === XML_TEXT_NODE ) {
if ( $node instanceof Text ) {
$str = mb_substr( $node->nodeValue, 0, $nodeOffset );
// Count characters that require two code units to encode in UTF-16
$count = preg_match_all( '/[\x{010000}-\x{10FFFF}]/u', $str );