From 432a95943694e7dc2c2449308333d223617a7367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Dziewo=C5=84ski?= Date: Tue, 29 Sep 2020 22:05:58 +0200 Subject: [PATCH] Ignore empty paragraphs at the beginning of comments The wikitext parser outputs `


` for empty paragraphs, so we need to ignore `
` tags when searching for an "interesting" node that marks the beginning of a comment. Otherwise the empty paragraphs mess up the detection of indentation levels. Bug: T264116 Change-Id: I84a97ab577baa7336b78935ccdc48041ecfc231a --- includes/CommentParser.php | 8 +- modules/Parser.js | 5 +- .../ar-nbsp-timezone-parsoid.json | 2 +- .../ckb-big-oldparser/ckb-big-oldparser.json | 167 +++++++++--------- .../ckb-big-parsoid/ckb-big-parsoid.json | 167 +++++++++--------- .../cases/en-big-parsoid/en-big-parsoid.json | 2 +- 6 files changed, 169 insertions(+), 182 deletions(-) diff --git a/includes/CommentParser.php b/includes/CommentParser.php index a618ddd82..20caecdc9 100644 --- a/includes/CommentParser.php +++ b/includes/CommentParser.php @@ -86,7 +86,8 @@ class CommentParser { * Return the next leaf node in the tree order that is not an empty or whitespace-only text node. * * In other words, this returns a text node with content other than whitespace, or an element node - * with no children, that follows the given node. + * with no children, such as `` or `
` (with the exception of `
`), that follows the + * given node in the HTML source. * * @param DOMNode $node Node to start searching at. This node's children are ignored. * @return DOMNode @@ -111,7 +112,10 @@ class CommentParser { $n->nodeType === XML_CDATA_SECTION_NODE && CommentUtils::htmlTrim( $n->nodeValue ) !== '' ) || - ( $n->nodeType === XML_ELEMENT_NODE && !$n->firstChild ) + ( + $n->nodeType === XML_ELEMENT_NODE && + !$n->firstChild && strtolower( $n->nodeName ) !== 'br' + ) ) { return NodeFilter::FILTER_ACCEPT; } diff --git a/modules/Parser.js b/modules/Parser.js index 45665925e..eb8aadec5 100644 --- a/modules/Parser.js +++ b/modules/Parser.js @@ -625,7 +625,8 @@ Parser.prototype.findSignature = function ( timestampNode, until ) { * Return the next leaf node in the tree order that is not an empty or whitespace-only text node. * * In other words, this returns a Text node with content other than whitespace, or an Element node - * with no children, that follows the given node in the HTML source. + * with no children, such as `` or `
` (with the exception of `
`), that follows the + * given node in the HTML source. * * @private * @param {Node} node Node to start searching at. If it isn't a leaf node, its children are ignored. @@ -648,7 +649,7 @@ Parser.prototype.nextInterestingLeafNode = function ( node ) { if ( ( n.nodeType === Node.TEXT_NODE && utils.htmlTrim( n.textContent ) !== '' ) || ( n.nodeType === Node.CDATA_SECTION_NODE && utils.htmlTrim( n.textContent ) !== '' ) || - ( n.nodeType === Node.ELEMENT_NODE && !n.firstChild ) + ( n.nodeType === Node.ELEMENT_NODE && !n.firstChild && n.nodeName.toLowerCase() !== 'br' ) ) { return NodeFilter.FILTER_ACCEPT; } diff --git a/tests/cases/ar-nbsp-timezone-parsoid/ar-nbsp-timezone-parsoid.json b/tests/cases/ar-nbsp-timezone-parsoid/ar-nbsp-timezone-parsoid.json index 4d5a45c42..61e98ff70 100644 --- a/tests/cases/ar-nbsp-timezone-parsoid/ar-nbsp-timezone-parsoid.json +++ b/tests/cases/ar-nbsp-timezone-parsoid/ar-nbsp-timezone-parsoid.json @@ -171,7 +171,7 @@ "timestamp": "2020-04-30T11:58:00.000Z", "author": "Aws Al-mimari", "range": [ - "0/1/8/2/1", + "0/1/8/2/2", "0/1/8/2/10/2" ], "signatureRanges": [ diff --git a/tests/cases/ckb-big-oldparser/ckb-big-oldparser.json b/tests/cases/ckb-big-oldparser/ckb-big-oldparser.json index 11547c0f7..16fe9f20d 100644 --- a/tests/cases/ckb-big-oldparser/ckb-big-oldparser.json +++ b/tests/cases/ckb-big-oldparser/ckb-big-oldparser.json @@ -503,28 +503,25 @@ ], "level": 1, "id": "Sarchia Banokay|2015-07-16T14:01:00.000Z|0", - "replies": [] - }, - { - "type": "comment", - "timestamp": "2015-07-16T15:11:00.000Z", - "author": "Diyako kazm", - "range": [ - "0/45/0", - "0/47/0/5/33" - ], - "signatureRanges": [ - [ - "0/47/0/4", - "0/47/0/5/33" - ] - ], - "level": 1, - "id": "Diyako kazm|2015-07-16T15:11:00.000Z|0", - "warnings": [ - "Comment starts and ends with different indentation" - ], "replies": [ + { + "type": "comment", + "timestamp": "2015-07-16T15:11:00.000Z", + "author": "Diyako kazm", + "range": [ + "0/47/0/0", + "0/47/0/5/33" + ], + "signatureRanges": [ + [ + "0/47/0/4", + "0/47/0/5/33" + ] + ], + "level": 2, + "id": "Diyako kazm|2015-07-16T15:11:00.000Z|0", + "replies": [] + }, { "type": "comment", "timestamp": "2015-07-16T21:31:00.000Z", @@ -1101,117 +1098,111 @@ "replies": [] } ] - } - ] - }, - { - "type": "comment", - "timestamp": "2016-03-03T15:56:00.000Z", - "author": "Diyar se", - "range": [ - "0/119/0", - "0/121/0/3/29" - ], - "signatureRanges": [ - [ - "0/121/0/1", - "0/121/0/3/29" - ] - ], - "level": 1, - "id": "Diyar se|2016-03-03T15:56:00.000Z|0", - "warnings": [ - "Comment starts and ends with different indentation" - ], - "replies": [ + }, { "type": "comment", - "timestamp": "2016-03-03T16:29:00.000Z", - "author": "Calak", + "timestamp": "2016-03-03T15:56:00.000Z", + "author": "Diyar se", "range": [ - "0/121/0/4/0/0", - "0/121/0/4/0/4/29" + "0/121/0/0", + "0/121/0/3/29" ], "signatureRanges": [ [ - "0/121/0/4/0/1", - "0/121/0/4/0/4/29" + "0/121/0/1", + "0/121/0/3/29" ] ], - "level": 3, - "id": "Calak|2016-03-03T16:29:00.000Z|0", - "warnings": [ - "Comment skips indentation level" - ], + "level": 2, + "id": "Diyar se|2016-03-03T15:56:00.000Z|0", "replies": [ { "type": "comment", - "timestamp": "2016-03-07T13:22:00.000Z", - "author": "Serchia", + "timestamp": "2016-03-03T16:29:00.000Z", + "author": "Calak", "range": [ - "0/121/0/4/0/5/0/0", - "0/121/0/4/0/5/0/4/29" + "0/121/0/4/0/0", + "0/121/0/4/0/4/29" ], "signatureRanges": [ [ - "0/121/0/4/0/5/0/1", - "0/121/0/4/0/5/0/4/29" + "0/121/0/4/0/1", + "0/121/0/4/0/4/29" ] ], - "level": 4, - "id": "Serchia|2016-03-07T13:22:00.000Z|0", + "level": 3, + "id": "Calak|2016-03-03T16:29:00.000Z|0", "replies": [ { "type": "comment", - "timestamp": "2016-03-07T15:29:00.000Z", - "author": "Calak", + "timestamp": "2016-03-07T13:22:00.000Z", + "author": "Serchia", "range": [ - "0/121/0/4/0/5/0/5/0/0", - "0/121/0/4/0/5/0/5/0/4/29" + "0/121/0/4/0/5/0/0", + "0/121/0/4/0/5/0/4/29" ], "signatureRanges": [ [ - "0/121/0/4/0/5/0/5/0/1", - "0/121/0/4/0/5/0/5/0/4/29" + "0/121/0/4/0/5/0/1", + "0/121/0/4/0/5/0/4/29" ] ], - "level": 5, - "id": "Calak|2016-03-07T15:29:00.000Z|0", + "level": 4, + "id": "Serchia|2016-03-07T13:22:00.000Z|0", "replies": [ { "type": "comment", - "timestamp": "2016-03-07T16:00:00.000Z", - "author": "Serchia", + "timestamp": "2016-03-07T15:29:00.000Z", + "author": "Calak", "range": [ - "0/121/0/4/0/5/0/5/0/5/0/0", - "0/121/0/4/0/5/0/5/0/5/0/4/29" + "0/121/0/4/0/5/0/5/0/0", + "0/121/0/4/0/5/0/5/0/4/29" ], "signatureRanges": [ [ - "0/121/0/4/0/5/0/5/0/5/0/1", - "0/121/0/4/0/5/0/5/0/5/0/4/29" + "0/121/0/4/0/5/0/5/0/1", + "0/121/0/4/0/5/0/5/0/4/29" ] ], - "level": 6, - "id": "Serchia|2016-03-07T16:00:00.000Z|0", + "level": 5, + "id": "Calak|2016-03-07T15:29:00.000Z|0", "replies": [ { "type": "comment", - "timestamp": "2016-03-07T21:19:00.000Z", - "author": "Calak", + "timestamp": "2016-03-07T16:00:00.000Z", + "author": "Serchia", "range": [ - "0/121/0/4/0/5/0/5/0/5/0/5/0/0", - "0/121/0/4/0/5/0/5/0/5/0/5/0/4/29" + "0/121/0/4/0/5/0/5/0/5/0/0", + "0/121/0/4/0/5/0/5/0/5/0/4/29" ], "signatureRanges": [ [ - "0/121/0/4/0/5/0/5/0/5/0/5/0/1", - "0/121/0/4/0/5/0/5/0/5/0/5/0/4/29" + "0/121/0/4/0/5/0/5/0/5/0/1", + "0/121/0/4/0/5/0/5/0/5/0/4/29" ] ], - "level": 7, - "id": "Calak|2016-03-07T21:19:00.000Z|0", - "replies": [] + "level": 6, + "id": "Serchia|2016-03-07T16:00:00.000Z|0", + "replies": [ + { + "type": "comment", + "timestamp": "2016-03-07T21:19:00.000Z", + "author": "Calak", + "range": [ + "0/121/0/4/0/5/0/5/0/5/0/5/0/0", + "0/121/0/4/0/5/0/5/0/5/0/5/0/4/29" + ], + "signatureRanges": [ + [ + "0/121/0/4/0/5/0/5/0/5/0/5/0/1", + "0/121/0/4/0/5/0/5/0/5/0/5/0/4/29" + ] + ], + "level": 7, + "id": "Calak|2016-03-07T21:19:00.000Z|0", + "replies": [] + } + ] } ] } diff --git a/tests/cases/ckb-big-parsoid/ckb-big-parsoid.json b/tests/cases/ckb-big-parsoid/ckb-big-parsoid.json index 7c678ee62..7aea4958f 100644 --- a/tests/cases/ckb-big-parsoid/ckb-big-parsoid.json +++ b/tests/cases/ckb-big-parsoid/ckb-big-parsoid.json @@ -503,28 +503,25 @@ ], "level": 1, "id": "Sarchia Banokay|2015-07-16T14:01:00.000Z|0", - "replies": [] - }, - { - "type": "comment", - "timestamp": "2015-07-16T15:11:00.000Z", - "author": "Diyako kazm", - "range": [ - "0/6/4/1", - "0/6/6/0/5/33" - ], - "signatureRanges": [ - [ - "0/6/6/0/4", - "0/6/6/0/5/33" - ] - ], - "level": 1, - "id": "Diyako kazm|2015-07-16T15:11:00.000Z|0", - "warnings": [ - "Comment starts and ends with different indentation" - ], "replies": [ + { + "type": "comment", + "timestamp": "2015-07-16T15:11:00.000Z", + "author": "Diyako kazm", + "range": [ + "0/6/6/0/0/0/0", + "0/6/6/0/5/33" + ], + "signatureRanges": [ + [ + "0/6/6/0/4", + "0/6/6/0/5/33" + ] + ], + "level": 2, + "id": "Diyako kazm|2015-07-16T15:11:00.000Z|0", + "replies": [] + }, { "type": "comment", "timestamp": "2015-07-16T21:31:00.000Z", @@ -1101,117 +1098,111 @@ "replies": [] } ] - } - ] - }, - { - "type": "comment", - "timestamp": "2016-03-03T15:56:00.000Z", - "author": "Diyar se", - "range": [ - "0/15/12/1", - "0/15/14/0/3/29" - ], - "signatureRanges": [ - [ - "0/15/14/0/1", - "0/15/14/0/3/29" - ] - ], - "level": 1, - "id": "Diyar se|2016-03-03T15:56:00.000Z|0", - "warnings": [ - "Comment starts and ends with different indentation" - ], - "replies": [ + }, { "type": "comment", - "timestamp": "2016-03-03T16:29:00.000Z", - "author": "Calak", + "timestamp": "2016-03-03T15:56:00.000Z", + "author": "Diyar se", "range": [ - "0/15/14/0/4/0/0", - "0/15/14/0/4/0/4/29" + "0/15/14/0/0", + "0/15/14/0/3/29" ], "signatureRanges": [ [ - "0/15/14/0/4/0/1", - "0/15/14/0/4/0/4/29" + "0/15/14/0/1", + "0/15/14/0/3/29" ] ], - "level": 3, - "id": "Calak|2016-03-03T16:29:00.000Z|0", - "warnings": [ - "Comment skips indentation level" - ], + "level": 2, + "id": "Diyar se|2016-03-03T15:56:00.000Z|0", "replies": [ { "type": "comment", - "timestamp": "2016-03-07T13:22:00.000Z", - "author": "Serchia", + "timestamp": "2016-03-03T16:29:00.000Z", + "author": "Calak", "range": [ - "0/15/14/0/4/0/5/0/0", - "0/15/14/0/4/0/5/0/4/29" + "0/15/14/0/4/0/0", + "0/15/14/0/4/0/4/29" ], "signatureRanges": [ [ - "0/15/14/0/4/0/5/0/1", - "0/15/14/0/4/0/5/0/4/29" + "0/15/14/0/4/0/1", + "0/15/14/0/4/0/4/29" ] ], - "level": 4, - "id": "Serchia|2016-03-07T13:22:00.000Z|0", + "level": 3, + "id": "Calak|2016-03-03T16:29:00.000Z|0", "replies": [ { "type": "comment", - "timestamp": "2016-03-07T15:29:00.000Z", - "author": "Calak", + "timestamp": "2016-03-07T13:22:00.000Z", + "author": "Serchia", "range": [ - "0/15/14/0/4/0/5/0/5/0/0", - "0/15/14/0/4/0/5/0/5/0/4/29" + "0/15/14/0/4/0/5/0/0", + "0/15/14/0/4/0/5/0/4/29" ], "signatureRanges": [ [ - "0/15/14/0/4/0/5/0/5/0/1", - "0/15/14/0/4/0/5/0/5/0/4/29" + "0/15/14/0/4/0/5/0/1", + "0/15/14/0/4/0/5/0/4/29" ] ], - "level": 5, - "id": "Calak|2016-03-07T15:29:00.000Z|0", + "level": 4, + "id": "Serchia|2016-03-07T13:22:00.000Z|0", "replies": [ { "type": "comment", - "timestamp": "2016-03-07T16:00:00.000Z", - "author": "Serchia", + "timestamp": "2016-03-07T15:29:00.000Z", + "author": "Calak", "range": [ - "0/15/14/0/4/0/5/0/5/0/5/0/0", - "0/15/14/0/4/0/5/0/5/0/5/0/4/29" + "0/15/14/0/4/0/5/0/5/0/0", + "0/15/14/0/4/0/5/0/5/0/4/29" ], "signatureRanges": [ [ - "0/15/14/0/4/0/5/0/5/0/5/0/1", - "0/15/14/0/4/0/5/0/5/0/5/0/4/29" + "0/15/14/0/4/0/5/0/5/0/1", + "0/15/14/0/4/0/5/0/5/0/4/29" ] ], - "level": 6, - "id": "Serchia|2016-03-07T16:00:00.000Z|0", + "level": 5, + "id": "Calak|2016-03-07T15:29:00.000Z|0", "replies": [ { "type": "comment", - "timestamp": "2016-03-07T21:19:00.000Z", - "author": "Calak", + "timestamp": "2016-03-07T16:00:00.000Z", + "author": "Serchia", "range": [ - "0/15/14/0/4/0/5/0/5/0/5/0/5/0/0", - "0/15/14/0/4/0/5/0/5/0/5/0/5/0/4/29" + "0/15/14/0/4/0/5/0/5/0/5/0/0", + "0/15/14/0/4/0/5/0/5/0/5/0/4/29" ], "signatureRanges": [ [ - "0/15/14/0/4/0/5/0/5/0/5/0/5/0/1", - "0/15/14/0/4/0/5/0/5/0/5/0/5/0/4/29" + "0/15/14/0/4/0/5/0/5/0/5/0/1", + "0/15/14/0/4/0/5/0/5/0/5/0/4/29" ] ], - "level": 7, - "id": "Calak|2016-03-07T21:19:00.000Z|0", - "replies": [] + "level": 6, + "id": "Serchia|2016-03-07T16:00:00.000Z|0", + "replies": [ + { + "type": "comment", + "timestamp": "2016-03-07T21:19:00.000Z", + "author": "Calak", + "range": [ + "0/15/14/0/4/0/5/0/5/0/5/0/5/0/0", + "0/15/14/0/4/0/5/0/5/0/5/0/5/0/4/29" + ], + "signatureRanges": [ + [ + "0/15/14/0/4/0/5/0/5/0/5/0/5/0/1", + "0/15/14/0/4/0/5/0/5/0/5/0/5/0/4/29" + ] + ], + "level": 7, + "id": "Calak|2016-03-07T21:19:00.000Z|0", + "replies": [] + } + ] } ] } diff --git a/tests/cases/en-big-parsoid/en-big-parsoid.json b/tests/cases/en-big-parsoid/en-big-parsoid.json index 0cda6ad7c..77914f239 100644 --- a/tests/cases/en-big-parsoid/en-big-parsoid.json +++ b/tests/cases/en-big-parsoid/en-big-parsoid.json @@ -4165,7 +4165,7 @@ "timestamp": "2019-06-14T13:11:00.000Z", "author": "Nosebagbear", "range": [ - "0/24/2/1", + "0/24/2/2", "0/24/12/0/6/27" ], "signatureRanges": [