Don't flatten spans

... so that per-span information for different languages, i.e. the lang and
dir attributes, isn't lost.

Bug: T59582
Change-Id: If1b04714fdc0f4d581ddb858d8d53f6f340dc10b
This commit is contained in:
Sam Smith 2015-01-08 12:50:23 +00:00
parent fbd8e93a8b
commit 59633e2be9
2 changed files with 38 additions and 1 deletions

View file

@ -45,7 +45,7 @@ class ExtractFormatter extends HtmlFormatter {
if ( $plainText ) {
$this->flattenAllTags();
} else {
$this->flatten( array( 'span', 'a' ) );
$this->flatten( array( 'a' ) );
}
wfProfileOut( __METHOD__ );
}
@ -132,4 +132,24 @@ class ExtractFormatter extends HtmlFormatter {
wfProfileOut( __METHOD__ );
return $m[0];
}
/**
 * Runs the parent class's content filtering first, then strips the
 * presentational attributes (class and style) from every span element
 * still present in the document. Other attributes on those spans are
 * left untouched.
 *
 * @return array Array of removed DOMElements, exactly as returned by
 *  parent::filterContent()
 */
public function filterContent() {
	// The parent pass must run before the spans are collected so that only
	// the elements that survive filtering are cleaned up.
	$removedElements = parent::filterContent();

	foreach ( $this->getDoc()->getElementsByTagName( 'span' ) as $element ) {
		foreach ( array( 'class', 'style' ) as $attributeName ) {
			$element->removeAttribute( $attributeName );
		}
	}

	return $removedElements;
}
}

View file

@ -28,12 +28,29 @@ class ExtractFormatterTest extends MediaWikiTestCase {
. "</span>&nbsp;<small class=\"metadata audiolinkinfo\" style=\"cursor:help;\">([[Wikipedia:Media help|<span style=\"cursor:help;\">"
. "help</span>]]·[[:File:nl-Nederlands.ogg|<span style=\"cursor:help;\">info</span>]])</small></span>) is a"
. " [[West Germanic languages|West Germanic language]] and the native language of most of the population of the [[Netherlands]]";
return array(
array(
"Dutch ( Nederlands ) is a West Germanic language and the native language of most of the population of the Netherlands",
$dutch,
true,
),
array(
"<p><span><span lang=\"baz\">qux</span></span>\n</p>",
'<span class="foo"><span lang="baz">qux</span></span>',
false,
),
array(
"<p><span><span lang=\"baz\">qux</span></span>\n</p>",
'<span style="foo: bar;"><span lang="baz">qux</span></span>',
false,
),
array(
"<p><span><span lang=\"qux\">quux</span></span>\n</p>",
'<span class="foo"><span style="bar: baz;" lang="qux">quux</span></span>',
false,
),
);
}