Normalize whitespace when processing infobox contents

This commit is contained in:
Sebastian Marzjan 2016-01-05 08:45:16 +01:00
parent a5ad283fca
commit 72392d0edd
2 changed files with 28 additions and 2 deletions

View file

@ -59,7 +59,7 @@ abstract class NodeSanitizer implements NodeTypeSanitizerInterface {
$this->removeNodesBySelector( $xpath, $this->selectorsForFullRemoval );
$nodes = $this->extractNeededNodes( $xpath );
return $this->generateHTML( $nodes, $dom );
return $this->normalizeWhitespace( $this->generateHTML( $nodes, $dom ) );
}
/**
@ -78,7 +78,18 @@ abstract class NodeSanitizer implements NodeTypeSanitizerInterface {
*/
$result[] = ( $node->nodeName === '#text' ) ? htmlspecialchars( $dom->saveHTML( $node ), ENT_QUOTES ) : $dom->saveHTML( $node );
}
return implode( '', $result );
return implode( ' ', $result );
}
/**
 * Collapses every run of consecutive whitespace characters (spaces, tabs,
 * line breaks) into a single space.
 *
 * For HTML that is not preformatted this does not change how the markup is
 * rendered, because browsers collapse such runs themselves.
 * Leading and trailing whitespace is reduced to one space, not trimmed.
 *
 * @param string $text text or HTML fragment to normalize
 * @return string the input with each whitespace run replaced by one space;
 *                the unmodified input if the replacement fails
 */
protected function normalizeWhitespace( $text ) {
	// mb_ereg_replace() is the canonical name; the undocumented mbereg_replace()
	// alias was deprecated in PHP 7.3 and removed in PHP 8.0.
	$normalized = mb_ereg_replace( '\s+', ' ', $text );

	// On failure mb_ereg_replace() yields false/null; keep the string contract
	// by handing back the original text instead of a non-string value.
	return is_string( $normalized ) ? $normalized : $text;
}
/**

View file

@ -43,6 +43,21 @@ class NodeImageSanitizerTest extends WikiaBaseTest {
[
[ 'caption' => '<script>JSSnippetsStack.push({dependencies:[{"url":"http://i3.marzjan.wikia-dev.com/__am/1451462348/group/-/wikia_photo_gallery_js","type":"js"},{"url":"http://i2.marzjan.wikia-dev.com/__am/1451462348/sass/background-dynamic%3D1%26background-image%3D%26background-image-height%3D1185%26background-image-width%3D1600%26color-body%3D%2523bacdd8%26color-body-middle%3D%2523bacdd8%26color-buttons%3D%2523006cb0%26color-header%3D%25233a5766%26color-links%3D%2523006cb0%26color-page%3D%2523ffffff%26oasisTypography%3D1%26page-opacity%3D100%26widthType%3D0/extensions/wikia/WikiaPhotoGallery/css/gallery.scss","type":"css"}],callback:function(json){WikiaPhotoGalleryView.init(json)},id:"WikiaPhotoGalleryView.init"})</script>' ],
[ 'caption' => '' ]
],
[
[ 'caption' => '<ul><li>1
</li><li>2
</li><li>3
</li></ul>' ],
[ 'caption' => '1 2 3' ]
],
[
[ 'caption' => '<ol><li>1
</li><li>2
<ol><li>2.1
</li></ol>
</li></ol>' ],
[ 'caption' => '1 2 2.1' ]
]
];
}