mirror of
https://github.com/Universal-Omega/PortableInfobox.git
synced 2024-11-16 12:35:31 +00:00
f901a1642e
I carefully researched the topic and it seems that there is no way to instruct DOMDocument to accept "lower" quality HTML. Specifically, in HTML, block elements such as <div> or <figure> are not allowed inside <p> (which accepts only inline elements) - hence the warnings DOMDocument emits. Based on comments on Stack Overflow, silencing these warnings is common practice in this situation.
241 lines
7.1 KiB
PHP
241 lines
7.1 KiB
PHP
<?php
|
|
namespace Wikia\PortableInfobox\Parser\Nodes;
|
|
|
|
use Wikia\PortableInfobox\Helpers\ImageFilenameSanitizer;
|
|
use Wikia\PortableInfobox\Helpers\PortableInfoboxDataBag;
|
|
use WikiaFileHelper;
|
|
|
|
/**
 * Infobox node describing one or more images (or videos).
 *
 * The node value is either a single file name or preprocessed parser output
 * containing gallery / tabber strip markers, in which case every media item
 * found inside the rendered gallery / tabber becomes a separate image entry.
 */
class NodeImage extends Node {
	const ALT_TAG_NAME = 'alt';
	const CAPTION_TAG_NAME = 'caption';
	const MEDIA_TYPE_VIDEO = 'VIDEO';

	/**
	 * @desc Finds parser strip markers of the given extension type in a string
	 * @param string $value Text to search in
	 * @param string $ext Marker type, e.g. 'GALLERY' or 'TABBER'. It is inserted
	 *  verbatim into the regular expression, so callers must pass a regex-safe value.
	 * @return string[] All complete markers found (matching is case-insensitive), or an empty array
	 */
	public static function getMarkers( $value, $ext ) {
		$pattern = '/\x7fUNIQ[A-Z0-9]*-' . $ext . '-[A-F0-9]{8}-QINU\x7f/is';

		if ( preg_match_all( $pattern, $value, $out ) ) {
			return $out[0];
		}

		return [];
	}

	/**
	 * @desc Extracts label/title pairs of media items from rendered gallery HTML
	 *
	 * When $wgArticleAsJson is truthy the items are read from \ArticleAsJson::$media
	 * using the data-ref attribute; otherwise they are read from the data-model
	 * attribute (JSON) and from data-(video|image)-key attributes followed by an
	 * <h2> or a "lightbox-caption" <div>.
	 *
	 * @param string $html Rendered gallery HTML
	 * @return array[] List of [ 'label' => ..., 'title' => ... ] arrays
	 */
	public static function getGalleryData( $html ) {
		global $wgArticleAsJson;

		if ( $wgArticleAsJson ) {
			// An unknown reference yields no items instead of calling count() on null.
			if (
				preg_match( '/data-ref=\'([^\']+)\'/', $html, $out ) &&
				isset( \ArticleAsJson::$media[$out[1]] )
			) {
				return self::extractGalleryItems( \ArticleAsJson::$media[$out[1]] );
			}

			return [];
		}

		$data = [];

		if ( preg_match( '#\sdata-model="([^"]+)"#', $html, $galleryOut ) ) {
			// json_decode() returns null for malformed JSON; extractGalleryItems() tolerates that.
			$model = json_decode( htmlspecialchars_decode( $galleryOut[1] ), true );
			$data = self::extractGalleryItems( $model );
		}

		$captionPatterns = [
			'#data-(video|image)-key="([^"]+)".*?\s<h2>(.*?)<\/h2>#is',
			'#data-(video|image)-key="([^"]+)".*?<div class="lightbox-caption"[^>]*>(.*?)<\/div>#is'
		];

		foreach ( $captionPatterns as $pattern ) {
			if ( preg_match_all( $pattern, $html, $galleryOut ) ) {
				// Group 2 holds the file key, group 3 the caption markup.
				foreach ( $galleryOut[2] as $index => $fileKey ) {
					$data[] = [ 'label' => $galleryOut[3][$index], 'title' => $fileKey ];
				}
			}
		}

		return $data;
	}

	/**
	 * @desc Converts a list of media descriptions into label/title pairs
	 * @param mixed $mediaList Expected to be a list of arrays with 'caption' and 'title' keys;
	 *  anything that is not an array yields an empty result
	 * @return array[] List of [ 'label' => ..., 'title' => ... ] arrays; the label has tags stripped
	 */
	private static function extractGalleryItems( $mediaList ) {
		$items = [];

		if ( !is_array( $mediaList ) ) {
			return $items;
		}

		foreach ( $mediaList as $media ) {
			$items[] = [
				'label' => isset( $media['caption'] ) ? strip_tags( $media['caption'] ) : '',
				'title' => isset( $media['title'] ) ? $media['title'] : null
			];
		}

		return $items;
	}

	/**
	 * @desc Extracts label/title pairs from rendered tabber HTML
	 *
	 * Every <div class="tabbertab"> contributes one item: the label is the div's
	 * title attribute, the title is the referenced media file.
	 *
	 * @param string $html Rendered tabber HTML
	 * @return array[] List of [ 'label' => ..., 'title' => ... ] arrays
	 */
	public static function getTabberData( $html ) {
		global $wgArticleAsJson;

		$data = [];
		$html = (string)$html;

		// DOMDocument::loadHTML() rejects empty input (warning, or ValueError on PHP 8).
		if ( trim( $html ) === '' ) {
			return $data;
		}

		$doc = new \DOMDocument();

		// Parser output is not strictly valid HTML (e.g. block elements inside <p>),
		// so libxml warnings are buffered and discarded. The previous setting is
		// restored afterwards so other libxml users are not affected.
		$previousErrorSetting = libxml_use_internal_errors( true );
		// Without a charset declaration libxml treats the input as ISO-8859-1,
		// which would mangle non-ASCII tab titles and file names.
		$loaded = $doc->loadHTML(
			'<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' . $html
		);
		libxml_clear_errors();
		libxml_use_internal_errors( $previousErrorSetting );

		if ( !$loaded ) {
			return $data;
		}

		$sxml = simplexml_import_dom( $doc );
		if ( !( $sxml instanceof \SimpleXMLElement ) ) {
			return $data;
		}

		$divs = $sxml->xpath( '//div[@class=\'tabbertab\']' );
		if ( !is_array( $divs ) ) {
			return $data;
		}

		foreach ( $divs as $div ) {
			$divXml = $div->asXML();

			if ( $wgArticleAsJson ) {
				if ( preg_match( '/data-ref="([^"]+)"/', $divXml, $out ) ) {
					$data[] = [
						'label' => (string)$div['title'],
						'title' => isset( \ArticleAsJson::$media[$out[1]]['title'] )
							? \ArticleAsJson::$media[$out[1]]['title']
							: null
					];
				}
			} elseif ( preg_match( '/data-(video|image)-key="([^"]+)"/', $divXml, $out ) ) {
				$data[] = [ 'label' => (string)$div['title'], 'title' => $out[2] ];
			}
		}

		return $data;
	}

	/**
	 * @desc Returns (and caches in $this->data) the list of image data arrays for this node
	 * @return array[] One entry per image; see getImageData() for the entry structure
	 */
	public function getData() {
		if ( !isset( $this->data ) ) {
			// value passed to source parameter (or default)
			$value = $this->getRawValueWithDefault( $this->xmlNode );

			if ( $this->containsTabberOrGallery( $value ) ) {
				$this->data = $this->getImagesData( $value );
			} else {
				$this->data = [
					$this->getImageData(
						$value,
						$this->getValueWithDefault( $this->xmlNode->{self::ALT_TAG_NAME} ),
						$this->getValueWithDefault( $this->xmlNode->{self::CAPTION_TAG_NAME} )
					)
				];
			}
		}

		return $this->data;
	}

	/**
	 * @desc Checks if parser preprocessed string contains Tabber or Gallery extension
	 * @param string $str String to check
	 * @return bool
	 */
	private function containsTabberOrGallery( $str ) {
		// TODO: Consider more robust approach (UNIQ...QINU)
		$strLower = strtolower( $str );

		return strpos( $strLower, '-tabber-' ) !== false
			|| strpos( $strLower, '-gallery-' ) !== false;
	}

	/**
	 * @desc Builds image data for every media item found in galleries and tabbers within the value
	 * @param string $value Preprocessed value containing gallery and/or tabber markers
	 * @return array[] Gallery items first, then tabber items; the item label is used as both alt and caption
	 */
	private function getImagesData( $value ) {
		$data = [];
		$items = array_merge( $this->getGalleryItems( $value ), $this->getTabberItems( $value ) );

		foreach ( $items as $item ) {
			$data[] = $this->getImageData( $item['title'], $item['label'], $item['label'] );
		}

		return $data;
	}

	/**
	 * @desc Renders every gallery marker in the value and collects the gallery media items
	 * @param string $value Preprocessed value
	 * @return array[] List of [ 'label' => ..., 'title' => ... ] arrays
	 */
	private function getGalleryItems( $value ) {
		$galleryItems = [];

		foreach ( self::getMarkers( $value, 'GALLERY' ) as $marker ) {
			$galleryHtml = $this->getExternalParser()->parseRecursive( $marker );
			$galleryItems = array_merge( $galleryItems, self::getGalleryData( $galleryHtml ) );
		}

		return $galleryItems;
	}

	/**
	 * @desc Renders every tabber marker in the value and collects the tabber media items
	 * @param string $value Preprocessed value
	 * @return array[] List of [ 'label' => ..., 'title' => ... ] arrays
	 */
	private function getTabberItems( $value ) {
		$tabberItems = [];

		foreach ( self::getMarkers( $value, 'TABBER' ) as $marker ) {
			$tabberHtml = $this->getExternalParser()->parseRecursive( $marker );
			$tabberItems = array_merge( $tabberItems, self::getTabberData( $tabberHtml ) );
		}

		return $tabberItems;
	}

	/**
	 * @desc Builds the data array for a single image and registers the image with the external parser
	 * @param string $title Image name
	 * @param string $alt Alternative text
	 * @param string $caption Caption
	 * @return array Array with 'url', 'name', 'key', 'alt' and 'caption' keys; video files
	 *  additionally get the keys added by videoDataDecorator()
	 */
	private function getImageData( $title, $alt, $caption ) {
		$titleObj = $this->getImageAsTitleObject( $title );
		$fileObj = $this->getFilefromTitle( $titleObj );

		if ( $titleObj instanceof \Title ) {
			$this->getExternalParser()->addImage( $titleObj->getDBkey() );
		}

		$image = [
			'url' => $this->resolveImageUrl( $fileObj ),
			'name' => $titleObj ? $titleObj->getText() : '',
			'key' => $titleObj ? $titleObj->getDBkey() : '',
			'alt' => $alt,
			'caption' => $caption
		];

		if ( $this->isVideo( $fileObj ) ) {
			$image = $this->videoDataDecorator( $image, $fileObj );
		}

		return $image;
	}

	/**
	 * @desc Checks whether none of the images of this node has a URL
	 * @return bool true when every data item has an empty 'url'
	 */
	public function isEmpty() {
		foreach ( $this->getData() as $dataItem ) {
			if ( !empty( $dataItem['url'] ) ) {
				return false;
			}
		}

		return true;
	}

	/**
	 * @desc Collects sources extracted from the node itself and from its alt and caption child tags
	 * @return array Sources with duplicates removed (array_unique() preserves the original keys)
	 */
	public function getSource() {
		$sources = $this->extractSourceFromNode( $this->xmlNode );

		foreach ( [ self::ALT_TAG_NAME, self::CAPTION_TAG_NAME ] as $tagName ) {
			if ( $this->xmlNode->{$tagName} ) {
				$sources = array_merge(
					$sources,
					$this->extractSourceFromNode( $this->xmlNode->{$tagName} )
				);
			}
		}

		return array_unique( $sources );
	}

	/**
	 * @desc Creates a title object in the file namespace from a sanitized image name
	 * @param string $imageName Raw image name
	 * @return Title|null Result of Title::makeTitleSafe()
	 */
	private function getImageAsTitleObject( $imageName ) {
		global $wgContLang;

		$sanitizedName = ImageFilenameSanitizer::getInstance()
			->sanitizeImageFileName( $imageName, $wgContLang );

		return \Title::makeTitleSafe( NS_FILE, $sanitizedName );
	}

	/**
	 * @desc get file object from title object
	 * @param Title|null $title
	 * @return File|null
	 */
	private function getFilefromTitle( $title ) {
		if ( !$title ) {
			return null;
		}

		return WikiaFileHelper::getFileFromTitle( $title );
	}

	/**
	 * @desc returns image url for given file object
	 * @param File|null $file
	 * @return string url or '' if image doesn't exist
	 */
	public function resolveImageUrl( $file ) {
		if ( !$file ) {
			return '';
		}

		return $file->getUrl();
	}

	/**
	 * @desc checks if file media type is VIDEO
	 * @param File|null $file
	 * @return bool
	 */
	private function isVideo( $file ) {
		if ( !$file ) {
			return false;
		}

		return $file->getMediaType() === self::MEDIA_TYPE_VIDEO;
	}

	/**
	 * @desc add additional data required for video media type
	 * @param array $data Image data to extend
	 * @param File $file Video file
	 * @return array $data with 'isVideo' and 'duration' added; 'url' is replaced with
	 *  the full URL of the file's title when the file has a title
	 */
	private function videoDataDecorator( $data, $file ) {
		$title = $file->getTitle();

		if ( $title ) {
			$data['url'] = $title->getFullURL();
		}

		$data['isVideo'] = true;
		$data['duration'] = WikiaFileHelper::formatDuration( $file->getMetadataDuration() );

		return $data;
	}
}
|