PortableInfobox/includes/services/Helpers/FileNamespaceSanitizeHelper.php

132 lines
3.4 KiB
PHP
Raw Normal View History

<?php
namespace PortableInfobox\Helpers;
// Original class and authors:
// https://github.com/Wikia/app/blob/dev/includes/wikia/helpers/FileNamespaceSanitizeHelper.php
/**
 * Turns user-provided image wikitext (e.g. "[[File:Foo.png|thumb]]") into a
 * bare file name by stripping HTML tags, link brackets, the file namespace
 * prefix and any image parameters.
 *
 * Accessed as a singleton through getInstance().
 */
class FileNamespaceSanitizeHelper {
	/** @var FileNamespaceSanitizeHelper|null Lazily created shared instance */
	private static $instance = null;

	/** @var string[] Cache of file namespace prefix regexes, keyed by language code */
	private $filePrefixRegex = [];

	/**
	 * Private on purpose: instances are obtained through getInstance().
	 */
	private function __construct() {
	}

	/**
	 * Return the shared instance, creating it on first use.
	 *
	 * @return FileNamespaceSanitizeHelper
	 */
	public static function getInstance() {
		if ( self::$instance === null ) {
			self::$instance = new self;
		}

		return self::$instance;
	}

	/**
	 * Build (and cache per language code) the regex that matches a file
	 * namespace prefix, including the trailing colon, at the start of a string.
	 *
	 * @param \Language $contLang
	 * @return string pattern in mb_ereg syntax, without delimiters
	 */
	private function getFilePrefixRegex( $contLang ) {
		global $wgNamespaceAliases;

		$langCode = $contLang->getCode();

		if ( !isset( $this->filePrefixRegex[$langCode] ) ) {
			$fileNamespaces = [
				\MWNamespace::getCanonicalName( NS_FILE ),
				$contLang->getNamespaces()[NS_FILE],
			];

			// Site-wide aliases take precedence over the language's own ones on key clashes.
			$aliases = array_merge(
				$contLang->getNamespaceAliases(),
				is_array( $wgNamespaceAliases ) ? $wgNamespaceAliases : []
			);
			foreach ( $aliases as $alias => $namespaceId ) {
				// Loose comparison kept deliberately: configured ids may be numeric strings.
				if ( $namespaceId == NS_FILE ) {
					$fileNamespaces[] = $alias;
				}
			}

			// Be able to match user-provided file namespaces that may contain
			// either underscores or spaces.
			$fileNamespaces = array_map( function ( $namespace ) {
				return str_replace( '_', '[_ ]', $namespace );
			}, $fileNamespaces );

			// Be able to match both the original casing and the fully
			// lower-cased form of every namespace name.
			$lowercaseFileNamespaces = array_map( function ( $namespace ) {
				return mb_convert_case( $namespace, MB_CASE_LOWER, "UTF-8" );
			}, $fileNamespaces );

			// Canonical, localised and alias names frequently coincide;
			// drop the duplicates so the alternation stays minimal.
			$namespaces = array_unique( array_merge( $fileNamespaces, $lowercaseFileNamespaces ) );

			$this->filePrefixRegex[$langCode] = '^(' . implode( '|', $namespaces ) . '):';
		}

		return $this->filePrefixRegex[$langCode];
	}

	/**
	 * Extract a file name from the given text.
	 *
	 * The text is converted to plain text and inspected line by line; the
	 * first line that yields a non-empty file name wins. When no line does,
	 * the plain text itself is returned.
	 *
	 * @param string $filename
	 * @param \Language $contLang
	 *
	 * @return string
	 */
	public function sanitizeImageFileName( $filename, $contLang ) {
		$plainText = $this->convertToPlainText( $filename );
		$filePrefixRegex = $this->getFilePrefixRegex( $contLang );

		// Split on any newline convention. PHP_EOL depends on the platform PHP
		// runs on, not on the text being parsed, and would leave a stray "\r"
		// (or no split at all) when the two disagree.
		$textLines = preg_split( '/\r\n|\r|\n/', $plainText );

		foreach ( $textLines as $potentialFilename ) {
			$extracted = $this->extractFilename( $potentialFilename, $filePrefixRegex );
			if ( $extracted ) {
				return $extracted;
			}
		}

		return $plainText;
	}

	/**
	 * Strip HTML tags and the surrounding whitespace.
	 *
	 * @param string $filename
	 *
	 * @return string
	 */
	private function convertToPlainText( $filename ) {
		return trim( strip_tags( (string)$filename ) );
	}

	/**
	 * Remove link brackets, the file namespace prefix and image parameters
	 * from a single line of text.
	 *
	 * @param string $potentialFilename
	 * @param string $filePrefixRegex pattern produced by getFilePrefixRegex()
	 *
	 * @return string|null null when nothing is left of the line
	 */
	private function extractFilename( $potentialFilename, $filePrefixRegex ) {
		$trimmedFilename = trim( $potentialFilename, '[]' );
		$unprefixedFilename = mb_ereg_replace( $filePrefixRegex, '', $trimmedFilename );

		if ( !is_string( $unprefixedFilename ) ) {
			// mb_ereg_replace() reports failure with a non-string value.
			return null;
		}

		return $this->removeImageParams( $unprefixedFilename );
	}

	/**
	 * For given file wikitext without brackets, return it without any params
	 * or null if the name part is empty (as judged by empty()).
	 *
	 * The name is percent-decoded with rawurldecode() so that a literal "+"
	 * in a file name is preserved instead of being turned into a space.
	 *
	 * @param string $fileWikitext
	 * @return string|null
	 */
	public function removeImageParams( $fileWikitext ) {
		$filenameParts = explode( '|', $fileWikitext );

		if ( empty( $filenameParts[0] ) ) {
			return null;
		}

		return rawurldecode( $filenameParts[0] );
	}
}