From 97c8de184262d9107c78232563626d7ae24f8750 Mon Sep 17 00:00:00 2001 From: idradm Date: Tue, 8 Sep 2015 10:25:10 +0200 Subject: [PATCH 1/2] DAT-3207 add mw namespaces aliases to image filename sanitizer --- services/Helpers/ImageFilenameSanitizer.php | 30 ++++++++++++++------- tests/ImageFilenameSanitizerTest.php | 8 +++++- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/services/Helpers/ImageFilenameSanitizer.php b/services/Helpers/ImageFilenameSanitizer.php index 3536cee..f98aaba 100644 --- a/services/Helpers/ImageFilenameSanitizer.php +++ b/services/Helpers/ImageFilenameSanitizer.php @@ -20,6 +20,7 @@ class ImageFilenameSanitizer { if ( is_null( self::$instance ) ) { self::$instance = new self; } + return self::$instance; } @@ -28,23 +29,30 @@ class ImageFilenameSanitizer { * Used as local cache for getting string to remove */ private function getFilePrefixRegex( $contLang ) { + global $wgNamespaceAliases; $langCode = $contLang->getCode(); - if ( empty( $this->filePrefixRegex[$langCode] ) ) { - $fileNamespaces = [ \MWNamespace::getCanonicalName( NS_FILE ), $contLang->getNamespaces()[NS_FILE] ]; + if ( empty( $this->filePrefixRegex[ $langCode ] ) ) { + $fileNamespaces = [ + \MWNamespace::getCanonicalName( NS_FILE ), + $contLang->getNamespaces()[ NS_FILE ], + ]; - $aliases = $contLang->getNamespaceAliases(); + $aliases = array_merge( $contLang->getNamespaceAliases(), $wgNamespaceAliases ); foreach ( $aliases as $alias => $namespaceId ) { - if ( $namespaceId == NS_FILE ) + if ( $namespaceId == NS_FILE ) { $fileNamespaces [] = $alias; + } } - $this->filePrefixRegex[$langCode] = '^(' . implode( '|', $fileNamespaces ) . '):'; + $this->filePrefixRegex[ $langCode ] = '^(' . implode( '|', $fileNamespaces ) . '):'; } - return $this->filePrefixRegex[$langCode]; + + return $this->filePrefixRegex[ $langCode ]; } /** * @param $filename string * @param $contLang \Language + * * @return mixed */ public function sanitizeImageFileName( $filename, $contLang ) { @@ -54,16 +62,18 @@ class ImageFilenameSanitizer { foreach ( $textLines as $potentialFilename ) { $filename = $this->extractFilename( $potentialFilename, $filePrefixRegex ); - if ($filename) { + if ( $filename ) { return $filename; } } + return $plainText; } /** * @param $filename + * * @return string */ private function convertToPlainText( $filename ) { @@ -71,20 +81,22 @@ class ImageFilenameSanitizer { $filename = strip_tags( $filename ); // replace the surrounding whitespace $filename = trim( $filename ); + return $filename; } /** * @param $potentialFilename * @param $filePrefixRegex + * * @return string|null */ private function extractFilename( $potentialFilename, $filePrefixRegex ) { $trimmedFilename = trim( $potentialFilename, "[]" ); $unprefixedFilename = mb_ereg_replace( $filePrefixRegex, "", $trimmedFilename ); $filenameParts = explode( '|', $unprefixedFilename ); - if ( !empty( $filenameParts[0] ) ) { - return $filenameParts[0]; + if ( !empty( $filenameParts[ 0 ] ) ) { + return $filenameParts[ 0 ]; } return null; diff --git a/tests/ImageFilenameSanitizerTest.php b/tests/ImageFilenameSanitizerTest.php index 9961808..7917df6 100644 --- a/tests/ImageFilenameSanitizerTest.php +++ b/tests/ImageFilenameSanitizerTest.php @@ -175,7 +175,13 @@ class ImageFilenameSanitizerTest extends WikiaBaseTest { 'en', '文件名óśłżźćńę?.jpg', 'Content of gallery with diacritics and UTF characters' - ] + ], + [ + '[[Image:image.jpg|300px|lorem ipsum]]', + 'es', + 'image.jpg', + 'Link to filename with canonical namespace, width and caption on a non-EN wiki ' + ], ]; } } From c5adee4ff706d4a91bf7855702513e9975e54323 Mon Sep 17 00:00:00 2001 From: idradm Date: Tue, 8 Sep 2015 10:26:48 +0200 Subject: [PATCH 2/2] DAT-3207 tests update --- tests/ImageFilenameSanitizerTest.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ImageFilenameSanitizerTest.php b/tests/ImageFilenameSanitizerTest.php index 7917df6..b51e424 100644 --- a/tests/ImageFilenameSanitizerTest.php +++ b/tests/ImageFilenameSanitizerTest.php @@ -177,10 +177,10 @@ class ImageFilenameSanitizerTest extends WikiaBaseTest { 'Content of gallery with diacritics and UTF characters' ], [ - '[[Image:image.jpg|300px|lorem ipsum]]', - 'es', - 'image.jpg', - 'Link to filename with canonical namespace, width and caption on a non-EN wiki ' + 'Image:filename.jpg', + 'en', + 'filename.jpg', + 'Filename with alias to namespace' ], ]; }