*
 * Inspired by djvuhandler from Tim Starling
 * Modified and written by Xarax
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */

/**
 * Media handler for PDF files.
 *
 * Thumbnails are produced by piping the output of the configured PDF
 * processor ($wgPdfProcessor) into the configured post-processor
 * ($wgPdfPostProcessor); see doTransform().
 */
class PdfHandler extends ImageHandler {
	/**
	 * Keep in sync with pdfhandler.messages in extension.json
	 *
	 * @see getWarningConfig
	 */
	private const MESSAGES = [
		'main' => 'pdf-file-page-warning',
		'header' => 'pdf-file-page-warning-header',
		'info' => 'pdf-file-page-warning-info',
		'footer' => 'pdf-file-page-warning-footer',
	];

	/**
	 * 10MB is considered a large file
	 */
	private const LARGE_FILE = 1e7;

	/**
	 * Key for getHandlerState for value of type PdfImage
	 */
	private const STATE_PDF_IMAGE = 'pdfImage';

	/**
	 * Key for getHandlerState for dimension info
	 */
	private const STATE_DIMENSION_INFO = 'pdfDimensionInfo';

	/**
	 * PDFs always have to be rendered before they can be displayed.
	 *
	 * @param File $file
	 * @return bool Always true
	 */
	public function mustRender( $file ) {
		return true;
	}

	/**
	 * PDFs are always treated as multi-page documents.
	 *
	 * @param File $file
	 * @return bool Always true
	 */
	public function isMultiPage( $file ) {
		return true;
	}

	/**
	 * Validate a single transform parameter.
	 *
	 * Only 'width', 'height' and 'page' are accepted, and each must be
	 * greater than zero. A 'page' value must additionally be a plain integer.
	 *
	 * @param string $name
	 * @param string $value
	 * @return bool
	 */
	public function validateParam( $name, $value ) {
		if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
			// Extra junk on the end of page, probably actually a caption
			// e.g. [[File:Foo.pdf|thumb|Page 3 of the document shows foo]]
			return false;
		}

		// Strict comparison: parameter names are compared as exact strings.
		if ( in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
			return ( $value > 0 );
		}

		return false;
	}

	/**
	 * Build the parameter string in the form "page<N>-<W>px".
	 *
	 * The page defaults to 1 when it is not given.
	 *
	 * @param array $params
	 * @return bool|string False when no width is set
	 */
	public function makeParamString( $params ) {
		$page = $params['page'] ?? 1;
		if ( !isset( $params['width'] ) ) {
			return false;
		}

		return "page{$page}-{$params['width']}px";
	}

	/**
	 * Parse a parameter string as produced by makeParamString().
	 *
	 * @param string $str
	 * @return array|bool Array with 'width' and 'page', or false if $str does not match
	 */
	public function parseParamString( $str ) {
		$m = [];
		if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
			return [ 'width' => $m[2], 'page' => $m[1] ];
		}

		return false;
	}

	/**
	 * Reduce the transform parameters to 'width' and 'page'.
	 *
	 * @param array $params
	 * @return array
	 */
	public function getScriptParams( $params ) {
		return [
			'width' => $params['width'],
			// Same default as makeParamString(), so a missing page does not
			// raise an undefined-key warning.
			'page' => $params['page'] ?? 1,
		];
	}

	/**
	 * Map of magic word IDs to the parameter names used by this handler.
	 *
	 * @return array
	 */
	public function getParamMap() {
		return [
			'img_width' => 'width',
			'img_page' => 'page',
		];
	}

	/**
	 * Build a thumbnail error whose text is the given message rendered in
	 * the content language.
	 *
	 * @param int $width
	 * @param int $height
	 * @param string $msg Message key
	 * @return MediaTransformError
	 */
	protected function doThumbError( $width, $height, $msg ) {
		return new MediaTransformError(
			'thumbnail_error',
			$width,
			$height,
			wfMessage( $msg )->inContentLanguage()->text()
		);
	}

	/**
	 * Render one page of the PDF to a thumbnail at $dstPath.
	 *
	 * The page is rasterised by $wgPdfProcessor and the result is piped into
	 * $wgPdfPostProcessor, which resizes it to the requested width and writes
	 * the destination file.
	 *
	 * @param File $image
	 * @param string $dstPath
	 * @param string $dstUrl
	 * @param array $params
	 * @param int $flags
	 * @return MediaTransformError|MediaTransformOutput|ThumbnailImage|TransformParameterError
	 */
	public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
		global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality;

		if ( !$this->normaliseParams( $image, $params ) ) {
			return new TransformParameterError( $params );
		}

		$width = (int)$params['width'];
		$height = (int)$params['height'];
		$page = (int)$params['page'];

		// pageCount() returns false when no dimension info is available.
		if ( $page > $this->pageCount( $image ) ) {
			return $this->doThumbError( $width, $height, 'pdf_page_error' );
		}

		if ( $flags & self::TRANSFORM_LATER ) {
			// Deferred rendering: describe the thumbnail without creating it.
			return new ThumbnailImage( $image, $dstUrl, false, [
				'width' => $width,
				'height' => $height,
				'page' => $page,
			] );
		}

		if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
			return $this->doThumbError( $width, $height, 'thumbnail_dest_directory' );
		}

		// Thumbnail extraction is very inefficient for large files.
		// Provide a way to pool count limit the number of downloaders.
		if ( $image->getSize() >= self::LARGE_FILE ) {
			$work = new PoolCounterWorkViaCallback(
				'GetLocalFileCopy',
				sha1( $image->getName() ),
				[
					'doWork' => static function () use ( $image ) {
						return $image->getLocalRefPath();
					}
				]
			);
			$srcPath = $work->execute();
		} else {
			$srcPath = $image->getLocalRefPath();
		}

		if ( $srcPath === false ) {
			// could not download original
			return $this->doThumbError( $width, $height, 'filemissing' );
		}

		// Both commands run inside one subshell: "( processor | post-processor )".
		$cmd = '(' . wfEscapeShellArg(
			$wgPdfProcessor,
			"-sDEVICE=jpeg",
			"-sOutputFile=-",
			"-sstdout=%stderr",
			"-dFirstPage={$page}",
			"-dLastPage={$page}",
			"-dSAFER",
			"-r{$wgPdfHandlerDpi}",
			// CropBox defines the region that the PDF viewer application is expected to display or print.
			"-dUseCropBox",
			"-dBATCH",
			"-dNOPAUSE",
			"-q",
			$srcPath
		);
		$cmd .= " | " . wfEscapeShellArg(
			$wgPdfPostProcessor,
			"-depth",
			"8",
			"-quality",
			$wgPdfHandlerJpegQuality,
			"-resize",
			(string)$width,
			"-",
			$dstPath
		);
		$cmd .= ")";

		wfDebug( __METHOD__ . ": $cmd\n" );
		$retval = '';
		$err = wfShellExecWithStderr( $cmd, $retval );

		$removed = $this->removeBadFile( $dstPath, $retval );

		if ( $retval != 0 || $removed ) {
			wfDebugLog(
				'thumbnail',
				sprintf(
					'thumbnail failed on %s: error %d "%s" from "%s"',
					wfHostname(),
					$retval,
					trim( $err ),
					$cmd
				)
			);

			return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
		}

		return new ThumbnailImage( $image, $dstUrl, $dstPath, [
			'width' => $width,
			'height' => $height,
			'page' => $page,
		] );
	}

	/**
	 * Get the PdfImage for $path, creating it and storing it in the handler
	 * state on first use.
	 *
	 * @param \MediaHandlerState $state
	 * @param string $path
	 * @return PdfImage
	 */
	private function getPdfImage( $state, $path ) {
		$pdfImg = $state->getHandlerState( self::STATE_PDF_IMAGE );
		if ( !$pdfImg ) {
			$pdfImg = new PdfImage( $path );
			$state->setHandlerState( self::STATE_PDF_IMAGE, $pdfImg );
		}

		return $pdfImg;
	}

	/**
	 * Retrieve the metadata of the file and, when available, the size of
	 * its first page.
	 *
	 * @param \MediaHandlerState $state
	 * @param string $path
	 * @return array|bool Always contains 'metadata'; the page-size entries
	 *  are added when PdfImage::getPageSize() returns a non-empty value
	 */
	public function getSizeAndMetadata( $state, $path ) {
		$metadata = $this->getPdfImage( $state, $path )->retrieveMetaData();
		$sizes = PdfImage::getPageSize( $metadata, 1 );
		if ( $sizes ) {
			return $sizes + [ 'metadata' => $metadata ];
		}

		return [ 'metadata' => $metadata ];
	}

	/**
	 * Get the extension and MIME type used for thumbnails.
	 *
	 * Both are derived from $wgPdfOutputExtension only; $ext, $mime and
	 * $params are not consulted.
	 *
	 * @param string $ext
	 * @param string $mime
	 * @param null $params
	 * @return array [ extension, MIME type ]
	 */
	public function getThumbType( $ext, $mime, $params = null ) {
		global $wgPdfOutputExtension;
		// Per-request memo of the guessed MIME type. Deliberately not named
		// $mime, so that it does not shadow the parameter of the same name.
		static $thumbMime;

		if ( !isset( $thumbMime ) ) {
			$magic = MediaWikiServices::getInstance()->getMimeAnalyzer();
			$thumbMime = $magic->guessTypesForExtension( $wgPdfOutputExtension );
		}

		return [ $wgPdfOutputExtension, $thumbMime ];
	}

	/**
	 * Check whether the stored metadata of the file is usable.
	 *
	 * @param File $file
	 * @return bool|int METADATA_BAD when 'pages' is missing,
	 *  METADATA_COMPATIBLE when only 'mergedMetadata' is missing,
	 *  METADATA_GOOD otherwise
	 */
	public function isFileMetadataValid( $file ) {
		$data = $file->getMetadataItems( [ 'mergedMetadata', 'pages' ] );
		if ( !isset( $data['pages'] ) ) {
			return self::METADATA_BAD;
		}

		if ( !isset( $data['mergedMetadata'] ) ) {
			return self::METADATA_COMPATIBLE;
		}

		return self::METADATA_GOOD;
	}

	/**
	 * Format the 'mergedMetadata' item of the file for display.
	 *
	 * @param File $image
	 * @param bool|IContextSource $context Context to use (optional)
	 * @return bool|array False when there is no merged metadata
	 */
	public function formatMetadata( $image, $context = false ) {
		$mergedMetadata = $image->getMetadataItem( 'mergedMetadata' );

		if ( !is_array( $mergedMetadata ) || !count( $mergedMetadata ) ) {
			return false;
		}

		// Inherited from MediaHandler.
		return $this->formatMetadataHelper( $mergedMetadata, $context );
	}

	/**
	 * HTML-escape the values of the PDF-specific metadata tags; every other
	 * tag gets the default formatting.
	 *
	 * @inheritDoc
	 */
	protected function formatTag( string $key, $vals, $context = false ) {
		switch ( $key ) {
			case 'pdf-Producer':
			case 'pdf-Version':
				return htmlspecialchars( $vals );
			case 'pdf-PageSize':
				foreach ( $vals as &$val ) {
					$val = htmlspecialchars( $val );
				}
				// Break the reference so $val cannot alias the last element.
				unset( $val );
				return $vals;
			case 'pdf-Encrypted':
				// @todo: The value isn't i18n-ised; should be done here.
				// For reference, if encrypted this field's value looks like:
				// "yes (print:yes copy:no change:no addNotes:no)"
				return htmlspecialchars( $vals );
			default:
				break;
		}

		// Use default formatting
		return false;
	}

	/**
	 * Get the number of pages of the file.
	 *
	 * @param File $image
	 * @return bool|int False when no dimension info is available
	 */
	public function pageCount( File $image ) {
		$info = $this->getDimensionInfo( $image );

		return $info ? $info['pageCount'] : false;
	}

	/**
	 * Get the dimensions of a single page.
	 *
	 * @param File $image
	 * @param int $page
	 * @return array|bool False when the page has no recorded dimensions
	 */
	public function getPageDimensions( File $image, $page ) {
		// MW starts pages at 1, as they are stored here
		$index = $page;

		$info = $this->getDimensionInfo( $image );
		if ( $info && isset( $info['dimensionsByPage'][$index] ) ) {
			return $info['dimensionsByPage'][$index];
		}

		return false;
	}

	/**
	 * Get the page count and per-page dimensions of the file.
	 *
	 * The result is looked up in the handler state of the file first, then
	 * in the main WAN object cache keyed by the SHA-1 of the file.
	 *
	 * @param File $file
	 * @return bool|mixed Array with 'pageCount' and 'dimensionsByPage'
	 *  (indexed from 1), or false when the metadata has no 'Pages' item
	 */
	protected function getDimensionInfo( File $file ) {
		$info = $file->getHandlerState( self::STATE_DIMENSION_INFO );
		if ( !$info ) {
			$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
			$info = $cache->getWithSetCallback(
				$cache->makeKey( 'file-pdf-dimensions', $file->getSha1() ),
				$cache::TTL_MONTH,
				static function () use ( $file ) {
					$data = $file->getMetadataItems( PdfImage::ITEMS_FOR_PAGE_SIZE );
					if ( !$data || !isset( $data['Pages'] ) ) {
						return false;
					}

					$dimsByPage = [];
					$count = intval( $data['Pages'] );
					for ( $i = 1; $i <= $count; $i++ ) {
						$dimsByPage[$i] = PdfImage::getPageSize( $data, $i );
					}

					return [ 'pageCount' => $count, 'dimensionsByPage' => $dimsByPage ];
				}
			);
			// Only a freshly looked-up value needs to be stored in the state.
			$file->setHandlerState( self::STATE_DIMENSION_INFO, $info );
		}

		return $info;
	}

	/**
	 * Get the text of a single page from the 'text' metadata item.
	 *
	 * @param File $image
	 * @param int $page Page number starting at 1; the stored texts are
	 *  indexed from 0
	 * @return bool|string False when no text is stored for the page
	 */
	public function getPageText( File $image, $page ) {
		$pageTexts = $image->getMetadataItem( 'text' );
		if ( !is_array( $pageTexts ) || !isset( $pageTexts[$page - 1] ) ) {
			return false;
		}

		return $pageTexts[$page - 1];
	}

	/**
	 * Adds a warning about PDFs being potentially dangerous to the file
	 * page. Multiple messages with this base will be used.
	 *
	 * @param File $file
	 * @return array
	 */
	public function getWarningConfig( $file ) {
		return [
			'messages' => self::MESSAGES,
			'link' => '//www.mediawiki.org/wiki/Special:MyLanguage/Help:Security/PDF_files',
			'module' => 'pdfhandler.messages',
		];
	}

	/**
	 * @return bool Always true
	 */
	public function useSplitMetadata() {
		return true;
	}
}