From fc5ca3c58cf344a16c745e13a4475993e215e202 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Tue, 4 Oct 2016 21:51:57 -0700 Subject: [PATCH] Add page dimension caching and avoid metadata tree loading use in doTransform() Bug: T147296 Change-Id: Ic27f0797317f3467305f953ca6b7ae729a566041 --- PdfHandler_body.php | 58 +++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/PdfHandler_body.php b/PdfHandler_body.php index 582dd9c..52d1ad8 100644 --- a/PdfHandler_body.php +++ b/PdfHandler_body.php @@ -146,16 +146,6 @@ class PdfHandler extends ImageHandler { function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) { global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality; - $metadata = $image->getMetadata(); - - if ( !$metadata ) { - return $this->doThumbError( - isset( $params['width'] ) ? $params['width'] : null, - isset( $params['height'] ) ? $params['height'] : null, - 'pdf_no_metadata' - ); - } - if ( !$this->normaliseParams( $image, $params ) ) { return new TransformParameterError( $params ); } @@ -221,11 +211,9 @@ class PdfHandler extends ImageHandler { ); $cmd .= ")"; - wfProfileIn( 'PdfHandler' ); wfDebug( __METHOD__ . ": $cmd\n" ); $retval = ''; $err = wfShellExecWithStderr( $cmd, $retval ); - wfProfileOut( 'PdfHandler' ); $removed = $this->removeBadFile( $dstPath, $retval ); @@ -336,7 +324,7 @@ class PdfHandler extends ImageHandler { /** * @param $image File * @param bool|IContextSource $context Context to use (optional) - * @return bool|int + * @return bool|array */ function formatMetadata( $image, $context = false ) { $meta = $image->getMetadata(); @@ -364,11 +352,9 @@ class PdfHandler extends ImageHandler { * @return bool|int */ function pageCount( File $image ) { - $data = $this->getMetaArray( $image ); - if ( !$data || !isset( $data['Pages'] ) ) { - return false; - } - return intval( $data['Pages'] ); + $info = $this->getDimensionInfo( $image ); + + return $info ? $info['pageCount'] : false; } /** @@ -377,8 +363,38 @@ class PdfHandler extends ImageHandler { * @return array|bool */ function getPageDimensions( File $image, $page ) { - $data = $this->getMetaArray( $image ); - return PdfImage::getPageSize( $data, $page ); + $index = $page; // MW starts pages at 1, as they are stored here + + $info = $this->getDimensionInfo( $image ); + if ( $info && isset( $info['dimensionsByPage'][$index] ) ) { + return $info['dimensionsByPage'][$index]; + } + + return false; + } + + protected function getDimensionInfo( File $file ) { + $cache = ObjectCache::getMainWANInstance(); + return $cache->getWithSetCallback( + $cache->makeKey( 'file-pdf', 'dimensions', $file->getSha1() ), + $cache::TTL_INDEFINITE, + function () use ( $file ) { + $data = $this->getMetaArray( $file ); + if ( !$data || !isset( $data['Pages'] ) ) { + return false; + } + unset( $data['text'] ); // lower peak RAM + + $dimsByPage = []; + $count = intval( $data['Pages'] ); + for ( $i = 1; $i <= $count; $i++ ) { + $dimsByPage[$i] = PdfImage::getPageSize( $data, $i ); + } + + return [ 'pageCount' => $count, 'dimensionsByPage' => $dimsByPage ]; + }, + [ 'pcTTL' => $cache::TTL_INDEFINITE ] + ); } /** @@ -387,7 +403,7 @@ class PdfHandler extends ImageHandler { * @return bool */ function getPageText( File $image, $page ) { - $data = $this->getMetaArray( $image, true ); + $data = $this->getMetaArray( $image ); if ( !$data || !isset( $data['text'] ) || !isset( $data['text'][$page - 1] ) ) { return false; }