Improve logging for Pdf's retrieveMetaData.sh

- Don't check whether the file is executable, but whether the command
  can be resolved. This way pdfinfo and pdftotext work without
  specifying the full path
- Only pipe the stdout content of the commands to the output files
- Exit as failure when the pdfinfo command is available, but its
  execution fails
- Check and log the error output of retrieveMetaData.sh

Bug: T299521
Change-Id: Ia072469f4df6cce51793ab48823c7f4e4e13997b
This commit is contained in:
Derk-Jan Hartman 2024-03-16 00:45:28 +01:00 committed by TheDJ
parent c4c9bb4337
commit f87fc5a6ad
2 changed files with 20 additions and 4 deletions

View file

@ -132,6 +132,13 @@ class PdfImage {
MediaWikiServices::getInstance()->getStatsdDataFactory()
->increment( 'pdfhandler.shell.retrieve_meta_data' );
// Metadata retrieval is allowed to fail, but we'd like to know why
if ( $result->getExitCode() != 0 ) {
wfDebug( __METHOD__ . ': retrieveMetaData.sh' .
"\n\nExitcode: " . $result->getExitCode() . "\n\n"
. $result->getStderr() );
}
$resultMeta = $result->getFileContents( 'meta' );
$resultPages = $result->getFileContents( 'pages' );
if ( $resultMeta !== null || $resultPages !== null ) {

17
scripts/retrieveMetaData.sh Normal file → Executable file
View file

@ -13,14 +13,23 @@ runInfo() {
"$PDFHANDLER_INFO" \
-enc 'UTF-8' \
-meta \
file.pdf > meta
file.pdf 1> meta
# Check for errors and forward them
if [ $? -ne 0 ]; then
exit 1;
fi
# Report metadata as UTF-8 text...and report page sizes for all pages
"$PDFHANDLER_INFO" \
-enc 'UTF-8' \
-l 9999999 \
file.pdf > pages
file.pdf 1> pages
# Check for errors and forward them
if [ $? -ne 0 ]; then
exit 1;
fi
}
runToText() {
@ -34,10 +43,10 @@ runToText() {
echo $? > text_exit_code
}
if [ -x "$PDFHANDLER_INFO" ]; then
if [ -x "$(command -v $PDFHANDLER_INFO)" ]; then
runInfo
fi
if [ -x "$PDFHANDLER_TOTEXT" ]; then
if [ -x "$(command -v $PDFHANDLER_TOTEXT)" ]; then
runToText
fi