mediawiki-extensions-PdfHan.../scripts/retrieveMetaData.sh
Kunal Mehta b253dc04c4 Port retrieveMetaData to BoxedCommand
Combine all 3 shellouts into one script, retrieveMetaData.sh.

The script is executed by /bin/sh by default; this can be changed for
Windows users by setting $wgPdfHandlerShell.

pdftotext is a bit special since its behavior varies based on the
program's exit code, so that exit code is saved in a file and can be
checked independently of the overall exit status.

Bug: T289228
Change-Id: I29750bcc282bd5f9b8e2f79aa340869738ea5f5b
2021-09-20 10:28:27 -07:00

40 lines
860 B
Bash

#!/bin/sh
# The tool locations are taken from the environment. When a variable is unset
# or empty it falls back to the bare command name; both are then exported.
: "${PDFHANDLER_INFO:=pdfinfo}"
: "${PDFHANDLER_TOTEXT:=pdftotext}"
export PDFHANDLER_INFO PDFHANDLER_TOTEXT
# Query file.pdf (in the current directory) twice with the tool named by
# $PDFHANDLER_INFO, writing the first result to "meta" and the second to
# "pages". The function's status is that of the second invocation.
runInfo() {
	# poppler 0.26 accepted -meta together with the page data options, but
	# since poppler 0.48 the two have to be requested in separate calls.
	# https://bugs.freedesktop.org/show_bug.cgi?id=96801

	# Call 1: UTF-8 text output, XMP metadata.
	"$PDFHANDLER_INFO" -enc UTF-8 -meta file.pdf > meta

	# Call 2: UTF-8 text output, page sizes for all pages.
	"$PDFHANDLER_INFO" -enc UTF-8 -l 9999999 file.pdf > pages
}
# Run the tool named by $PDFHANDLER_TOTEXT on file.pdf with "-" as the output
# argument, capturing its stdout in "text" and its exit status in
# "text_exit_code".
runToText() {
	"$PDFHANDLER_TOTEXT" file.pdf - > text
	# Must come directly after the call above: $? is only valid until the
	# next command runs. The status is kept in a file for later inspection.
	printf '%s\n' "$?" > text_exit_code
}
# Succeed when the command given as $1 can be executed.
#   $1 - either a path (contains a slash) or a bare command name
# A path must point at an executable file. A bare name such as "pdfinfo" is
# looked up the same way the shell will look it up when running it, i.e.
# through $PATH; testing it with `[ -x ]` would instead look for a file of
# that name in the current directory.
canRun() {
	case "$1" in
		*/*) [ -x "$1" ] ;;
		*) command -v "$1" >/dev/null 2>&1 ;;
	esac
}

# Each step is optional: it is skipped when its tool is not available.
if canRun "$PDFHANDLER_INFO"; then
	runInfo
fi
if canRun "$PDFHANDLER_TOTEXT"; then
	runToText
fi