mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/PdfHandler
synced 2024-11-24 08:14:37 +00:00
b253dc04c4
Combine all 3 shellouts into one script, retrieveMetaData.sh. The script is executed by /bin/sh by default, it can be changed for Windows users by setting $wgPdfHandlerShell. pdftotext is a bit special since its behavior varies based on the program's exit code, so save that in a file so we can check it independently of the overall exit status. Bug: T289228 Change-Id: I29750bcc282bd5f9b8e2f79aa340869738ea5f5b
40 lines
860 B
Bash
40 lines
860 B
Bash
#!/bin/sh

# Get parameters from environment.
#   PDFHANDLER_INFO   - command used to run pdfinfo   (default: pdfinfo)
#   PDFHANDLER_TOTEXT - command used to run pdftotext (default: pdftotext)
# A variable that is unset or empty falls back to its default, and both are
# exported so that child processes see the resolved values.
export PDFHANDLER_INFO="${PDFHANDLER_INFO:-pdfinfo}"
export PDFHANDLER_TOTEXT="${PDFHANDLER_TOTEXT:-pdftotext}"
|
|
|
|
# Query file.pdf (in the current directory) with the pdfinfo command twice and
# save each result to a file in the current directory.
#
# Reads:   PDFHANDLER_INFO - the pdfinfo command to execute
# Writes:  ./meta  - output of the -meta query
#          ./pages - output of the page-size query
# Returns: the exit status of the second (page-size) invocation; the status of
#          the first invocation is not checked.
runInfo() {
  # Note in poppler 0.26 the -meta and page data options worked together,
  # but as of poppler 0.48 they must be queried separately.
  # https://bugs.freedesktop.org/show_bug.cgi?id=96801

  # Report metadata as UTF-8 text...and report XMP metadata
  "$PDFHANDLER_INFO" \
    -enc 'UTF-8' \
    -meta \
    file.pdf > meta

  # Report metadata as UTF-8 text...and report page sizes for all pages
  "$PDFHANDLER_INFO" \
    -enc 'UTF-8' \
    -l 9999999 \
    file.pdf > pages
}
|
|
|
|
# Run the pdftotext command on file.pdf (in the current directory), capturing
# its standard output and its exit status in separate files.
#
# Reads:   PDFHANDLER_TOTEXT - the pdftotext command to execute
# Writes:  ./text           - standard output of the command
#          ./text_exit_code - the command's exit status, followed by a newline
# Returns: the status of writing text_exit_code, not that of the command
#          itself, so callers must read text_exit_code to see the real result.
runToText() {
  # "-" is passed as the output argument and stdout is redirected to "text".
  "$PDFHANDLER_TOTEXT" \
    file.pdf - > text
  # Store exit code so we can use it later. This must stay immediately after
  # the command above: any intervening command would overwrite $?.
  printf '%s\n' "$?" > text_exit_code
}
|
|
|
|
# Run each step only when its configured command is an executable file;
# otherwise the step is skipped silently and its output files are not written.
#
# NOTE(review): `-x` tests a pathname and does not search PATH, so a bare
# command name (such as the "pdfinfo" / "pdftotext" defaults) only passes when
# an executable of that name exists in the current directory. Confirm that
# callers always supply full paths, or that skipping is the intended fallback.
if [ -x "$PDFHANDLER_INFO" ]; then
  runInfo
fi

if [ -x "$PDFHANDLER_TOTEXT" ]; then
  runToText
fi
|