#!/bin/bash
# Set up the environment: create a private scratch directory and make sure it
# is removed again when the script exits.
PREFIX=$(mktemp -d "${TMPDIR:-/tmp}/watchocr.XXXXXXXXXX") || {
  echo "$0: unable to create a temporary directory" >&2
  exit 1
}
# The trap body is single-quoted so $PREFIX is expanded when the trap fires;
# the inner quotes and "--" keep an unusual TMPDIR from being word-split or
# read as an option.
trap 'rm -rf -- "$PREFIX"' EXIT
# Split unquoted expansions on newlines only, so that file names containing
# spaces are not broken apart.
IFS=$'\n'
# Initialise variables. An empty value means "option not given"; assigning
# here also discards any value inherited from the caller's environment.
infolder=
outfolder=
preserve=
webstatus=
deskew=
res="300"
renamebyzxing="false"
maxProcesses=4
# Print the command-line synopsis and the option list on standard output,
# then terminate the script with exit status 2.
displayusage() {
  printf '%s\n' \
    "Usage: $0 -i input-directory -o output-directory" \
    "Required" \
    " -i input directory" \
    " -o output directory" \
    "Options" \
    " -m web status monitoring" \
    " -p preserve originals-directory" \
    " -d deskew and despeckel (much slower)" \
    " -r output resolution" \
    " -z enable barcode renaming"
  exit 2
}
# Publish the current log for the web status page.
#   $1 - "-m" copies /tmp/watchocr.log to /var/www/status/watchocr.log;
#        any other value (or no argument) makes the call a no-op.
# Returns the status of cp when a copy is made, 0 otherwise.
updatestatus() {
  case "$1" in
    -m) cp /tmp/watchocr.log /var/www/status/watchocr.log ;;
  esac
}
# Move a file into a directory without overwriting an existing file of the
# same name.
# Arguments:
#   $1 - path of the file to move
#   $2 - destination directory; a trailing slash is optional
# If the destination already contains an entry with the same base name, the
# moved file is prefixed with the current Unix timestamp ("<epoch>.<name>").
# Each successful move is appended to /tmp/watchocr.log.
# Returns: mv's non-zero status when the move fails (nothing is logged then).
dontoverwrite() {
  local src=$1
  local destdir=$2
  local name target

  # Callers may pass the directory with or without a trailing slash; an empty
  # value is left alone so the target stays relative to the current directory.
  if [ -n "$destdir" ] && [ "${destdir%/}" = "$destdir" ]; then
    destdir=$destdir/
  fi

  name=$(basename -- "$src")
  target=$destdir$name
  if [ -e "$target" ]; then
    target=$destdir$(date +%s).$name
  fi

  mv -- "$src" "$target" || return
  printf 'Moved file to %s \n\n' "$target" >> /tmp/watchocr.log
}
# Parse the command line. An unrecognised option, or a missing input or
# output directory, prints the usage text via displayusage.
while getopts i:o:p:r:mdz flag; do
  case "$flag" in
    i) infolder="$OPTARG" ;;
    o) outfolder="$OPTARG" ;;
    p) preserve="$OPTARG" ;;
    r) res="$OPTARG" ;;
    m) webstatus="-m" ;;
    d) deskew="-d" ;;
    z) renamebyzxing="-z" ;;
    ?) displayusage ;;
  esac
done

# Both -i and -o are mandatory.
if [ -z "$infolder" ] || [ -z "$outfolder" ]; then
  displayusage
fi

# Start each run with a log that contains a single empty line, and publish
# it when web status monitoring (-m) was requested.
printf '\n' > /tmp/watchocr.log
updatestatus $webstatus
# Watched-folder loop.
#
# Every cycle:
#   1. each *.tif directly inside $infolder is converted with tiff2pdf into a
#      PDF in the scratch directory $PREFIX,
#   2. each *.pdf directly inside $infolder is copied to $PREFIX,
#   3. each PDF staged in $PREFIX is handed to img2pdf, keeping fewer than
#      $maxProcesses background conversions running.
# Originals are moved to $preserve when -p was given, otherwise deleted.
# The loop never terminates on its own.

# Append one entry, followed by a blank line, to the log and refresh the web
# status copy.
#   $1 - message text
logstatus() {
  printf '%s \n\n' "$1" >> /tmp/watchocr.log
  updatestatus "$webstatus"
}

# Fill the array found_files with the entries directly inside directory $1
# whose name matches the case-insensitive pattern $2. Directories are left
# out so that a folder named like "x.pdf" is never treated (and removed) as a
# document. NUL-delimited output keeps any file name intact.
collectfiles() {
  local entry
  found_files=()
  while IFS= read -r -d '' entry; do
    found_files+=("$entry")
  done < <(find "$1" -maxdepth 1 ! -type d -iname "$2" -print0)
}

# Succeed when lsof reports at least one process holding file $1 open.
fileinuse() {
  [ -n "$(lsof -t -- "$1")" ]
}

# Dispose of an input file: move it to $preserve when that is set, otherwise
# delete it.
#   $1 - path of the input file
retiresource() {
  if [ -n "$preserve" ]; then
    dontoverwrite "$1" "$preserve"
  else
    rm -f -- "$1"
  fi
}

while true; do
  # Convert .tif files.
  collectfiles "$infolder" "*.tif"
  for file in "${found_files[@]}"; do
    if fileinuse "$file"; then
      echo "File $file still being written, skipping"
      continue
    fi
    logstatus "Processing $file ..."
    # Strip the extension whatever its case, since the search above is
    # case-insensitive as well.
    basenm=$(basename -- "$file")
    basenm=${basenm%.[Tt][Ii][Ff]}
    tiff2pdf -o "$PREFIX/$basenm.pdf" "$file"
    # NOTE(review): the original is disposed of even when tiff2pdf fails;
    # kept as is because there is no defined place for rejected files.
    retiresource "$file"
  done

  # Stage .pdf files.
  collectfiles "$infolder" "*.pdf"
  for file in "${found_files[@]}"; do
    if fileinuse "$file"; then
      echo "File $file still being written, skipping"
      continue
    fi
    logstatus "Processing $file ..."
    # Only give up the original once a copy is safely in the scratch
    # directory; otherwise leave it for the next cycle.
    if cp -- "$file" "$PREFIX"; then
      retiresource "$file"
    else
      echo "Could not copy $file to $PREFIX, will retry" >&2
    fi
  done

  # Process each staged PDF independently.
  collectfiles "$PREFIX" "*.pdf"
  for file in "${found_files[@]}"; do
    # Wait for a free slot; only jobs that are still running count.
    while [ "$(jobs -r | wc -l)" -ge "$maxProcesses" ]; do
      sleep 1
    done

    # Resolve the output directory to an absolute path without changing the
    # working directory of this shell, so relative -i/-o paths stay valid on
    # every cycle.
    if ! outdir=$(CDPATH= cd -- "$outfolder" && pwd); then
      echo "Output directory $outfolder is not accessible, will retry" >&2
      break
    fi

    basenm=$(basename -- "$file")
    ocrargs=(-i "$file" -o "${outdir%/}/$basenm")
    if [ -n "$webstatus" ]; then
      ocrargs+=("$webstatus")
    fi
    if [ -n "$deskew" ]; then
      ocrargs+=("$deskew")
    fi
    # $renamebyzxing is always passed: "-z" when requested, "false" otherwise.
    ocrargs+=(-r "$res" "$renamebyzxing")

    # img2pdf runs with the output directory as its working directory, in a
    # subshell so the cd does not leak into this loop.
    (cd -- "$outdir" && img2pdf "${ocrargs[@]}") &

    # NOTE(review): the staged file is removed after a fixed delay rather than
    # when img2pdf finishes; presumably img2pdf has taken its own copy by
    # then - confirm. Removing it also keeps it from being picked up again.
    sleep 5
    rm -f -- "$file"
  done

  sleep 5
done