#!/bin/bash

#set up environment

# Private scratch directory in which PDFs are staged before OCR.
# Abort right away if it cannot be created: with an empty PREFIX the later
# "find $PREFIX" calls would scan the current directory and staged files
# would be written relative to "/".
PREFIX=$(mktemp -t -d watchocr.XXXXXXXXXX) || {
	echo "watchocr: could not create temporary working directory" >&2
	exit 1
}
# Remove the scratch directory again when the script exits.
trap 'rm -rf -- "$PREFIX"' EXIT

#split unquoted expansions on newlines only, so that file names containing
#spaces are kept in one piece
IFS=$'\n'

#initialize variables
# All option variables start from a known value so that nothing inherited
# from the caller's environment can stand in for a command line option.
infolder=               # -i: folder scanned for new .tif / .pdf files
outfolder=              # -o: folder handed to img2pdf as the output location
preserve=               # -p: folder originals are moved to (empty: originals are deleted)
webstatus=              # set to "-m" when web status monitoring is requested
deskew=                 # set to "-d" when deskew/despeckle is requested
res="300"               # -r: output resolution passed to img2pdf
renamebyzxing="false"   # set to "-z" when barcode renaming is requested
maxProcesses=4          # upper bound on concurrently running background jobs

#display usage statement
# Writes the option summary to stdout, one entry per line, and then ends the
# script with exit status 2. Takes no arguments.
displayusage() {
	printf '%s\n' \
		"Usage: $0 -i input-directory -o output-directory" \
		"Required" \
		"     -i input directory" \
		"     -o output directory" \
		"Options" \
		"     -m web status monitoring" \
		"     -p preserve originals-directory" \
		"     -d deskew and despeckel (much slower)" \
		"     -r output resolution" \
		"     -z enable barcode renaming"
	exit 2
}

#update web status
# Copies the current log to the web status location, but only when the first
# argument is exactly "-m"; any other (or no) argument is a successful no-op.
updatestatus() {
	[[ "$1" == "-m" ]] || return 0
	cp /tmp/watchocr.log /var/www/status/watchocr.log
}

#check to make sure old files are not overwritten
# Moves a file into a destination directory without replacing an existing
# file of the same name: on a name clash the moved file gets the current
# epoch time (seconds) as a prefix.
# Arguments: $1 - path of the file to move
#            $2 - destination directory, with or without a trailing slash
# Outputs:   appends a "Moved file to ..." line to /tmp/watchocr.log
# Returns:   non-zero (and logs nothing) when the move itself fails
function dontoverwrite {
	local src=$1
	local dest_dir=$2
	local name stamp target

	# The directory is joined to the file name by plain concatenation, so make
	# sure it ends in a slash; otherwise "/data/keep" + "a.pdf" would end up
	# as the sibling path "/data/keepa.pdf".
	if [[ -n "$dest_dir" && "$dest_dir" != */ ]]; then
		dest_dir="$dest_dir/"
	fi

	name=$(basename -- "$src")
	target="$dest_dir$name"
	if [[ -e "$target" ]]; then
		stamp=$(date +%s)
		target="$dest_dir$stamp.$name"
	fi

	# Only report the move when it actually happened.
	mv -- "$src" "$target" || return
	echo "Moved file to $target <br>" >> /tmp/watchocr.log
}

# Parse the command line.
# -i, -o, -p and -r each take a value; -m, -d and -z are plain switches that
# store their own flag text. Anything getopts rejects shows the usage text,
# which also ends the script.
while getopts i:o:p:r:mdz flag; do
	case "$flag" in
		m) webstatus="-m" ;;
		d) deskew="-d" ;;
		z) renamebyzxing="-z" ;;
		i) infolder="$OPTARG" ;;
		o) outfolder="$OPTARG" ;;
		p) preserve="$OPTARG" ;;
		r) res="$OPTARG" ;;
		*) displayusage ;;
	esac
done

# Input and output folders are both mandatory.
[[ -n "$infolder" && -n "$outfolder" ]] || displayusage

#initialize log files
# Every run starts with a log holding a single empty line, which is then
# published when web status monitoring was requested.
printf '\n' > /tmp/watchocr.log

updatestatus "$webstatus"

#watched folder loop
#
# Each pass (followed by a 5 second pause):
#   1. converts every idle .tif in the input folder to a PDF in $PREFIX,
#   2. copies every idle .pdf in the input folder into $PREFIX,
#   3. starts a background img2pdf job for every PDF staged in $PREFIX,
#      with at most $maxProcesses jobs running at the same time.
# An original that was staged successfully is moved to $preserve when that is
# set and deleted otherwise.

# Work with absolute paths from here on. The OCR job runs from inside the
# output folder, so relative -i / -o values would stop resolving otherwise.
# Failing early also avoids consuming originals when there is nowhere to put
# the results.
watch_dir=$(cd -- "$infolder" >/dev/null 2>&1 && pwd) || {
	echo "watchocr: cannot access input directory: $infolder" >&2
	exit 1
}
out_dir=$(cd -- "$outfolder" >/dev/null 2>&1 && pwd) || {
	echo "watchocr: cannot access output directory: $outfolder" >&2
	exit 1
}

# Succeeds while lsof reports at least one process holding the file open.
file_is_open() {
	[[ -n "$(lsof -t "$1")" ]]
}

# Gets a successfully staged original out of the watched folder.
retire_original() {
	if [[ -n "$preserve" ]]; then
		dontoverwrite "$1" "$preserve"
	else
		rm -f -- "$1"
	fi
}

# Blocks until fewer than $maxProcesses background jobs are still running.
wait_for_job_slot() {
	while (( $(jobs -rp | wc -l) >= maxProcesses )); do
		sleep 1
	done
}

while true; do

	#look for new files and process them
	#process and convert .tif files
	while IFS= read -r -d '' file; do
		if file_is_open "$file"; then
			echo "File $file still being written, skipping"
			continue
		fi
		echo "Processing $file ... <br>" >> /tmp/watchocr.log
		updatestatus "$webstatus"

		# -iname matches any letter case, so strip the extension the same way.
		basenm=${file##*/}
		basenm=${basenm%.[Tt][Ii][Ff]}

		if tiff2pdf "$file" -o"$PREFIX/$basenm.pdf"; then
			retire_original "$file"
		else
			# Never discard an original that was not converted. Park it under
			# a name the *.tif search no longer matches so that it is not
			# retried on every pass, and drop any partial output.
			rm -f -- "$PREFIX/$basenm.pdf"
			mv -- "$file" "$file.failed"
			echo "Conversion of $file failed, kept as $file.failed <br>" >> /tmp/watchocr.log
			updatestatus "$webstatus"
		fi
	done < <(find "$watch_dir" -maxdepth 1 -type f -iname "*.tif" -print0)

	#process and convert .pdf files
	while IFS= read -r -d '' file; do
		if file_is_open "$file"; then
			echo "File $file still being written, skipping"
			continue
		fi
		echo "Processing $file ... <br>" >> /tmp/watchocr.log
		updatestatus "$webstatus"

		if cp -- "$file" "$PREFIX/"; then
			retire_original "$file"
		else
			# Leave the original where it is; it is picked up again next pass.
			echo "Could not stage $file, will retry <br>" >> /tmp/watchocr.log
			updatestatus "$webstatus"
		fi
	done < <(find "$watch_dir" -maxdepth 1 -type f -iname "*.pdf" -print0)

	#process each staged pdf independently
	while IFS= read -r -d '' file; do
		wait_for_job_slot

		basenm=${file##*/}
		# Change directory inside the job's own subshell only, so the watcher
		# itself stays where it was started. Empty switches are left out
		# entirely rather than passed as empty arguments.
		# NOTE(review): renamebyzxing defaults to the string "false", which is
		# therefore handed to img2pdf as a trailing argument when -z is not
		# given; kept unchanged.
		(
			cd "$out_dir" || exit 1
			exec img2pdf -i "$file" -o "$out_dir/$basenm" \
				${webstatus:+"$webstatus"} ${deskew:+"$deskew"} \
				-r "$res" ${renamebyzxing:+"$renamebyzxing"}
		) &

		# NOTE(review): the staged file is removed 5 seconds after the job was
		# started, not when it finishes; presumably img2pdf has taken its own
		# copy of the input by then. Confirm before changing this timing.
		sleep 5
		rm -f -- "$file"
	done < <(find "$PREFIX" -maxdepth 1 -type f -iname "*.pdf" -print0)

	sleep 5

done
