done;
# do ocr binarise
-[ -e $outdir/scanpgs ] && rm -r $outdir/scanpgs
-ocropus book2pages $outdir/scanpgs $outdir/scanpg-*.png || exit 1
+if [ -e $outdir/scanpgs ]; then
+ # Assume (for the sake of speed in repeated runs) that if the last
+ # page's .bin.png file exists, binarisation completed successfully.
+ if [ -e $outdir/scanpgs/$(printf '%04d' $pages).bin.png ]; then
+ [[ $verbose ]] && echo Binarisation already complete
+ else
+ rm -r $outdir/scanpgs
+ fi
+fi
+[ -e $outdir/scanpgs ] || ocropus book2pages $outdir/scanpgs $outdir/scanpg-*.png || exit 1
+# process each binarised scan page
for scanpgnum in `$my_seq 1 $pages`; do
binscanpg=$outdir/scanpgs/$(printf '%04d' $scanpgnum).bin.png
#unpaper it
physpgbase=$outdir/physpg-${scanpgnum}
if [ ! -e ${physpgbase}-1.pnm ] || [ ! -e ${physpgbase}-2.pnm ]; then
- unpaper $unpaperflags --layout double --overwrite -ni 10 -op 2 $cleanscanpg ${physpgbase}-%01d.pnm || exit 1
+ # most of unpaper's processing is redundant given ocropus, and somewhat too aggressive
+ unpaper $unpaperflags --layout double --overwrite -op 2 \
+ --no-processing \
+ $cleanscanpg ${physpgbase}-%01d.pnm || exit 1
+
fi;
for physpgnum in $($my_seq 1 2); do