done;
# do ocr binarise
-[ -e $outdir/scanpgs ] && rm -r $outdir/scanpgs
-ocropus book2pages $outdir/scanpgs $outdir/scanpg-*.png || exit 1
+if [[ -e "$outdir/scanpgs" ]]; then
+  # Assume (for the sake of speed in repeated runs) that if the output
+  # for the last page exists, the previous run completed successfully.
+  if [[ -e "$outdir/scanpgs/$(printf '%04d' "$pages").bin.png" ]]; then
+    [[ $verbose ]] && echo Binarisation already complete
+  else
+    # Partial output from an earlier run: remove it so the binarisation
+    # below is redone. Abort if removal fails, because a leftover
+    # directory would make the guard below skip ocropus silently.
+    # ${outdir:?} refuses to expand to "/scanpgs" when outdir is empty.
+    rm -r -- "${outdir:?}/scanpgs" || exit 1
+  fi
+fi
+# Only binarise when no (complete) output directory is present.
+[[ -e "$outdir/scanpgs" ]] || ocropus book2pages "$outdir/scanpgs" "$outdir"/scanpg-*.png || exit 1
+# process each binarised scan page
for scanpgnum in `$my_seq 1 $pages`; do
binscanpg=$outdir/scanpgs/$(printf '%04d' $scanpgnum).bin.png
# ... get crop co-ords. They're off by ~2 as I don't know how to
# properly correct for the border.
cropcords=$(convert -border 1x1 -bordercolor '#000' -resize 1000% -trim -fuzz 90% -format "%wx%h%O" $outdir/scanpg-mask-${scanpgnum}.png info: || exit 1)
-
+
# ... crop and despeckle? the final pre-prepared image
convert $convertflags -crop $cropcords $binscanpg $cleanscanpg || exit 1
fi;
convert $convertflags -resize 10% -depth 8 -gamma 0.01 -median 2 $physpg $outdir/physpg-mask-${scanpgnum}-${physpgnum}.png ||exit 1
# Trim #-border 1x1 -bordercolor '#fff' -trim -fuzz 30%
- cropcords=$(convert -trim -fuzz 90%\
+ # binarisation is so effective that we can try a fuzz well below 90%
+ cropcords=$(convert -trim -fuzz 50%\
-resize 1000% -format "%wx%h%O" $outdir/physpg-mask-${scanpgnum}-${physpgnum}.png info: || exit 1)
+ [[ $verbose ]] && echo Crop co-ords: $cropcords
+
# ... crop and despeckle? the final pre-prepared image
convert $convertflags -crop $cropcords $physpg $cleanphyspg || exit 1
fi;
-
+
+ # check it hasn't mostly disappeared, warn viciously if it has!
+ # The fx expression prints 1 when width*height exceeds 1000 pixels, else 0.
+ # Capture it first so a failing convert aborts the run (like the other
+ # convert calls here) instead of yielding an empty string that passes.
+ pgremains=$(convert "$cleanphyspg" -format '%[fx:s.w*s.h>1000]' info:) || exit 1
+ if [[ $pgremains = "0" ]]; then
+   # Diagnostics go to stderr so they are not mixed into normal output.
+   echo "Warning: discarding physical pg ${scanpgnum}-${physpgnum}: not enough remains after masking." >&2
+   continue
+ fi
+
#detect if the page is 2-up
if [[ $logperphys == 2 ]] || ( [[ $logperphys != 1 ]] && $(dirname $0)/detect2pages.sh ${cleanphyspg} ${scanpgnum} ${physpgnum} ); then