From: Daniel Axtens Date: Mon, 1 Aug 2011 10:39:06 +0000 (+0800) Subject: ocropus as soon as possible X-Git-Url: https://git.ucc.asn.au/?a=commitdiff_plain;h=aeb663e7da50cdad5b60dcb5d7a1f78fc83f36b4;p=dja%2Fscandal.git ocropus as soon as possible --- diff --git a/scan2pages.sh b/scan2pages.sh index b2bad10..42b3bfb 100755 --- a/scan2pages.sh +++ b/scan2pages.sh @@ -125,7 +125,19 @@ for scanpgnum in `$my_seq 1 $pages`; do $scanpg || exit 1 fi; - # preprocess it! +done; + +# do ocr binarise +[ -e $outdir/scanpgs ] && rm -r $outdir/scanpgs +ocropus book2pages $outdir/scanpgs $outdir/scanpg-*.png || exit 1 + +for scanpgnum in `$my_seq 1 $pages`; do + + scanpgnum=$(printf '%03d' $scanpgnum) + scanpg=$outdir/scanpg-${scanpgnum}.png + binscanpg=$outdir/scanpgs/$(printf '%04d' $scanpgnum).bin.png + + # preprocess scanned page cleanscanpg=$outdir/scanpg-clean-${scanpgnum}.pnm if [ ! -e $cleanscanpg ]; then # create mask: @@ -137,9 +149,7 @@ for scanpgnum in `$my_seq 1 $pages`; do cropcords=$(convert -border 1x1 -bordercolor '#000' -resize 1000% -trim -fuzz 90% -format "%wx%h%O" $outdir/scanpg-mask-${scanpgnum}.png info: || exit 1) # ... crop and despeckle? the final pre-prepared image - convert $convertflags -crop $cropcords $scanpg $cleanscanpg || exit 1 - elif [[ $skipmask ]]; then - cp $origfile $preppnm + convert $convertflags -crop $cropcords $binscanpg $cleanscanpg || exit 1 fi; # check it hasn't mostly disappeared - e.g. if the scan was all black