ocropus as soon as possible
author Daniel Axtens <dja@ucc.gu.uwa.edu.au>
Mon, 1 Aug 2011 10:39:06 +0000 (18:39 +0800)
committer Daniel Axtens <dja@ucc.gu.uwa.edu.au>
Mon, 1 Aug 2011 10:39:06 +0000 (18:39 +0800)
scan2pages.sh

index b2bad10..42b3bfb 100755 (executable)
@@ -125,7 +125,19 @@ for scanpgnum in `$my_seq 1 $pages`; do
                        $scanpg || exit 1
        fi;
        
-       # preprocess it!
+done;
+
+# do ocr binarise
+[ -e $outdir/scanpgs ] && rm -r $outdir/scanpgs
+ocropus book2pages $outdir/scanpgs $outdir/scanpg-*.png || exit 1
+
+for scanpgnum in  `$my_seq 1 $pages`; do
+
+       scanpgnum=$(printf '%03d' $scanpgnum)
+       scanpg=$outdir/scanpg-${scanpgnum}.png
+       binscanpg=$outdir/scanpgs/$(printf '%04d' $scanpgnum).bin.png
+
+       # preprocess scanned page
        cleanscanpg=$outdir/scanpg-clean-${scanpgnum}.pnm
        if [ ! -e $cleanscanpg ]; then
                # create mask: 
@@ -137,9 +149,7 @@ for scanpgnum in `$my_seq 1 $pages`; do
                cropcords=$(convert -border 1x1 -bordercolor '#000' -resize 1000% -trim -fuzz 90% -format "%wx%h%O" $outdir/scanpg-mask-${scanpgnum}.png info: || exit 1)
                
                # ... crop and despeckle? the final pre-prepared image
-               convert $convertflags -crop $cropcords $scanpg $cleanscanpg || exit 1
-       elif [[ $skipmask ]]; then
-               cp $origfile $preppnm
+               convert $convertflags -crop $cropcords $binscanpg  $cleanscanpg || exit 1
        fi;
        
        # check it hasn't mostly disappeared - e.g. if the scan was all black

UCC git Repository :: git.ucc.asn.au