Update docs, various fixes.
[dja/scandal.git] / scan2pages.sh
index 42b3bfb..dbd66ee 100755 (executable)
@@ -133,9 +133,9 @@ ocropus book2pages $outdir/scanpgs $outdir/scanpg-*.png || exit 1
 
 for scanpgnum in  `$my_seq 1 $pages`; do
 
+       binscanpg=$outdir/scanpgs/$(printf '%04d' $scanpgnum).bin.png
        scanpgnum=$(printf '%03d' $scanpgnum)
        scanpg=$outdir/scanpg-${scanpgnum}.png
-       binscanpg=$outdir/scanpgs/$(printf '%04d' $scanpgnum).bin.png
 
        # preprocess scanned page
        cleanscanpg=$outdir/scanpg-clean-${scanpgnum}.pnm
@@ -197,18 +197,28 @@ for scanpgnum in  `$my_seq 1 $pages`; do
                        cp $cleanphyspg $outdir/logpg-${scanpgnum}-${physpgnum}-1.pnm
                fi;
 
-       
+               #prepare for OCR: convert this physical page's logical pages from PNM to PNG
+               convert $convertflags $outdir/logpg-${scanpgnum}-${physpgnum}-?.pnm $outdir/logpg-${scanpgnum}-${physpgnum}-%01d.png || exit 1
+
                #final convert and clean w/ bebook optimisation
                if [[ $bebook ]]; then
-                       convert $convertflags -colorspace Gray -median 1 $outdir/logpg-${scanpgnum}-${physpgnum}-?.pnm -trim -fuzz 80% -resize 1200x1600 $outdir/final-${scanpgnum}-${physpgnum}-%01d.${extension} || exit 1
+                       convert $convertflags $outdir/logpg-${scanpgnum}-${physpgnum}-?.pnm -trim -fuzz 80% -resize 1200x1600 $outdir/final-${scanpgnum}-${physpgnum}-%01d.${extension} || exit 1
                else
                        convert $convertflags $outdir/logpg-${scanpgnum}-${physpgnum}-?.pnm $outdir/final-${scanpgnum}-${physpgnum}-%01d.${extension} || exit 1
                fi
-       
+
        done;
 
 done
 
+#try full OCR: run the ocropus pipeline over the logical-page PNGs and write the result to out.html
+rm -rf $outdir/logpgs
+ocropus book2pages $outdir/logpgs $outdir/logpg-*.png
+ocropus pages2lines $outdir/logpgs
+ocropus lines2fsts $outdir/logpgs
+ocropus fsts2bestpaths $outdir/logpgs
+ocropus buildhtml $outdir/logpgs > $outdir/out.html
+
 mkdir -p $outdir/pages
 mv $outdir/final-*.${extension} $outdir/pages
        

UCC git Repository :: git.ucc.asn.au