From: Daniel Axtens
Date: Fri, 8 Apr 2011 07:32:14 +0000 (+0800)
Subject: Improved border artifact work; dealing with blank scans; 2up page detection
X-Git-Url: https://git.ucc.asn.au/?p=dja%2Fscandal.git;a=commitdiff_plain;h=13a56d0f2178dd64270ae0b9d6e2994de42e9c07

Improved border artifact work; dealing with blank scans; 2up page detection
---
Review note: detect2pages.sh below has been revised (numeric comparisons,
no jot dependency, quoted paths, explicit error exits), so its content no
longer matches the blob id on the index line.

diff --git a/detect2pages.sh b/detect2pages.sh
new file mode 100755
index 0000000..4b8c189
--- /dev/null
+++ b/detect2pages.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# detect2pages.sh - guess whether a scanned image is really a 2-up page.
+#
+# Usage: detect2pages.sh <infile> <pg> <subpg>
+#
+# Squashes <infile> into a 1x601 strip written next to it as
+# strip-<pg>-<subpg>.png, then samples rows 270..320 of that strip and
+# measures the longest run of rows whose intensity is above 0.3.
+#
+# Prints the longest run on stdout.
+# Exit status: 0 if the run is longer than 10 rows (scan2pages.sh resplits
+# the page in that case), 1 if it is not, 2 on usage or ImageMagick errors.
+#
+#should probably have given up on shell and gone over to python.
+
+if (( $# < 3 )); then
+  echo "usage: $0 infile pg subpg" >&2
+  exit 2
+fi
+
+infile=$1
+pg=$2
+subpg=$3
+
+dir=$(dirname -- "$infile")
+strip="$dir/strip-${pg}-${subpg}.png"
+
+convert -blur 10 -gamma 0.00001 -median 1 "$infile" -resize 1x601\! -gamma 0.3 "$strip" || exit 2
+
+#There seems to be a definite bias in my reader scans for things to be in the top part rather than the bottom part
+first=270
+last=320
+
+sum=0
+prev=0
+longestrun=0
+
+# A C-style loop replaces `jot`, a BSD tool that many Linux systems lack.
+for (( testpoint = first; testpoint <= last; testpoint++ )); do
+  #convert -verbose strip-${pg}-${subpg}.png[1x1+0+${testpoint}] -format "%[fx:s.intensity]" info:
+  value=$(convert "${strip}[1x1+0+${testpoint}]" -format "%[fx:s.intensity>0.3]" info:) || exit 2
+  # Anything other than 0/1 would poison the arithmetic below.
+  [[ $value == [01] ]] || exit 2
+  sum=$(( sum + value ))
+
+  # A bright run has just ended: record it and start counting afresh.
+  # (( )) compares numerically; [[ a > b ]] compares strings, so 9 > 10.
+  if (( prev == 1 && value == 0 )); then
+    if (( sum > longestrun )); then
+      longestrun=$sum
+    fi
+    sum=0
+  fi
+
+  prev=$value
+
+  #echo "$testpoint: $value ($sum, $longestrun)"
+done
+
+# Account for a run that was still open when the samples ran out.
+if (( sum > longestrun )); then
+  longestrun=$sum
+fi
+
+echo "$longestrun"
+
+# Same contract as the old `exit $(expr $longestrun "<=" 10)`.
+if (( longestrun > 10 )); then
+  exit 0
+fi
+exit 1
diff --git a/scan2pages.sh b/scan2pages.sh
index 321d31c..b129963 100755
--- a/scan2pages.sh
+++ b/scan2pages.sh
@@ -160,21 +160,46 @@ for pg in `$my_seq 1 $pages`; do cp $origpnm $preppnm fi; + # check it hasn't mostly disappeared - e.g. if the scan was all black + # (e.g. forgot to put the book down when you first hit scan) + # ... not sure why such a convoluted form is required.
expr doesn't like + # processing a convert -format "%w * %h" or any variant thereof + if [[ $(convert $preppnm -format '%[fx:s.w*s.h>1000]' info:) = "0" ]]; then + [[ $verbose == 1 ]] && echo "Discarding pg ${pgn}: not enough remains after masking." + continue; + fi; + #unpaper it #names go a bit funny here #also, ignore flags starting here unppnm=$dir/upg-${pgn}-%01d.pnm unpaper $unpaperflags --layout double --overwrite --no-blackfilter -ni 10 -op 2 $preppnm $unppnm || exit 1 + #detect if the page is 2-up + for subpg in $($my_seq 1 2); do + echo Processing subpg ${subpg}. + if $(dirname $0)/detect2pages.sh $dir/upg-${pgn}-${subpg}.pnm ${pgn} ${subpg}; then + if [[ $verbose == 1 ]]; then + echo "Resplitting subpg ${subpg}." + fi + unpaper $unpaperflags --pre-rotate 90 --layout double --overwrite --no-blackfilter -op 2 $dir/upg-${pgn}-${subpg}.pnm $dir/upg-${pgn}-${subpg}-%01d.pnm + else + cp $dir/upg-${pgn}-${subpg}.pnm $dir/upg-${pgn}-${subpg}-1.pnm + fi; + + done; + #final convert and clean w/ bebook optimisation if [[ $bebook ]]; then - convert $convertflags -colorspace Gray -median 1 $dir/upg-${pgn}-?.pnm -resize 1200x1600 $dir/final-${pgn}-%01d.${extension} || exit 1 + convert $convertflags -colorspace Gray -median 1 $dir/upg-${pgn}-1-?.pnm -trim -resize 1800x2400 $dir/final-${pgn}-1-%01d.${extension} || exit 1 + convert $convertflags -colorspace Gray -median 1 $dir/upg-${pgn}-2-?.pnm -trim -resize 1800x2400 $dir/final-${pgn}-2-%01d.${extension} || exit 1 else - convert $convertflags $dir/upg-${pgn}-?.pnm $dir/final-${pgn}-%01d.${extension} || exit 1 + convert $convertflags $dir/upg-${pgn}-1-?.pnm $dir/final-${pgn}-1-%01d.${extension} || exit 1 + convert $convertflags $dir/upg-${pgn}-2-?.pnm $dir/final-${pgn}-2-%01d.${extension} || exit 1 fi done -mkdir $dir/pages +mkdir -p $dir/pages mv $dir/final-*.${extension} $dir/pages