initial
author Daniel Axtens <[email protected]>
Mon, 28 Mar 2011 05:43:10 +0000 (13:43 +0800)
committer Daniel Axtens <[email protected]>
Mon, 28 Mar 2011 05:43:10 +0000 (13:43 +0800)
scan2pages.sh [new file with mode: 0755]

diff --git a/scan2pages.sh b/scan2pages.sh
new file mode 100755 (executable)
index 0000000..12f5670
--- /dev/null
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+# Directory that holds the unpaper binary. The default is the location this
+# script was originally written against; export UNPAPER_PATH before running
+# to point at a different install.
+UNPAPER_PATH="${UNPAPER_PATH:-/Users/dja/Applications/unpaper-0.3/}"
+# Prepend it so that directory is searched before the rest of PATH.
+PATH="$UNPAPER_PATH:$PATH"
+
+# Print the command-line synopsis and the option summary to stderr.
+# Takes no arguments and does not exit; the caller picks the exit status.
+function usage {
+cat >&2 << __EOF__
+Usage: $0 [-vfpsmut] [-d depth] pdffile outdir
+Convert pdffile - a pdf of scanned facing pages - to a set of images in outdir,
+each with only one page.
+OPTIONS:
+       -v: Be verbose.
+       -d depth: Use given depth. Default is 1. Unpaper can only handle up to 8.
+       -s: skip masking/trimming. Overrides -m.
+       -f: Forceably redo everything.
+       -p: Forceably redo pdf conversion. Implies options below, equivalent to -f.
+       -m: Forceably redo masking/trimming and other preprocessing. Implies options below.
+       -u: Forceably redo unpaper processing. Implies option below. IGNORED
+       -t: Forceably redo final trimming and cleaning. IGNORED
+__EOF__
+}
+
+
+# process for -f flag to forceably redo all conversions
+forcepdf=
+forcemask=
+forceunpaper=
+forceclean=
+convertflags=
+unpaperflags=
+depthflags="-depth 1"
+skipmask=
+while getopts 'vd:sfpmuc' OPTION
+do
+       case $OPTION in
+       v)  convertflags="$convertflags -verbose"
+               unpaperflags="$unpaperflags -v --time"
+               ;;
+       d)      depthflags="-depth $OPTARG"
+               ;;
+       s)      skipmask=1;
+               ;;
+       f)      forcepdf=1; forcemask=1; forceunpaper=1; forceclean=1
+               ;;
+       p)      forcepdf=1; forcemask=1; forceunpaper=1; forceclean=1
+               ;;
+       m)      forcemask=1; forceunpaper=1; forceclean=1
+               ;;
+       u)      forceunpaper=1; forceclean=1
+               ;;
+       t)      forceclean=1
+               ;;
+#      b)      bflag=1
+#              bval="$OPTARG"
+#              ;;
+       ?)      usage
+               exit 2
+               ;;
+       esac
+done
+shift $(($OPTIND - 1))
+
+# check we have an input and output!
+if [[ $# != 2 ]]; then
+       echo "Wrong number of parameters (2 required, $# given: [$@])" >&2
+       usage
+       exit 2
+fi
+
+file=$1
+dir=$2
+
+filedir=$(dirname $1)
+base=$(basename $1 .pdf)
+
+# make the output dir
+mkdir -p $dir
+
+# figure out the number of pages
+dscname=$dir/${base}.dsc
+pdf2dsc $file $dscname || exit 1
+pages=$(awk '$1 ~ "%%Pages" {print $2}' $dscname)
+echo "Got $pages page(s)."
+rm $dscname
+
+# process pages 1 by 1 to avoid convert gobbling all the memory
+for pg in `jot - 1 $pages`; do
+       echo "Processing page $pg."
+       
+       pgn=$(printf '%03d' $pg)
+       
+       # convert from pdf
+       origpnm=$dir/pg-${pgn}.pnm
+       if [[ ! $([ -e $origpnm ]) || $forcepdf ]]; then
+               convert $convertflags $depthflags -density 300 $file[$(expr $pg - 1)] \
+                       $origpnm || exit 1
+       fi;
+       
+       # preprocess it!
+       preppnm=$dir/pg-pp-${pgn}.pnm
+       if [[ ! $skipmask && ( ! $([ -e $preppnm ]) || $forcemask ) ]]; then
+               # create mask: 
+               # ... downscale, blur,
+               convert $convertflags -resize 25% -depth 8 -blur 10 $origpnm $dir/pg-mask-${pgn}.pnm ||exit 1
+
+               # ... get crop co-ords
+               cropcords=$(convert -resize 400% -trim -fuzz 90% -format "%wx%h%O" $dir/pg-mask-${pgn}.pnm info: || exit 1)
+               
+               # ... crop and despeckle? the final pre-prepared image
+               convert $convertflags -crop $cropcords $origpnm $preppnm || exit 1
+       elif [[ $skipmask ]]; then
+               cp $origpnm $preppnm
+       fi;
+       
+       #unpaper it
+       #names go a bit funny here
+       #also, ignore flags starting here
+       unppnm=$dir/upg-${pgn}-%01d.pnm
+       unpaper $unpaperflags --layout double --overwrite --no-blackfilter -ni 10 -op 2 $preppnm $unppnm || exit 1
+
+       # final convert and clean
+       convert $convertflags $dir/upg-${pgn}-?.pnm $dir/upg-${pgn}-%01d.tiff || exit 1
+       
+       
+done
+
+mkdir $dir/pages
+mv $dir/upg-*.tiff $dir/pages
+       

UCC git Repository :: git.ucc.asn.au