#!/bin/bash
#
# scan2pages.sh - convert a PDF of scanned facing pages into a set of images,
# each holding a single page.
#
# Usage: scan2pages.sh [-vfpsmut] [-d depth] pdffile outdir
#
# Provenance (commit this script was introduced in):
#   From:      Daniel Axtens
#   Date:      Mon, 28 Mar 2011 05:43:10 +0000 (+0800)
#   Subject:   initial
#   X-Git-Url: https://git.ucc.asn.au/?p=dja%2Fscandal.git;a=commitdiff_plain;h=9c757d3063751e5f907c86241edfc1efa59bc6f4
#   Commit:    9c757d3063751e5f907c86241edfc1efa59bc6f4
#   File:      scan2pages.sh (new file, mode 100755)

# Directory containing the unpaper binary; it is prepended to PATH.
# The historical location is kept as the default, but it can be overridden
# by exporting UNPAPER_PATH before running the script.
UNPAPER_PATH="${UNPAPER_PATH:-/Users/dja/Applications/unpaper-0.3/}"
PATH="$UNPAPER_PATH:$PATH"

# Print the usage summary to stderr.
usage() {
cat >&2 << __EOF__
Usage: $0 [-vfpsmut] [-d depth] pdffile outdir
Convert pdffile - a pdf of scanned facing pages - to a set of images in outdir,
each with only one page.
OPTIONS:
    -v: Be verbose.
    -d depth: Use given depth. Default is 1. Unpaper can only handle up to 8.
    -s: skip masking/trimming. Overrides -m.
    -f: Forceably redo everything.
    -p: Forceably redo pdf conversion. Implies options below, equivalent to -f.
    -m: Forceably redo masking/trimming and other preprocessing. Impiles options below.
    -u: Forceably redo unpaper processing. Implies option below. IGNORED
    -t: Forceably redo final trimming and cleaning. IGNORED
__EOF__
}

# Option state. An empty value means "off"; parse_options sets these.
forcepdf=                 # -f / -p: redo the pdf -> pnm conversion
forcemask=                # -f / -p / -m: redo masking/trimming
forceunpaper=             # -f / -p / -m / -u
forceclean=               # -f / -p / -m / -u / -t
convertflags=             # extra flags passed to convert (word-split on use)
unpaperflags=             # extra flags passed to unpaper (word-split on use)
depthflags="-depth 1"     # bit depth used for the pdf conversion
skipmask=                 # -s: skip masking/trimming
optcount=0                # number of leading arguments consumed as options

# parse_options ARGS...
# Parse the command-line options in ARGS into the globals above and store the
# number of consumed arguments in optcount so the caller can shift them away.
# On an unknown option (or a missing option argument) prints the usage text
# and exits with status 2.
parse_options() {
  local OPTION
  local OPTIND=1   # local so the function can be called more than once
  # NOTE: the option string must list 't' (documented in usage and handled
  # below); it used to list 'c', which made -t an illegal option.
  while getopts 'vd:sfpmut' OPTION; do
    case "$OPTION" in
      v)
        convertflags="$convertflags -verbose"
        unpaperflags="$unpaperflags -v --time"
        ;;
      d)
        depthflags="-depth $OPTARG"
        ;;
      s)
        skipmask=1
        ;;
      f | p)
        # Redoing the pdf conversion invalidates every later stage.
        forcepdf=1
        forcemask=1
        forceunpaper=1
        forceclean=1
        ;;
      m)
        forcemask=1
        forceunpaper=1
        forceclean=1
        ;;
      u)
        forceunpaper=1
        forceclean=1
        ;;
      t)
        forceclean=1
        ;;
      *)
        # getopts reports invalid options as '?'.
        usage
        exit 2
        ;;
    esac
  done
  optcount=$((OPTIND - 1))
}

parse_options "$@"
shift "$optcount"

# check we have an input and output!
# Exactly two positional arguments are required: the input pdf and the
# output directory.
if [[ $# -ne 2 ]]; then
  echo "Wrong number of parameters (2 required, $# given: [$*])" >&2
  usage
  exit 2
fi

file=$1
dir=$2

base=$(basename "$file" .pdf)

# make the output dir
mkdir -p "$dir" || exit 1

# figure out the number of pages
dscname=$dir/${base}.dsc
pdf2dsc "$file" "$dscname" || exit 1
pages=$(awk '$1 ~ "%%Pages" {print $2}' "$dscname")
echo "Got $pages page(s)."
rm "$dscname"

# Refuse to continue on an empty or non-numeric page count rather than
# looping over garbage.
if [[ ! $pages =~ ^[0-9]+$ ]]; then
  echo "Could not determine the number of pages in $file" >&2
  exit 1
fi

# process pages 1 by 1 to avoid convert gobbling all the memory
#
# $convertflags, $depthflags and $unpaperflags each hold a space-separated
# list of flags and are deliberately left unquoted below so they word-split.
for ((pg = 1; pg <= pages; pg++)); do
  echo "Processing page $pg."

  printf -v pgn '%03d' "$pg"

  # Set when an earlier stage was (re)run for this page, so that later
  # stages do not reuse output derived from an older input.
  regenerated=

  # convert from pdf (skipped when the output exists, unless forced)
  origpnm=$dir/pg-${pgn}.pnm
  if [[ ! -e $origpnm || -n $forcepdf ]]; then
    # convert numbers pdf pages from 0, hence pg - 1.
    # shellcheck disable=SC2086
    convert $convertflags $depthflags -density 300 "${file}[$((pg - 1))]" \
      "$origpnm" || exit 1
    regenerated=1
  fi

  # preprocess it!
  preppnm=$dir/pg-pp-${pgn}.pnm
  if [[ -n $skipmask ]]; then
    # -s: pass the page through untouched.
    cp "$origpnm" "$preppnm"
  elif [[ ! -e $preppnm || -n $forcemask || -n $regenerated ]]; then
    maskpnm=$dir/pg-mask-${pgn}.pnm

    # create mask:
    # ... downscale, blur,
    # shellcheck disable=SC2086
    convert $convertflags -resize 25% -depth 8 -blur 10 "$origpnm" "$maskpnm" || exit 1

    # ... get crop co-ords
    # The status check sits outside the command substitution: an `exit`
    # inside $(...) would only leave the subshell.
    cropcords=$(convert -resize 400% -trim -fuzz 90% -format "%wx%h%O" "$maskpnm" info:) || exit 1

    # ... crop and despeckle? the final pre-prepared image
    # shellcheck disable=SC2086
    convert $convertflags -crop "$cropcords" "$origpnm" "$preppnm" || exit 1
  fi

  # unpaper it
  # names go a bit funny here: the %01d in the output name is passed through
  # literally for unpaper to fill in.
  # also, ignore flags starting here
  unppnm=$dir/upg-${pgn}-%01d.pnm
  # shellcheck disable=SC2086
  unpaper $unpaperflags --layout double --overwrite --no-blackfilter -ni 10 -op 2 "$preppnm" "$unppnm" || exit 1

  # final convert and clean
  # shellcheck disable=SC2086
  convert $convertflags "$dir/upg-${pgn}-"?.pnm "$dir/upg-${pgn}-%01d.tiff" || exit 1
done

# Collect the finished pages; -p keeps re-runs from failing on an existing dir.
mkdir -p "$dir/pages" || exit 1
mv "$dir"/upg-*.tiff "$dir/pages"