Sunday, March 18, 2012

[Bash] extract pages from a pdf file using gs

#!/bin/bash
# Extract a range of pages from a big PDF file using Ghostscript (gs)
# Usage:
#           pdfselect.sh pdffile startPageNumber [endPageNumber [outputPdf]]
#           http://scriptdemo.blogspot.ca

# Print the page count of a PDF file on stdout.
# Arguments:
#   $1 - path to the PDF file
# Outputs:
#   The number found on pdfinfo's "Pages:" line, or 0 when the file does not
#   exist or no numeric page count could be read from pdfinfo's output.
function getPDFPages()
{
   local pages=""
   if [ -e "$1" ]; then
      # Anchor on the "Pages:" field and stop at the first hit so that other
      # lines containing the word "Pages" (e.g. a title) are never picked up.
      pages=$(pdfinfo "$1" | awk '/^Pages:/ {print $2; exit}')
   fi
   # Always emit exactly one non-negative integer so callers can compare it
   # numerically without first checking for an empty string.
   case "${pages}" in
      ''|*[!0-9]*) echo 0 ;;
      *) echo "${pages}" ;;
   esac
}

# Main script.
# Positional arguments:
#   $1 - source PDF file
#   $2 - first page to extract
#   $3 - last page to extract (optional; defaults to $2)
#   $4 - output PDF (optional; defaults to p<first>_<last>_<name of $1>,
#        created in the current directory)
# Exits non-zero on bad usage, a missing input file, an unreadable page
# count, or invalid page numbers.
if [ $# -lt 2 ]; then
   # Print the usage text directly rather than reading it back out of the
   # script file, so it works regardless of PATH or how the file is laid out.
   {
      echo "Usage:"
      echo "          pdfselect.sh pdffile startPageNumber [endPageNumber [outputPdf]]"
   } >&2
   exit 1
fi

oriPdf=$1
if [ ! -e "${oriPdf}" ]; then
   echo "${oriPdf} does not exist! " >&2
   exit 1
fi

numPage=$(getPDFPages "${oriPdf}")
# Treat an empty or non-numeric answer as "no pages" so the numeric test
# below cannot fail with a syntax error.
case "${numPage}" in
   ''|*[!0-9]*) numPage=0 ;;
esac
if [ "${numPage}" -eq 0 ]; then
   echo "ZERO page is found in ${oriPdf}" >&2
   exit 1
fi

sPage=$2
# With no (or an empty) end page, extract the single page sPage.
ePage=${3:-${sPage}}

# Both page arguments are used in numeric comparisons and passed to gs, so
# reject anything that is not a plain non-negative integer.
for page in "${sPage}" "${ePage}"; do
   case "${page}" in
      ''|*[!0-9]*)
         echo "Page numbers must be non-negative integers, got '${page}'" >&2
         exit 1
         ;;
   esac
done
# Force base 10 so values such as 08 are not rejected as invalid octal.
sPage=$((10#${sPage}))
ePage=$((10#${ePage}))

# Clamp the requested range to the pages that actually exist.
[ "${sPage}" -lt 1 ] && sPage=1
[ "${sPage}" -gt "${numPage}" ] && sPage=${numPage}
[ "${ePage}" -gt "${numPage}" ] && ePage=${numPage}
if [ "${sPage}" -gt "${ePage}" ]; then
   echo "Start page ${sPage} is after end page ${ePage}" >&2
   exit 1
fi

if [ $# -lt 4 ]; then
   # Use only the file-name part of the input; prefixing a path that contains
   # directories would otherwise point into a directory that does not exist.
   outPdf="p${sPage}_${ePage}_${oriPdf##*/}"
else
   outPdf=$4
fi

# Invoke gs directly with quoted arguments (no eval) so file names with
# spaces or shell metacharacters are passed through unchanged.
gs -sDEVICE=pdfwrite -dNOPAUSE -dBATCH -dSAFER \
   -dFirstPage="${sPage}" -dLastPage="${ePage}" \
   -sOutputFile="${outPdf}" "${oriPdf}"

ShowCalendar