Friday, May 18, 2012

[Bash] Convert an AGU citation to BibTeX format

#!/bin/bash
# convert citation string from agu website to a bibtex format
# usage:
#           agu2bib.sh 'citeString'
#   note: 
#          1: wrap the citation string in single quotes
#          2: the whole command must be on one line
#         www.scriptdemo.blogspot.ca
# getLastChar WORD...
#   Print the last non-blank character of the arguments (joined by blanks),
#   without a trailing newline. Prints nothing when there is no such character.
getLastChar()
{
  local text="$*"
  # Drop trailing whitespace first: the last *visible* character is wanted.
  text="${text%"${text##*[![:space:]]}"}"
  if [ -n "${text}" ]; then
     # Parameter expansion instead of `echo -n $* | tail -c1`: no glob
     # expansion of characters such as "*" or "?", no word splitting, and
     # no argument (e.g. "-n") is mistaken for an echo option.
     printf '%s' "${text: -1}"
  fi
}

# fixVolNum VOLSTR
#   Split a volume string into "volume number" (blank separated) on stdout.
#     "117(C5)" -> "117 C5"   issue number given in parentheses
#     "25:3"    -> "25 3"     issue number given after a single colon
#     "117"     -> "117"      anything else is printed unchanged
fixVolNum()
{
  # get number from volume-string
  local volstr=$1
  local numStr rest colons

  if [[ "${volstr}" == *"("*")" ]]; then
     # Text between the first "(" and the ")" that closes it is the number.
     rest=${volstr#*\(}
     numStr=${rest%%\(*}
     numStr=${numStr%?}
     # The volume is whatever precedes the first "(".
     printf '%s\n' "${volstr%%\(*} ${numStr}"
     return 0
  fi

  # Count the colons: exactly one means "volume:number".
  colons=${volstr//[^:]/}
  if [ ${#colons} -eq 1 ]; then
     printf '%s\n' "${volstr%%:*} ${volstr#*:}"
  else
     printf '%s\n' "${volstr}"
  fi
}
# fixPageNum PAGES
#   Print PAGES with every en dash turned into the BibTeX range separator
#   "--"; the first run of four hyphens (two adjacent en dashes) is folded
#   back into a single "--".
fixPageNum()
{
   local pageRange
   # Unquoted on purpose: keeps the whitespace folding of a plain `echo $1`.
   pageRange=$(echo $1)
   pageRange=${pageRange//–/--}
   printf '%s\n' "${pageRange/----/--}"
}

# revName NAME...
#   Turn a name written as "Given ... Family" into "Family, Given ...".
#   The last blank-separated word is taken as the family name; blanks between
#   initials of the remaining part are removed ("P. G." -> "P.G.").
#   A single word (or nothing) is printed unchanged.
revName()
{
   local IFS=$' \t\n'
   local -a words
   local nf lastName restName

   # Split on whitespace without glob expansion; read returns non-zero at
   # end of input, which is expected here.
   read -r -d '' -a words <<< "$*" || true
   nf=${#words[@]}

   if (( nf >= 2 )); then
      lastName=${words[nf-1]}
      # Take everything before the last word by position. Deleting the family
      # name by pattern would hit its first occurrence instead, e.g. inside
      # the given name of "Leeann Lee".
      restName="${words[*]:0:nf-1}"
      printf '%s\n' "${lastName}, ${restName//. /.}"
   else
      printf '%s\n' "${words[*]}"
   fi
}
# getAuthor AUTHORSTRING...
#   Convert the author part of a citation into a BibTeX author list
#   ("Family, Given and Family, Given ...") printed on stdout.
#
#   Expected input: first author as "Family, Given", further authors as
#   "Given Family" separated by commas, the last one introduced by " and ":
#       Myers, P. G., J. Smith, and X. Hu
#   Only the text up to a second " and " is used (as before).
#   Relies on revName to invert the "Given Family" names.
getAuthor()
{
    local IFS=$' \t\n'
    local -a words parts
    local authors firstPart lastOne str0 fl1 fl2 firstOne outStr cname
    local nf n indNext

    # Fold all whitespace to single blanks (no glob expansion).
    read -r -d '' -a words <<< "$*" || true
    authors="${words[*]}"

    if [[ "${authors}" != *" and "* ]]; then
       # one author (or nothing at all)
       # Myers, P. G. ==> Myers, P.G.
       printf '%s\n' "${authors//. /.}"
       return 0
    fi

    # more than one author: the name right after the first " and " is the
    # last author; everything before it is a comma separated list
    firstPart=${authors%%" and "*}
    lastOne=${authors#*" and "}
    lastOne=${lastOne%%" and "*}
    lastOne=$(revName "${lastOne}")

    str0=${firstPart%,}
    str0=${str0% }
    IFS=, read -r -a parts <<< "${str0}"
    nf=${#parts[@]}
    if (( nf <= 1 )); then
       # no comma before " and ": the leading name is kept as written
       printf '%s\n' "${str0} and ${lastOne}"
       return 0
    fi

    fl1=${parts[0]}
    read -r -a words <<< "${parts[1]}"
    fl2="${words[*]}"
    if [[ "${fl2}" == *. ]]; then
       # "Myers, P. G.": first two fields are the first author
       firstOne="${fl1}, ${fl2//. /.}"
       indNext=3
    elif [[ -n "${fl2}" && "${fl1}" != *" "* ]]; then
       # Heuristic: a one-word first field followed by a name without
       # initials is read as "Family, Given" with a spelled-out given name
       # ("Hu, Xianmin").
       firstOne="${fl1}, ${fl2}"
       indNext=3
    else
       # The first field already looks like a complete name; keep it as
       # written so that it is not lost from the list.
       firstOne=${fl1}
       indNext=2
    fi

    outStr=${firstOne}
    for (( n = indNext; n <= nf; n++ )); do
        cname=$(revName "${parts[n-1]}")
        # skip empty fields such as the one between two adjacent commas
        [[ -n "${cname}" ]] || continue
        outStr="${outStr} and ${cname}"
    done
    printf '%s\n' "${outStr} and ${lastOne}"
}

# Print the usage text. Kept inline so that it does not depend on the script
# being installed on PATH under a particular name.
printUsage()
{
  cat <<'EOF'
# convert citation string from agu website to a bibtex format
# usage:
#           agu2bib.sh 'citeString'
#   note:
#          1: need single quotations
#          2: the whole command should be in one line
EOF
}

# fieldFromEnd K FIELD...
#   Print the K-th FIELD counted from the right (K=1 is the last one);
#   print nothing when there are fewer than K fields.
fieldFromEnd()
{
  local k=$1
  local idx
  shift
  if (( k >= 1 && k <= $# )); then
     idx=$(( $# - k + 1 ))
     printf '%s\n' "${!idx}"
  fi
}

# main CITATION...
#   Parse one citation and print the matching @article entry on stdout.
#   Two layouts are recognised:
#     authors (YEAR), title, journal, volume, pages[, doi:...]
#     authors, YEAR: title. journal, volume, pages[, doi:...]   (jTag="ams")
#   Returns 0 on success (or after printing the usage text) and 1, with a
#   message on stderr, when the citation cannot be split.
main()
{
  local isDebug=0            # set to 1 to print the two halves on stderr
  local IFS=$' \t\n'
  local -a words fields
  local inStr prefix rest token tail yearStr jTag=""
  local myStr0 doi pages vol num jour title afterTitle commas
  local isDOI nField nTrail author volStr mykey titleKey

  if [ $# -eq 0 ]; then
     printUsage
     return 0
  fi

  # Fold all whitespace to single blanks so every later step sees one text.
  read -r -d '' -a words <<< "$*" || true
  inStr="${words[*]}"

  # The first run of four digits starting with 1 or 2 is the year. A glob is
  # used because a *quoted* right-hand side of =~ is compared literally.
  prefix=${inStr%%[12][0-9][0-9][0-9]*}
  if [ "${prefix}" == "${inStr}" ]; then
     echo "cannot find the publication year!" >&2
     return 1
  fi
  yearStr=${inStr:${#prefix}:4}
  rest=${inStr:$(( ${#prefix} + 4 ))}

  # A blank is needed on both sides of the year token to cut the citation.
  if [[ "${prefix}" != *" "* || "${rest}" != *" "* ]]; then
     echo "cannot split the citation around the year ${yearStr}!" >&2
     return 1
  fi

  # authors: everything before the blank that precedes the year token
  myStr0=${prefix% *}
  myStr0=${myStr0%,}

  # characters glued to the year, e.g. ")," or ":"; a colon marks "ams"
  token=${rest%% *}
  if [[ "${token}" == *:* ]]; then
     jTag="ams"
  fi
  tail=${rest#* }
  if (( isDebug )); then
     echo "second half: ${tail}" >&2
  fi

  # doi, pages and volume are read from the right-hand end
  IFS=, read -r -a fields <<< "${tail}"
  nField=${#fields[@]}

  # get doi
  doi=$(fieldFromEnd 1 "${fields[@]}")
  doi=${doi// /}
  if [[ "${doi}" == doi* || "${doi}" == DOI* ]]; then
     isDOI=1
     doi=${doi%.}
     pages=$(fieldFromEnd 2 "${fields[@]}")
     pages=${pages// /}
     vol=$(fieldFromEnd 3 "${fields[@]}")
     nTrail=3
  else
     isDOI=0
     pages=${doi%.}
     doi=""
     vol=$(fieldFromEnd 2 "${fields[@]}")
     nTrail=2
  fi
  vol=${vol// /}

  if [ -z "${jTag}" ]; then
     # title[, more title], journal, <trailing fields>
     if (( nField < nTrail + 2 )); then
        echo "cannot find the title and journal in: ${tail}" >&2
        return 1
     fi
     jour=$(fieldFromEnd $(( nTrail + 1 )) "${fields[@]}")
     jour=${jour# }
     # The title is every field before the journal; it may contain commas.
     IFS=,
     title="${fields[*]:0:$(( nField - nTrail - 1 ))}"
     IFS=$' \t\n'
  else
     # title ends at the first "."; the journal follows up to the next ","
     title=${tail%%.*}
     afterTitle=""
     if [[ "${tail}" == *.* ]]; then
        afterTitle=${tail#*.}
     fi
     jour=${afterTitle%%,*}
     jour=${jour# }
     # with two or more commas the last one separates the final author
     commas=${myStr0//[^,]/}
     if (( ${#commas} >= 2 )); then
        myStr0="${myStr0%,*} and ${myStr0##*,}"
     fi
  fi

  # get author string
  if (( isDebug )); then
     echo "first half: ${myStr0}" >&2
  fi
  author=$(getAuthor "${myStr0}")
  pages=$(fixPageNum "${pages}")
  volStr=$(fixVolNum "${vol}")
  read -r -a words <<< "${volStr}"
  vol=${words[0]}
  num=${words[1]}

  #generate the key: first author + year + first two title words,
  #lower case, hyphens removed
  read -r -a words <<< "${title}"
  titleKey="${words[0]}${words[1]}"
  mykey="${author%%,*}${yearStr}${titleKey}"
  mykey=$(printf '%s\n' "${mykey}" | awk '{print tolower($0)}')
  mykey=${mykey//-/}

  #output
  printf '%s\n' "@article{${mykey},"
  printf '%s\n' " author={${author}},"
  printf '%s\n' " title = {{${title}}},"
  printf '%s\n' " journal = {${jour}},"
  printf '%s\n' " year = {${yearStr}},"
  printf '%s\n' " volume = {${vol}},"
  if [ -n "${num}" ]; then
     printf '%s\n' " number = { ${num}},"
  fi
  if [ ${isDOI} -eq 1 ]; then
     printf '%s\n' " pages = {${pages}},"
     printf '%s\n' " doi = {${doi}}"
  else
     printf '%s\n' " pages = {${pages}}"
  fi
  printf '%s\n' "}"
}

main "$@"

Example:

Output:

No comments:

ShowCalendar