#!/bin/sh
#
#  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#    Written by: Roberto Tejero, CABM Summer 2007
#              (c) 2007, Roberto Tejero and Gaeteno T. Montelione.
#  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# REQUIREMENTS: This script expects a set of files in the working
#               directory in order to run properly; the files are named after
#               the name assigned to the protein. For example, assume we have a
#               protein to be determined under the name WR33; the expected files are
#               
#               For X-PLOR
#		WR33.seqXplor          required -- opt
#	        WR33_noe.tbl           required -- opt
#	        WR33_hbond.tbl         required -- opt 
#	        WR33_dihe.tbl          required -- opt
#	        WR33_shiftsCACB.tbl    optional  * not yet supported
#	        WR33_Jhnha.tbl         optional  * not yet supported
#	        WR33_sani.tbl          optional  * not yet supported
#               
#			   Roberto Tejero, CABM-Rutgers Univ, May 2007.
#
##########################################################################################

# General things, need to edit to fit your system, the user is responsible to give the
# exact location/path for the different pieces.

# SubmitPBS    - PBS command to submit jobs into the queue system
# XplorCommand - xplor command for structure calculations/refinement
# CnsCommand   - cns command for structure calculations/refinement
# PDBStat      - pdbstat command for structure calculations
# CNSENV       - sets the CNS environment
# WATREFLIB    - sets PATH to input files and topologies through TOPOWAT (see below)

# Select the tool locations that match the host this script is running on.
# Unknown hosts fall through to the NMRlab defaults in the last branch.
machine=`uname -n`

case "$machine" in
    h31.cabm.rutgers.edu | m30.cabm.rutgers.edu )       # hummer or master2 - CABM
        SubmitPBS="/opt/torque/bin/qsub"
        XplorCommand="/farm/software/xplor-nih-2.14/bin/xplor"
        CnsCommand="/farm/software/cns_solve_1.2/intel-x86_64bit-linux/bin/cns"
        PDBStat="/farm/software/bin/pdbstatnew"
        CNSENV="/farm/software/cns_solve_1.2"
        WATREFLIB="/farm/software/WaterRefinement_cns"
        ;;
    master.cabm.rutgers.edu )                           # master or n00 - CABM
        SubmitPBS="/usr/local/bin/qsub"
        XplorCommand="/farm/software/xplor-nih-2.14/bin/xplor"
        CnsCommand="/farm/software/cns_solve_1.2/intel-i686-linux_g77/bin/cns"
        PDBStat="/farm/software/bin/pdbstatnew"
        CNSENV="/farm/software/cns_solve_1.2"
        WATREFLIB="/farm/software/WaterRefinement_cns"
        ;;
    europa | europaw )                                  # my sony laptop
        SubmitPBS="/opt/torque/bin/qsub"
        XplorCommand="/soft/xplor-nih-2.17.0/bin/xplor"
        CnsCommand="/soft/cns_solve_1.2/intel-x86_64bit-linux/bin/cns"
        PDBStat="/soft/bin/pdbstat"
        CNSENV="/soft/cns_solve_1.2"
        WATREFLIB="/soft/WaterRefinement_cns"
        ;;
    * )                                                 # Default (NMRlab)
        SubmitPBS="/opt/torque/bin/qsub"
        XplorCommand="/farm/software/xplor-nih-2.14/bin/xplor"
        CnsCommand="/farm/software/cns_solve_1.2/intel-i686-linux_g77/bin/cns"
        PDBStat="/farm/software/bin/pdbstat"
        CNSENV="/farm/software/cns_solve_1.2"
        WATREFLIB="/farm/software/WaterRefinement_cns"
        ;;
esac

# Full path of the CNS refinement template (*.inp) inside the library directory.
TemplateFile="$WATREFLIB/cns_refine_h2o.inp"

# Set to "yes" to remove the helper/scratch files at the end of the run, or to
# "no" to keep them.  Cleaning up is usually a good idea because the output
# files can be big, but keeping them helps when checking or debugging a run.
CleanUp="yes"

#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#          Below this line, __do not__ edit unless you know what you are doing.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Time stamp taken at the beginning of the process.
Comienzo=$(date)
# Template name without directory and without the .inp suffix.  The path is
# quoted so that a library directory containing blanks does not split into
# several basename operands.
Template=$(basename "$TemplateFile" .inp)

##########################################################################################
# Print the help text on stdout, followed by the contents of $WATREFLIB/Notes.
# Takes no arguments; the return status is that of the final cat.
Usage(){
    printf '%s\n' \
        " " \
        " " \
        "      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " \
        "        WaterRefCN -- a tool to launch structure refinement with water" \
        "      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " \
        " " \
        " Usage: $0 -na NamProt -que [PBS|No] -pro [xpl|cns] " \
        " " \
        " ** Full set of options [not all are needed for a perfect run, use defaults] " \
        " " \
        "   -na  NamProt      Name of the protein NOTE: this is mandatory, no default value  " \
        "   -qu  [PBS|NO]     Que system to use, one of PBS or NOQUE                def NOQUE" \
        "   -pr  [cns|xpl]    Protocol to use, X-POR, CNS                           def cns  " \
        "   -av  [center|sum] average for the distance restraints (for CNS, XPLOR)  def sum  " \
        "   -ci  n1,n2        CIS pep info (PRO res num) in comma separated list, i.e 56,89  " \
        "   -ss  n1-n2,n3-n4  S-S bridge info in comma separated, dash-separated pairs, i.e. 2-24,30-40  " \
        "   -he               Display this help message " \
        "   -heat N           number of cycles in heating stage,  default is 200    " \
        "   -hot  N           number of cycles in HOT stage, default is 1000        " \
        "   -cool N           number of cycles for cooling stage,   default is 100  " \
        "   -sc  N            Scale all terms (noe, dihe, hbond) by xN times        def 1    " \
        "   -par string       Choice of nonbonded params, one of OPLSX|PARAM19|PARMALLH6|PROLSQ|CONTACT" \
        "                     default is OPLSX " \
        "   -seed <int>       Seed for random number generator,  default 31415      " \
        " " \
        " Examples: " \
        "      $0 -na SR358 -que no -pr cns -ci 22  "
    # Site notes shipped with the library are appended verbatim.
    cat $WATREFLIB/Notes
}

##########################################################################################
# Get the args
# Walk the command line.  Options are recognised case-insensitively by their
# leading letters (so -na, -name and -NAME are the same option).  Except for
# -debug, -help and -readme, every option consumes the next word as its value.
# An unrecognised option aborts the run with a non-zero exit status.
while [ "$1" ]
do
    case "$1" in
        -[Aa][vV]* )                    # averaging used for distance restraints
            shift
            AveNoe=$1
            shift ;;
        -[cC][hH]* )                    # chain identifier
            shift
            Chain=$1
            shift ;;
        -[cC][Ii]* )                    # comma separated list of cis-peptide residues
            shift
            RawCisPep=$1
            for ires in $(echo "$1" | sed 's/,/ /g')
            do
                # The CISP patch is written for the residue number just
                # before the one given on the command line.
                temp=$(expr "$ires" - 1)
                {
                    echo "         patch CISP                 "
                    echo "              reference=NIL=(resid $temp) "
                    echo "         end                          "
                } >> "FileCis.$$"
            done
            HaveCis="yes"
            shift ;;
        -[cC][oO][oO]* )                # cycles in the cooling stage
            shift
            CoolCycles=$1
            shift ;;
        -[dD][Ee]* )                    # debug flag, takes no value
            Debug="yes"
            shift ;;
        -[hH][eE][aA]* )                # cycles in the heating stage; must be tested before -he
            shift
            HeatCycles=$1
            shift ;;
        -[hH][eE]* )                    # help was asked for explicitly
            Usage
            exit 0 ;;
        -[hH][oO][tT]* )                # cycles in the hot stage
            shift
            HotCycles=$1
            shift ;;
        -[nN][aA]* )                    # protein name (mandatory)
            shift
            Name=$1
            shift ;;
        -[qQ][uU]* )                    # queue system
            shift
            BatchQ=$1
            shift ;;
        -[pP][aA]* )                    # nonbonded parameter set
            shift
            Params=$1
            shift ;;
        -[pP][rR]* )                    # protocol
            shift
            case "$1" in
                [xX]* )
                    Protocol="XPLOR" ;;
                [cC][yY]* )
                    Protocol="CYANA" ;;
                [cC][nN]* )
                    Protocol="CNS"   ;;
                * )
                    echo " >> No protocol defined, applying def CNS "
                    Protocol="CNS" ;;
            esac
            shift ;;
        -[sS][cC]* )                    # scale factor for the restraint terms
            shift
            AppScale=$1
            shift ;;
        -[sS][eE]* )                    # seed for the random number generator
            shift
            Semilla=$1
            shift ;;
        -[sS][sS]* )                    # disulfide bridges, e.g. 2-24,30-40
            HaveSSBridge="yes"
            shift
            RawSS=$1
            shift ;;
        -[rR][eE]* )                    # show the README shipped with the library
            cat "$WATREFLIB/BeSureToREADME"
            exit ;;
        * )
            echo "  "
            echo " *************************************************************************"
            echo " ******* ERROR *** option/argument --> $1 <-- is _not_ recognized "
            echo " ******* EXITING *** "
            echo " *************************************************************************"
            echo "  Try again with $0 -help "
            echo "  "
            # A bare "exit" here would report success (status of the last echo).
            exit 1 ;;
    esac
done

#
##########################################################################################
# Check variables -- assign default values in case of missing
# The protein name is mandatory: every input and output file name is built
# from it.  Without a name, show the help text and stop with a failure status.
if [ -z "$Name" ]
then
    echo " "
    echo " ** Exiting **: No name given for the protein in the calcs. "
    Usage
    exit 1
fi

# Apply default values for every setting that was not given on the command
# line.  "${var:=value}" assigns only when var is unset or empty, and, unlike
# an unquoted "[ -z $var ]" test, it is not confused by values containing blanks.
: "${BatchQ:=NOQ}"          # no queue system: jobs are run directly
: "${AppScale:=1}"
: "${Protocol:=CNS}"
: "${AveNoe:=sum}"
: "${Chain:=A}"
: "${Debug:=no}"
: "${Params:=OPLSX}"
: "${HotCycles:=1000}"
: "${CoolCycles:=100}"
: "${HeatCycles:=200}"
: "${Semilla:=31415}"

#
##########################################################################################
# Basic Functions (in general) for use in the script, they will be called in
# time down in the script.

##########################################################################################
# Concatenate every file named ${Name}_sa_* from the current directory into
# the multi-model file All_${Name}_sa.
#   Globals read:    Name
#   Globals written: model (number of files merged), Fichero
# Per input line:
#   - a line containing FILENAM  becomes a "MODEL <n>" record
#   - a line starting with END   becomes "ENDMDL"
#   - a line containing " ANI "  is dropped (RDC axes coordinates)
#   - every other line, including REMARK and "overall" lines, is copied as is
# A final "END" record closes the output file.
AgrupaModelos() {
    model=0
    rm -f "All_${Name}_sa"
    for Fichero in "${Name}"_sa_*
    do
        # When nothing matches, the pattern itself is left in Fichero; skip it
        # instead of handing awk a file that does not exist.
        [ -e "$Fichero" ] || continue
        model=$((model + 1))
        # Rule order matters: the first matching rule decides the line's fate.
        awk -v modeln="$model" '
            /FILENAME*/ { printf("MODEL          %3d\n", modeln); next }
            /overall*/  { print; next }
            /^REMARK/   { print; next }
            /^END/      { print "ENDMDL"; next }
            / ANI /     { next }            # avoid RDC axes coords
                        { print }
        ' "$Fichero" >> "All_${Name}_sa"
    done
    echo "END" >> "All_${Name}_sa"
}
	
##########################################################################################
# Write the PBS job script Smt${Name}_${Process}.sh into the current directory.
# The generated job changes to $PBS_O_WORKDIR, exports TOPOWAT, runs CNS on
# $TARGET.inp with output to $TARGET.out and finally touches
# Done_with_${Name}_${Process}.
#   Globals read: Name, Process, TARGET, CnsCommand, WATREFLIB
#   Positional arguments are not used.
batch_PBS_CNS() {
    # Everything printed inside the braces goes to the job script, which is
    # created anew on every call.
    {
        printf '%s\n' \
            "#!/bin/sh " \
            "#           " \
            "#PBS -S /bin/sh " \
            "#PBS -N Refine_${Name}_${Process}" \
            "#                       " \
            "# down here we launch the job        " \
            'cd  $PBS_O_WORKDIR' \
            "export TOPOWAT=$WATREFLIB"
        echo  $CnsCommand \< $TARGET.inp \> $TARGET.out
        printf '%s\n' \
            "touch Done_with_${Name}_${Process}" \
            "exit"
    } > Smt${Name}_${Process}.sh
}

##########################################################################################
# Write the plain (no queue system) job script Smt${Name}_${Process}.sh.
# The generated script exports TOPOWAT, changes to the directory that was
# current when it was written, runs CNS on $TARGET.inp with output to
# $TARGET.out and finally touches Done_with_${Name}_${Process}.
#   Globals read: Name, Process, TARGET, CnsCommand, WATREFLIB
#   Positional arguments are not used.
batch_NOQ_CNS() {
    # One redirection for the whole group; the script is created anew on
    # every call.
    {
        printf '%s\n' \
            "#!/bin/sh" \
            "export TOPOWAT=$WATREFLIB"
        echo  cd  `pwd`
        echo  $CnsCommand  \< $TARGET.inp \> $TARGET.out
        printf '%s\n' \
            "touch Done_with_${Name}_${Process}" \
            "exit"
    } > Smt${Name}_${Process}.sh
}

##########################################################################################
# Disulfide bridges (CNS or X-PLOR protocol only).  For every pair n1-n2 given
# with -ss two files are extended:
#   SSBridges.$$  one "patch disu" block with reference=1 / reference=2
#   SSNoes.$$     one restraint line on the SG atoms (2.02 0.1 0.1), all of
#                 them wrapped in a single "noe ... end" block
# NOTE(review): nothing visible in this script reads SSNoes.$$ back; it is
# only removed during the final clean-up.
case "$Protocol" in
   [cC][nN]* | [xX]* )
      if [ "$HaveSSBridge" = "yes" ]
      then
           # First create DISN, with _NO_ actual SG-SG bond
           echo "    noe"                                       >> "SSNoes.$$"
           for SSbr in $(echo "$RawSS" | sed 's/,/ /g')
           do
              echo "         patch disu "                       >> "SSBridges.$$"
              Ref=0
              for ResId in $(echo "$SSbr" | sed 's/-/ /g')
              do
                  Ref=$((Ref + 1))
                  # printf, not "echo -n": the latter is not portable under
                  # /bin/sh.  Both residues of a pair end up on one line.
                  printf '%s' "           assign (resid $ResId and name sg) " >> "SSNoes.$$"
                  echo "              reference=$Ref=(resid $ResId)"           >> "SSBridges.$$"
              done
              # This new one added, RTT, Sep 2003
              echo "         end "                              >> "SSBridges.$$"
              echo "  2.02 0.1 0.1 "                            >> "SSNoes.$$"
           done
           # SSBridges.$$ deliberately gets no closing "end" (RTT, Sep 2003)
           echo "    end "                                      >> "SSNoes.$$"
      fi
      ;;
   * )
      ;;
esac

##########################################################################################
# Build SedFile.$$, the sed script that turns the CNS template into the input
# file of one refinement job.
#   Globals read:    Params, HaveCis, HaveSSBridge, Name, Begin, End, Seed,
#                    AppScale, HeatCycles, HotCycles, CoolCycles, Debug,
#                    Chain, AveNoe
#   Globals written: Params (normalised to its canonical upper-case name)
#   Positional arguments are not used.
# The file is always written from scratch, so leftovers of an earlier call
# can never leak into a new job.
PrepareCNS() {
    # Accept abbreviations of the parameter set in any letter case.
    # NOTE(review): the help text advertises PARMALLH6 while PARMALLH7 is
    # what gets substituted here - confirm which name the template expects.
    case "$Params" in
        [oO][pP]* )          Params="OPLSX" ;;
        [pP][rR]* )          Params="PROLSQ" ;;
        [cC][oO]* )          Params="CONTACT" ;;
        [pP][aA][rR][mM]* )  Params="PARMALLH7" ;;
        [pP][aA][rR][aA]* )  Params="PARAM19" ;;
        * )                  Params="OPLSX" ;;
    esac

    {
        # cis residue info: switch the flag on and pull in the patch file
        if [ "$HaveCis" = "yes" ]
        then
            printf '%s\n' '/evaluate ($HaveCis/s/no/yes/' "/CISpep/r FileCis.$$"
        fi

        # disulfide info, same scheme
        if [ "$HaveSSBridge" = "yes" ]
        then
            printf '%s\n' '/evaluate ($HaveDisu/s/no/yes/' "/SSBridge/r SSBridges.$$"
        fi

        # placeholders of the template
        printf '%s\n' \
            "s/PROTEIN/$Name/" \
            "s/BEGIN/$Begin/" \
            "s/END/$End/" \
            "s/SEMILLA/$Seed/" \
            "s/USCALE/$AppScale/" \
            "s/_PARAM_/$Params/" \
            "s/_HEATINGCYCLES_/$HeatCycles/" \
            "s/_HOTCYCLE_/$HotCycles/" \
            "s/_COOLCYCLES_/$CoolCycles/" \
            "/evaluate (\$MyDebug/s/DBG/$Debug/" \
            "/name=\"CHAI\"/s/CHAI/$Chain   /" \
            "/evaluate (\$Noeavg/s/unk/$AveNoe/"

        # restraint tables found in the working directory switch their flag on
        for _tbl in noe:HaveNoe dihe:HaveDih hbond:HaveHbond \
                    sani:HaveRDC Jhnha:HaveJcoup shiftsCACB:HaveCaCbShifts
        do
            if [ -f "${Name}_${_tbl%%:*}.tbl" ]
            then
                printf '%s\n' "/evaluate (\$${_tbl#*:}/s/no/yes/"
            fi
        done
    } > "SedFile.$$"
}

#
##########################################################################################
# Do some initial stuff

# Split the original multi-model PDB file into one file per model.
echo " ---> Splitting PDB into individual files "
[ -d xplor_split ] || mkdir xplor_split
# NOTE(review): pdbsplit.py is assumed to print only the number of models it
# wrote (the value is used as a loop bound below) - confirm.
Processes=$("$WATREFLIB/pdbsplit.py" "${Name}.pdb" xplor_split/sa)
if [ -z "$Processes" ]
then
    echo " ** Exiting **: could not split ${Name}.pdb into individual models " >&2
    exit 1
fi

# Switch on the CISpep and disulfide sections of a generate input file, in place.
#   $1 - input file to edit (rewritten through the scratch file TmpGenerate)
# Reads HaveCis / HaveSSBridge and the patch files FileCis.$$ / SSBridges.$$.
PatchGenerateInput() {
    if [ "$HaveCis" = "yes" ]
    then
        sed "{
               /evaluate (\$HaveCis/s/no/yes/
               /CISpep/r FileCis.$$
             }" "$1" > TmpGenerate
        mv TmpGenerate "$1"
    fi

    if [ "$HaveSSBridge" = "yes" ]
    then
        sed "{
               /evaluate (\$HaveDisu/s/no/yes/
               /SSBridge/r SSBridges.$$
             }" "$1" > TmpGenerate
        mv TmpGenerate "$1"
    fi
}

# Generate initial cns PDB coordinates files and MTF's
echo " ---> Generating initial PDB and MTF files "
sed "{ s/PROT/${Name}/
       s/TEMPLATE/template_${Name}.pdb/
    }"  "${WATREFLIB}/generate_h2o.inp" > "${Name}_generate_h2o.inp"

# Additional editing for CIS pep and DISULFIDES
PatchGenerateInput "${Name}_generate_h2o.inp"

cp xplor_split/sa_1.pdb "template_$Name.pdb"
. "${CNSENV}/.cns_solve_env_sh"
TOPOWAT=$WATREFLIB
export TOPOWAT
# $CnsCommand is left unquoted so that the configured command may carry arguments.
$CnsCommand < "${Name}_generate_h2o.inp" > "${Name}_generate_h2o.out"

# Create directory for cns PDB coords
[ -d cnsPDB ] || mkdir cnsPDB

# Put all models in PDB format for CNS.  Plain assignment and $(( )) are used
# for the counter because "let" is not available in every /bin/sh.
generate=1
while [ "$generate" -le "$Processes" ]
do
    echo " ---> Generating coords in PDB for CNS file $generate"

    # Edit input generate file to have right protein name and template; the
    # structure file was already written above, so that statement is disabled.
    sed "{ s/PROT_h2o/cnsPDB\/sa_cns_${generate}/
           s/TEMPLATE/template_${Name}.pdb/
           s/write structure/!write structure/
         }"  "${WATREFLIB}/generate_h2o.inp" > "${Name}_generate_h2o_${generate}.inp"

    # Do again CISpep and DISU for each model
    PatchGenerateInput "${Name}_generate_h2o_${generate}.inp"

    # update coordinates to be modified
    cp "xplor_split/sa_${generate}.pdb" "template_$Name.pdb"

    # launch CNS
    $CnsCommand < "${Name}_generate_h2o_${generate}.inp" > "${Name}_generate_h2o_${generate}.out"

    generate=$((generate + 1))
done

# move all these I/O files to TempDir
[ -d TempDir ] || mkdir TempDir
mv *generate* TempDir

# create directories to store the results
[ -d refinedPDB ] || mkdir refinedPDB
[ -d refinedPDB_w ] || mkdir refinedPDB_w

#
##########################################################################################
# Generate a number of seeds randomly
#
# One random seed per model, all derived from the user supplied seed (-seed),
# so that a run can be repeated.  The numbers are produced in a BEGIN rule:
# awk then needs no input file, and no scratch file in the working directory
# gets overwritten and deleted.
#trigger=`date +%s`
trigger=$Semilla
Semillas=$(awk -v total="$Processes" -v Some="$trigger" 'BEGIN {
        srand(Some)
        for (i = 1; i <= total; i++) {
            print int(rand() * 1000000000)
        }
    }')
echo
#echo Seeds are:   $Semillas

# Jobs are numbered downwards, starting from the number of models.
Process=$Processes

#
##########################################################################################
# Main loop going through all selected number of structures
# One refinement job per seed.  Jobs are numbered from $Process downwards; job
# N refines model N only (Begin = End = N).  A protocol or queue system for
# which nothing can be launched stops the run at once: otherwise the script
# would wait forever for "Done_with_" files that no job is going to create.
for Seed in $Semillas
do
    Begin=$Process
    End=$Process
    case "$Protocol" in
        [cC][nN]* )
            # PrepareCNS works on the global variables; the arguments are
            # passed for readability only.
            PrepareCNS "$Name" "$Begin" "$End" "$Seed" "$AveNoe"
            TARGET=${Template}_${Process}
            sed -f "SedFile.$$" < "$TemplateFile" > "${TARGET}.inp"
            rm "SedFile.$$"

            case "$BatchQ" in
                [pP][bB][sS] )
                    batch_PBS_CNS "$TARGET"
                    # resubmit until success; $SubmitPBS is left unquoted so
                    # that the configured command may carry arguments
                    until $SubmitPBS "Smt${Name}_${Process}.sh"
                    do
                        echo "Try again ..."
                        sleep 2
                    done ;;
                [nN][oO]* )
                    batch_NOQ_CNS "$TARGET" "$Name" "$Process"
                    echo " ---> Submitting Smt${Name}_${Process}.sh"
                    /bin/sh "Smt${Name}_${Process}.sh" ;;
                * )
                    echo " ** Exiting **: queue system '$BatchQ' is not supported, use PBS or NO " >&2
                    exit 1 ;;
            esac ;;
        [xX]* )
            echo " Protocol for Xplor -- not yet ready!!! "
            exit 1 ;;
        * )
            echo " ** Exiting **: protocol '$Protocol' is not supported by this script " >&2
            exit 1 ;;
    esac
    Process=$((Process - 1))
done

##########################################################################################
# Here we are going to wait until ALL the calcs have been done.
# Once that happens we will do an overview of the structures, keeping
# only the 10 best ones and deleting all other files.
#
echo " "
echo " ---> $Processes Jobs launched, waiting for them to finish "
# Poll for the marker files Done_with_${Name}_<n>, n = 1 .. $Processes, in
# ascending order.  The next marker is checked as soon as the current one
# exists; otherwise sleep $AmountSleep seconds and look again.  There is no
# upper limit on the waiting time.
Process=1
AmountSleep=30
while [ "$Process" -le "$Processes" ]
do
    if [ -f "Done_with_${Name}_${Process}" ]
    then
        Process=$((Process + 1))
    else
        sleep "$AmountSleep"
    fi
done

# Everything is done, so collect all refined models in refinedPDB/All_${Name}_cns.
# The work is done in a subshell: the main script never leaves the working
# directory, so a missing refinedPDB cannot send the clean-up commands below
# into the parent directory.
(
    cd refinedPDB || exit 1
    "$WATREFLIB/Agrupa" *.pdb > "All_${Name}_cns"
) || echo " ** Warning **: could not build refinedPDB/All_${Name}_cns " >&2

# delete the marker files the wait loop was polling for
rm -f "Done_with_${Name}"*

# Move all I/O files to TempDir.  This must happen ONCE, and only once all
# calculations are done: jobs that are still running need these files.
mv "${Template}"_* TempDir

# Leave a copy of the README next to the results
cp "${WATREFLIB}/BeSureToREADME" .

##########################################################################################
# Some cleaning - job scripts, queue output and scratch files are deleted.
# "rm -f" ignores files that were never created, so one command covers the
# optional cis-peptide and disulfide files as well.
if [ "$CleanUp" = "yes" ]
then
    rm -f Smt* \
          "SedFile.$$" \
          "GenerateSedFile.$$" \
          "Refine_${Name}"_* \
          "FileCis.$$" \
          "SSBridges.$$" \
          "SSNoes.$$"
fi

# Some _needed_ cleaning for further runs
#rm  -f Done_with_${Name}*

#
# bye
exit
