Site Tools


freshs:harness-gromacs
#!/bin/ksh
 
# Copyright (c) 2013 Aaron Taudt, Universität Stuttgart, ITB,
# Allmandring 31, 70569 Stuttgart, Germany; all rights
# reserved unless otherwise stated.
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307 USA
 
# This is a FRESHS job_script written in BASH for an FFS run with GROMACS.
# Two proteins in contact in a box of water are driven apart. Order parameter is the center-of-mass distance.
 
# ---------------------------------------------------------- 
# A few words to the naming convention
# ---------------------------------------------------------- 
 
# The naming convention for the GROMACS commands is the following:
# EM=Energy Minimization, EQU=Equilibration with position restraints on all heavy atoms, PD=Production
# The suffix _OUT, e.g. EQU_OUT, PD_OUT, etc., designate the output-files of the equilibration, production, etc. process.
# The variable $NAME is the name of my system, which is contained in every filename for easier subsequent analysis and file management.
 
# ---------------------------------------------------------- 
# Additional files
# ----------------------------------------------------------  
 
# For this script to work, several additional files have to be present in the harness folder:
# chains.ndx: Index-file for the calculation of the reaction coordinate
# dissociation_contact1.top: Topology file
# dissociation_contact1_Protein_chain_A.itp, dissociation_contact1_Protein_chain_A2.itp: Topology-include file
# EQU_OUT_dissociation_contact1.gro: Equilibrated starting configuration
# PD_dissociation_contact1.tpr: Structure file
# initial_config.dat
 
# ---------------------------------------------------------- 
# Get parameters from client
# ---------------------------------------------------------- 
 
OLDIFS=$IFS
IFS=$'\n'
varmod=0
for el in $*;do
    if [ $varmod == 0 ];then
        varset=`echo $el | sed s/^-//g`
        varmod=1
    else
        eval "$varset=\"$el\""
        varmod=0
    fi
done
IFS=$OLDIFS
 
# ---------------------------------------------------------- 
# User defined variables
# ----------------------------------------------------------
 
# The following variables have to be given in the server-config-file via "user_msg"
# store_directory
# source_directory
# eval_time_intervall
# stepsize
# temperature
# NAME
# CPU
 
PMECORES=0     # Number of nodes used for PME, automatic is -1
 
# Machine specific parameters
HOST=biobrain2.itbc.uni-stuttgart.de
case $HOST in
    "biobrain2.itbc.uni-stuttgart.de" )	# Biobrain
        WALLTIMEH=48
        MACHINE='default'
        MDRUN='mdrun_mpi'
        MPIRUN="mpirun -np $CPU"
        SRC_DIR="$HOME/$source_directory"
        STR_DIR="$HOME/${store_directory}_$timestamp"
	;;
  * )	# Everything else
    echo "ERROR: Could not define machine paramters for hostname $HOSTNAME"
    echo "Stopping here."
    exit 1
    ;;
esac
WALLTIMEM='00'
MAXH=`echo "scale=3; $WALLTIMEH * 0.992" | bc` # Maximum hours to run one mdrun in job_script
 
 
# ---------------------------------------------------------- 
# Get input (key that lets us find the correct directories)
# ---------------------------------------------------------- 
 
if [ -z ${in_fifoname+x} ]; then
    #in_fifoname is unset
    INPUT=`cat $initial_config`
else
    #in_fifoname is set
    INPUT=`cat $in_fifoname`
fi
 
# $uuid is the key for the current trajectory fragment
# $uuid_input is the key of the trajectory fragment which we receive from the client
uuid_input=`echo $INPUT`
# between interfaces: $uuid == $uuid_input
# starting new trajectory fragment from interface: $uuid != $uuid_input
 
# ---------------------------------------------------------- 
# Create new directories
# ---------------------------------------------------------- 
 
# Make overall storage directory if it doesn't exist
if [ ! -e $STR_DIR ]
then
    mkdir $STR_DIR
fi
 
INP_DIR="$STR_DIR/$uuid_input"
RUN_DIR="$STR_DIR/$uuid"
 
if [ $uuid != $uuid_input ]
then
    # We are starting a trajectory fragment from an interface.
 
    if [ ! -e $RUN_DIR ]
    then
        mkdir $RUN_DIR
    else
        echo "job_script: $RUN_DIR exists, but should not!! Something is terribly wrong!"
        return
    fi
elif [ $uuid == $uuid_input ]
then
    # We are calculating on a trajectory fragment between two interfaces.
    if [ $RUN_DIR != $INP_DIR ]
    then
        echo "job_script: RUN_DIR != INP_DIR: Should not be this way!! Something is terribly wrong!"
        return
    fi
fi
 
 
# ---------------------------------------------------------- 
# Production (PD) input file
# ---------------------------------------------------------- 
 
# Define time-variables, use integers
 
# $eval_time_intervall: Simulation time in [ps]
# $stepsize: Stepsize in [fs]
PRECout=$eval_time_intervall                  # Timestep to store full PRECision coordinates in [ps]
COMPout=$eval_time_intervall                  # Timestep to store COMPressed coordinates in [ps]
LOGout=$eval_time_intervall                    # Timestep to store LOGfile info in [ps]
 
# Calculate variables
DTps=0`echo "scale=3; $stepsize/1000" | bc`  # Stepsize in [ps]
NSTEPS=$((1000*$eval_time_intervall/$stepsize))         # Number of steps for simulation
NSTXOUT=$((1000*$PRECout/$stepsize))         # Number of steps after which full precision coordinates are stored
NSTXTCOUT=$((1000*$COMPout/$stepsize))       # Number of steps after which compressed coordinates are stored
NSTLOGOUT=$((1000*$LOGout/$stepsize))        # Number of steps after which logfile info is stored
 
 
# Generate Parameterfile
if [ $uuid != $uuid_input ]
then
    # We are starting a trajectory fragment from an interface and have to generate the GROMACS parameter files for this simulation. The only values that are different from the previous trajectory fragment are the starttime $TINIT and the thermostat-RNG $seed. This is crucial because we want different trajectories launched from the same configuration point to diverge.
    # (In between two interfaces the simulation can be continued seamlessly)
 
    if [ -e $INP_DIR/PD_OUT_$NAME.cpt ]
    then
        # A checkpoint file exists in the input directory, so we take the start time of our new simulation from there
        TINIT=`gmxcheck -f $INP_DIR/PD_OUT_$NAME.cpt 2>&1 | grep "Last frame" | awk '{print $5}'`
        GENVEL='no' # Do not generate velocities, but use them from the .cpt file
    else
        # No checkpoint file exists in the input directory (because the input directory does not exist), meaning we start a new run from inside region A
        TINIT='0.0'
        GENVEL='yes'
    fi
 
cat > $RUN_DIR/PD_$NAME.mdp <<eof
    ; Generated at $timestamp
    integrator          =  md
    tinit               =  $TINIT
    dt                  =  $DTps
    nsteps              =  $NSTEPS
    nstcomm             =  1
    nstxout             =  $NSTXOUT
    nstvout             =  $NSTXOUT
    nstfout             =  $NSTXOUT
    nstenergy           =  $NSTXTCOUT
    nstlog              =  $NSTLOGOUT
    nstxtcout           =  $NSTXTCOUT
    nstlist             =  5
    energygrps          =  System
    ns_type             =  grid
    ; Constraints
    constraints         =  all-bonds
    constraint_algorithm = lincs
    ;Treatment of Vdw and Elctrostatics
    vdwtype             =  cut-off
    rlist               =  0.9
    rvdw                =  1.4
    coulombtype         =  PME
    rcoulomb            =  0.9
    fourierspacing      =  0.12
    fourier_nx          = 108
    fourier_ny          = 84
    fourier_nz          = 72
    pme_order           =  4
    ewald_rtol          =  1e-05
    optimize_fft        =  yes
    ; Temperature coupling is on
    tcoupl          = V-rescale             ; modified Berendsen thermostat
    tc-grps         = Protein Non-Protein   ; two coupling groups - more accurate
    tau_t           = 0.1   0.1             ; time constant, in ps
    ref_t           = $temperature  $temperature    ; reference temperature, one for each group, in K
    ld_seed         = $seed
    ; Pressure coupling is  on
    pcoupl          = Parrinello-Rahman             ; Pressure coupling on in NPT
    pcoupltype      = isotropic                     ; uniform scaling of box vectors
    tau_p           = 2.0                           ; time constant, in ps
    ref_p           = 1.0                           ; reference pressure, in bar
    compressibility = 4.5e-5                        ; isothermal compressibility of water, bar^-1
    refcoord_scaling = com
    ; Generate velocites is on at ${temperature}K
    gen_vel             =  $GENVEL
    gen_temp            =  $temperature
    gen_seed            =  $seed
eof
 
 
fi
 
 
# ---------------------------------------------------------- 
# Preprocessing and simulation
# ---------------------------------------------------------- 
 
if [ $uuid != $uuid_input ]
then
    # We are starting a trajectory fragment from an interface
 
    if [ -e $INP_DIR/PD_OUT_$NAME.cpt ]
    then
        # If not starting from inside region A, a checkpoint file should exist
 
        # Generate new .tpr with new .mdp (seed!) for continuation from interface
        # grompp -f new.mdp -c old.tpr -o new.tpr -t old.cpt
        grompp -f $RUN_DIR/PD_$NAME.mdp -po $RUN_DIR/PD_OUT_$NAME.mdp -c $INP_DIR/PD_$NAME.tpr -p $SRC_DIR/$NAME.top -o $RUN_DIR/PD_$NAME.tpr -t $INP_DIR/PD_OUT_$NAME.cpt
        # $MDRUN -s new.tpr
        $MPIRUN $MDRUN -v -npme $PMECORES -maxh $MAXH -s $RUN_DIR/PD_$NAME.tpr -o $RUN_DIR/PD_OUT_$NAME.trr -x $RUN_DIR/PD_OUT_$NAME.xtc -cpt 15 -cpo $RUN_DIR/PD_OUT_$NAME.cpt -e $RUN_DIR/PD_OUT_$NAME.edr -c $RUN_DIR/PD_OUT_$NAME.gro -g $RUN_DIR/PD_OUT_$NAME.log
    else
        # Starting from inside region A, start from the precalculated configuration
 
        grompp -f $RUN_DIR/PD_$NAME.mdp -po $RUN_DIR/PD_OUT_$NAME.mdp -c $SRC_DIR/$STARTCONF -p $SRC_DIR/$NAME.top -o $RUN_DIR/PD_$NAME.tpr
        $MPIRUN $MDRUN -v -npme $PMECORES -maxh $MAXH -s $RUN_DIR/PD_$NAME.tpr -o $RUN_DIR/PD_OUT_$NAME.trr -x $RUN_DIR/PD_OUT_$NAME.xtc -cpt 15 -cpo $RUN_DIR/PD_OUT_$NAME.cpt -e $RUN_DIR/PD_OUT_$NAME.edr -c $RUN_DIR/PD_OUT_$NAME.gro -g $RUN_DIR/PD_OUT_$NAME.log
 
    fi
elif [ $uuid == $uuid_input ]
then
    # We continue a trajectory fragment between two interfaces
 
    # Extending simulation by STEPS
    # tpbconv -s previous.tpr -extend timetoextendby -o next.tpr
    tpbconv -s $INP_DIR/PD_$NAME.tpr -extend $eval_time_intervall -o $INP_DIR/next.tpr
    mv $INP_DIR/PD_${NAME}.tpr $INP_DIR/\#PD_${NAME}.tpr\#
    mv $INP_DIR/next.tpr $INP_DIR/PD_$NAME.tpr
    $MPIRUN $MDRUN -v -npme $PMECORES -maxh $MAXH -s $INP_DIR/PD_$NAME.tpr -o $INP_DIR/PD_OUT_$NAME.trr -x $INP_DIR/PD_OUT_$NAME.xtc -cpt 15 -cpo $INP_DIR/PD_OUT_$NAME.cpt -e $INP_DIR/PD_OUT_$NAME.edr -c $INP_DIR/PD_OUT_$NAME.gro -g $INP_DIR/PD_OUT_$NAME.log -append -cpi $INP_DIR/PD_OUT_$NAME.cpt
fi
 
 
# ---------------------------------------------------------- 
# Extraction of the reaction coordinate
# ---------------------------------------------------------- 
 
echo "15
16" | g_dist -f $RUN_DIR/PD_OUT_$NAME.xtc -s $RUN_DIR/PD_$NAME.tpr -n $SRC_DIR/chains.ndx -o $RUN_DIR/distance.xvg
 
# Extract last timestep
rc_line=`tail -n 1 $RUN_DIR/distance.xvg`
rc_time=`echo $rc_line | awk '{print $1}'`
rc_val=`echo $rc_line | awk '{print $2}'`
 
echo "job_script: simulation-time spent in this part of trajectory: $eval_time_intervall"
 
# ---------------------------------------------------------- 
# Give the data back
# ---------------------------------------------------------- 
 
# Print key ($uuid) to fifo
echo $uuid > $back_fifoname
 
# Print the data that will be written to the metadata_fifoname fifo
echo $rc_val $NSTEPS $eval_time_intervall |\
 awk '{printf "{\"rc\": %.4f, \"steps\": %i, \"time\": %.4f}\n",$1,$2,$3}'
 
# Print some data to metadata_fifoname
echo $rc_val $NSTEPS $eval_time_intervall |\
 awk '{printf "{\"rc\": %.4f, \"steps\": %i, \"time\": %.4f}",$1,$2,$3}'> $metadata_fifoname
# Insert newline, because unlike "echo", "awk" with "printf" does not do this
echo > $metadata_fifoname
 
 
# ---------------------------------------------------------- 
# Clean up
# ----------------------------------------------------------
 
# Get rid of GROMACS backup files to save space
rm $RUN_DIR/\#*\#
freshs/harness-gromacs.txt · Last modified: 2013/06/12 11:28 (external edit)