Site Tools


freshs:harness-gromacs

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

freshs:harness-gromacs [2013/06/12 11:28] (current)
Line 1: Line 1:
 +<code bash>
  
 +#!/bin/ksh
 +
 +# Copyright (c) 2013 Aaron Taudt, Universit√§t Stuttgart, ITB,
 +# Allmandring 31, 70569 Stuttgart, Germany; all rights
 +# reserved unless otherwise stated.
 +
 +# This program is free software; you can redistribute it and/or modify
 +# it under the terms of the GNU General Public License as published by
 +# the Free Software Foundation; either version 3 of the License, or
 +# (at your option) any later version.
 +
 +# This program is distributed in the hope that it will be useful, but
 +# WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ​ See the GNU
 +# General Public License for more details.
 +
 +# You should have received a copy of the GNU General Public License
 +# along with this program; if not, write to the Free Software
 +# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 +# MA 02111-1307 USA
 +
 +# This is a FRESHS job_script written in BASH for an FFS run with GROMACS.
 +# Two proteins in contact in a box of water are driven apart. Order parameter is the center-of-mass distance.
 +
 +# ---------------------------------------------------------- ​
 +# A few words to the naming convention
 +# ---------------------------------------------------------- ​
 +
 +# The naming convention for the GROMACS commands is the following:
 +# EM=Energy Minimization,​ EQU=Equilibration with position restraints on all heavy atoms, PD=Production
 +# The suffix _OUT, e.g. EQU_OUT, PD_OUT, etc., designate the output-files of the equilibration,​ production, etc. process.
 +# The variable $NAME is the name of my system, which is contained in every filename for easier subsequent analysis and file management.
 +
 +# ---------------------------------------------------------- ​
 +# Additional files
 +# ----------------------------------------------------------  ​
 +
 +# For this script to work, several additional files have to be present in the harness folder:
 +# chains.ndx: Index-file for the calculation of the reaction coordinate
 +# dissociation_contact1.top:​ Topology file
 +# dissociation_contact1_Protein_chain_A.itp,​ dissociation_contact1_Protein_chain_A2.itp:​ Topology-include file
 +# EQU_OUT_dissociation_contact1.gro:​ Equilibrated starting configuration
 +# PD_dissociation_contact1.tpr:​ Structure file
 +# initial_config.dat
 +
 +# ---------------------------------------------------------- ​
 +# Get parameters from client
 +# ---------------------------------------------------------- ​
 +
 +OLDIFS=$IFS
 +IFS=$'​\n'​
 +varmod=0
 +for el in $*;do
 +    if [ $varmod == 0 ];then
 +        varset=`echo $el | sed s/^-//g`
 +        varmod=1
 +    else
 +        eval "​$varset=\"​$el\""​
 +        varmod=0
 +    fi
 +done
 +IFS=$OLDIFS
 +
 +# ---------------------------------------------------------- ​
 +# User defined variables
 +# ----------------------------------------------------------
 +
 +# The following variables have to be given in the server-config-file via "​user_msg"​
 +# store_directory
 +# source_directory
 +# eval_time_intervall
 +# stepsize
 +# temperature
 +# NAME
 +# CPU
 +
 +PMECORES=0 ​    # Number of nodes used for PME, automatic is -1
 +
 +# Machine specific parameters
 +HOST=biobrain2.itbc.uni-stuttgart.de
 +case $HOST in
 +    "​biobrain2.itbc.uni-stuttgart.de"​ ) # Biobrain
 +        WALLTIMEH=48
 +        MACHINE='​default'​
 +        MDRUN='​mdrun_mpi'​
 +        MPIRUN="​mpirun -np $CPU"
 +        SRC_DIR="​$HOME/​$source_directory"​
 +        STR_DIR="​$HOME/​${store_directory}_$timestamp"​
 + ;;
 +  * ) # Everything else
 +    echo "​ERROR:​ Could not define machine paramters for hostname $HOSTNAME"​
 +    echo "​Stopping here."
 +    exit 1
 +    ;;
 +esac
 +WALLTIMEM='​00'​
 +MAXH=`echo "​scale=3;​ $WALLTIMEH * 0.992" | bc` # Maximum hours to run one mdrun in job_script
 +
 +
 +# ---------------------------------------------------------- ​
 +# Get input (key that lets us find the correct directories)
 +# ---------------------------------------------------------- ​
 +
 +if [ -z ${in_fifoname+x} ]; then
 +    #​in_fifoname is unset
 +    INPUT=`cat $initial_config`
 +else
 +    #​in_fifoname is set
 +    INPUT=`cat $in_fifoname`
 +fi
 +
 +# $uuid is the key for the current trajectory fragment
 +# $uuid_input is the key of the trajectory fragment which we receive from the client
 +uuid_input=`echo $INPUT`
 +# between interfaces: $uuid == $uuid_input
 +# starting new trajectory fragment from interface: $uuid != $uuid_input
 +
 +# ---------------------------------------------------------- ​
 +# Create new directories
 +# ---------------------------------------------------------- ​
 +
 +# Make overall storage directory if it doesn'​t exist
 +if [ ! -e $STR_DIR ]
 +then
 +    mkdir $STR_DIR
 +fi
 +
 +INP_DIR="​$STR_DIR/​$uuid_input"​
 +RUN_DIR="​$STR_DIR/​$uuid"​
 +
 +if [ $uuid != $uuid_input ]
 +then
 +    # We are starting a trajectory fragment from an interface.
 +    ​
 +    if [ ! -e $RUN_DIR ]
 +    then
 +        mkdir $RUN_DIR
 +    else
 +        echo "​job_script:​ $RUN_DIR exists, but should not!! Something is terribly wrong!"​
 +        return
 +    fi
 +elif [ $uuid == $uuid_input ]
 +then
 +    # We are calculating on a trajectory fragment between two interfaces.
 +    if [ $RUN_DIR != $INP_DIR ]
 +    then
 +        echo "​job_script:​ RUN_DIR != INP_DIR: Should not be this way!! Something is terribly wrong!"​
 +        return
 +    fi
 +fi
 +
 +
 +# ---------------------------------------------------------- ​
 +# Production (PD) input file
 +# ---------------------------------------------------------- ​
 +
 +# Define time-variables,​ use integers
 +
 +# $eval_time_intervall:​ Simulation time in [ps]
 +# $stepsize: Stepsize in [fs]
 +PRECout=$eval_time_intervall ​                 # Timestep to store full PRECision coordinates in [ps]
 +COMPout=$eval_time_intervall ​                 # Timestep to store COMPressed coordinates in [ps]
 +LOGout=$eval_time_intervall ​                   # Timestep to store LOGfile info in [ps]
 +
 +# Calculate variables
 +DTps=0`echo "​scale=3;​ $stepsize/​1000"​ | bc`  # Stepsize in [ps]
 +NSTEPS=$((1000*$eval_time_intervall/​$stepsize)) ​        # Number of steps for simulation
 +NSTXOUT=$((1000*$PRECout/​$stepsize)) ​        # Number of steps after which full precision coordinates are stored
 +NSTXTCOUT=$((1000*$COMPout/​$stepsize)) ​      # Number of steps after which compressed coordinates are stored
 +NSTLOGOUT=$((1000*$LOGout/​$stepsize)) ​       # Number of steps after which logfile info is stored
 +
 +
 +# Generate Parameterfile
 +if [ $uuid != $uuid_input ]
 +then
 +    # We are starting a trajectory fragment from an interface and have to generate the GROMACS parameter files for this simulation. The only values that are different from the previous trajectory fragment are the starttime $TINIT and the thermostat-RNG $seed. This is crucial because we want different trajectories launched from the same configuration point to diverge.
 +    # (In between two interfaces the simulation can be continued seamlessly)
 +    ​
 +    if [ -e $INP_DIR/​PD_OUT_$NAME.cpt ]
 +    then
 +        # A checkpoint file exists in the input directory, so we take the start time of our new simulation from there
 +        TINIT=`gmxcheck -f $INP_DIR/​PD_OUT_$NAME.cpt 2>&1 | grep "Last frame" | awk '​{print $5}'`
 +        GENVEL='​no'​ # Do not generate velocities, but use them from the .cpt file
 +    else
 +        # No checkpoint file exists in the input directory (because the input directory does not exist), meaning we start a new run from inside region A
 +        TINIT='​0.0'​
 +        GENVEL='​yes'​
 +    fi
 +
 +cat > $RUN_DIR/​PD_$NAME.mdp <<eof
 +    ; Generated at $timestamp
 +    integrator ​         =  md
 +    tinit               ​= ​ $TINIT
 +    dt                  =  $DTps
 +    nsteps ​             =  $NSTEPS
 +    nstcomm ​            ​= ​ 1
 +    nstxout ​            ​= ​ $NSTXOUT
 +    nstvout ​            ​= ​ $NSTXOUT
 +    nstfout ​            ​= ​ $NSTXOUT
 +    nstenergy ​          ​= ​ $NSTXTCOUT
 +    nstlog ​             =  $NSTLOGOUT
 +    nstxtcout ​          ​= ​ $NSTXTCOUT
 +    nstlist ​            ​= ​ 5
 +    energygrps ​         =  System
 +    ns_type ​            ​= ​ grid
 +    ; Constraints
 +    constraints ​        ​= ​ all-bonds
 +    constraint_algorithm = lincs
 +    ;Treatment of Vdw and Elctrostatics
 +    vdwtype ​            ​= ​ cut-off
 +    rlist               ​= ​ 0.9
 +    rvdw                =  1.4
 +    coulombtype ​        ​= ​ PME
 +    rcoulomb ​           =  0.9
 +    fourierspacing ​     =  0.12
 +    fourier_nx ​         = 108
 +    fourier_ny ​         = 84
 +    fourier_nz ​         = 72
 +    pme_order ​          ​= ​ 4
 +    ewald_rtol ​         =  1e-05
 +    optimize_fft ​       =  yes
 +    ; Temperature coupling is on
 +    tcoupl ​         = V-rescale ​            ; modified Berendsen thermostat
 +    tc-grps ​        = Protein Non-Protein ​  ; two coupling groups - more accurate
 +    tau_t           = 0.1   ​0.1 ​            ; time constant, in ps
 +    ref_t           = $temperature ​ $temperature ​   ; reference temperature,​ one for each group, in K
 +    ld_seed ​        = $seed
 +    ; Pressure coupling is  on
 +    pcoupl ​         = Parrinello-Rahman ​            ; Pressure coupling on in NPT
 +    pcoupltype ​     = isotropic ​                    ; uniform scaling of box vectors
 +    tau_p           = 2.0                           ; time constant, in ps
 +    ref_p           = 1.0                           ; reference pressure, in bar
 +    compressibility = 4.5e-5 ​                       ; isothermal compressibility of water, bar^-1
 +    refcoord_scaling = com
 +    ; Generate velocites is on at ${temperature}K
 +    gen_vel ​            ​= ​ $GENVEL
 +    gen_temp ​           =  $temperature
 +    gen_seed ​           =  $seed
 +eof
 +
 +
 +fi
 +
 +
 +# ---------------------------------------------------------- ​
 +# Preprocessing and simulation
 +# ---------------------------------------------------------- ​
 +
 +if [ $uuid != $uuid_input ]
 +then
 +    # We are starting a trajectory fragment from an interface
 +
 +    if [ -e $INP_DIR/​PD_OUT_$NAME.cpt ]
 +    then
 +        # If not starting from inside region A, a checkpoint file should exist
 +    ​
 +        # Generate new .tpr with new .mdp (seed!) for continuation from interface
 +        # grompp -f new.mdp -c old.tpr -o new.tpr -t old.cpt
 +        grompp -f $RUN_DIR/​PD_$NAME.mdp -po $RUN_DIR/​PD_OUT_$NAME.mdp -c $INP_DIR/​PD_$NAME.tpr -p $SRC_DIR/​$NAME.top -o $RUN_DIR/​PD_$NAME.tpr -t $INP_DIR/​PD_OUT_$NAME.cpt
 +        # $MDRUN -s new.tpr
 +        $MPIRUN $MDRUN -v -npme $PMECORES -maxh $MAXH -s $RUN_DIR/​PD_$NAME.tpr -o $RUN_DIR/​PD_OUT_$NAME.trr -x $RUN_DIR/​PD_OUT_$NAME.xtc -cpt 15 -cpo $RUN_DIR/​PD_OUT_$NAME.cpt -e $RUN_DIR/​PD_OUT_$NAME.edr -c $RUN_DIR/​PD_OUT_$NAME.gro -g $RUN_DIR/​PD_OUT_$NAME.log
 +    else
 +        # Starting from inside region A, start from the precalculated configuration
 +        ​
 +        grompp -f $RUN_DIR/​PD_$NAME.mdp -po $RUN_DIR/​PD_OUT_$NAME.mdp -c $SRC_DIR/​$STARTCONF -p $SRC_DIR/​$NAME.top -o $RUN_DIR/​PD_$NAME.tpr
 +        $MPIRUN $MDRUN -v -npme $PMECORES -maxh $MAXH -s $RUN_DIR/​PD_$NAME.tpr -o $RUN_DIR/​PD_OUT_$NAME.trr -x $RUN_DIR/​PD_OUT_$NAME.xtc -cpt 15 -cpo $RUN_DIR/​PD_OUT_$NAME.cpt -e $RUN_DIR/​PD_OUT_$NAME.edr -c $RUN_DIR/​PD_OUT_$NAME.gro -g $RUN_DIR/​PD_OUT_$NAME.log
 +        ​
 +    fi
 +elif [ $uuid == $uuid_input ]
 +then
 +    # We continue a trajectory fragment between two interfaces
 +    ​
 +    # Extending simulation by STEPS
 +    # tpbconv -s previous.tpr -extend timetoextendby -o next.tpr
 +    tpbconv -s $INP_DIR/​PD_$NAME.tpr -extend $eval_time_intervall -o $INP_DIR/​next.tpr
 +    mv $INP_DIR/​PD_${NAME}.tpr $INP_DIR/​\#​PD_${NAME}.tpr\#​
 +    mv $INP_DIR/​next.tpr $INP_DIR/​PD_$NAME.tpr
 +    $MPIRUN $MDRUN -v -npme $PMECORES -maxh $MAXH -s $INP_DIR/​PD_$NAME.tpr -o $INP_DIR/​PD_OUT_$NAME.trr -x $INP_DIR/​PD_OUT_$NAME.xtc -cpt 15 -cpo $INP_DIR/​PD_OUT_$NAME.cpt -e $INP_DIR/​PD_OUT_$NAME.edr -c $INP_DIR/​PD_OUT_$NAME.gro -g $INP_DIR/​PD_OUT_$NAME.log -append -cpi $INP_DIR/​PD_OUT_$NAME.cpt
 +fi
 +
 +
 +# ---------------------------------------------------------- ​
 +# Extraction of the reaction coordinate
 +# ---------------------------------------------------------- ​
 +
 +echo "15
 +16" | g_dist -f $RUN_DIR/​PD_OUT_$NAME.xtc -s $RUN_DIR/​PD_$NAME.tpr -n $SRC_DIR/​chains.ndx -o $RUN_DIR/​distance.xvg
 +
 +# Extract last timestep
 +rc_line=`tail -n 1 $RUN_DIR/​distance.xvg`
 +rc_time=`echo $rc_line | awk '​{print $1}'`
 +rc_val=`echo $rc_line | awk '​{print $2}'`
 +
 +echo "​job_script:​ simulation-time spent in this part of trajectory: $eval_time_intervall"​
 +
 +# ---------------------------------------------------------- ​
 +# Give the data back
 +# ---------------------------------------------------------- ​
 +
 +# Print key ($uuid) to fifo
 +echo $uuid > $back_fifoname
 +
 +# Print the data that will be written to the metadata_fifoname fifo
 +echo $rc_val $NSTEPS $eval_time_intervall |\
 + awk '​{printf "​{\"​rc\":​ %.4f, \"​steps\":​ %i, \"​time\":​ %.4f}\n",​$1,​$2,​$3}'​
 +
 +# Print some data to metadata_fifoname
 +echo $rc_val $NSTEPS $eval_time_intervall |\
 + awk '​{printf "​{\"​rc\":​ %.4f, \"​steps\":​ %i, \"​time\":​ %.4f}",​$1,​$2,​$3}'>​ $metadata_fifoname
 +# Insert newline, because unlike "​echo",​ "​awk"​ with "​printf"​ does not do this
 +echo > $metadata_fifoname
 +
 +
 +# ---------------------------------------------------------- ​
 +# Clean up
 +# ----------------------------------------------------------
 +
 +# Get rid of GROMACS backup files to save space
 +rm $RUN_DIR/​\#​*\#​
 +
 +</​code>​
freshs/harness-gromacs.txt · Last modified: 2013/06/12 11:28 (external edit)