Statistics
| Branch: | Tag: | Revision:

dockonsurf / modules / script_grandes_molecules+diss.sh @ 3ba3c134

History | View | Annotate | Download (12.9 kB)

1
#!/bin/bash
2

    
3
##########################
4
#### Arguments input  ####
5
##########################
6

    
7
nom_de_la_molecule=$1
8
atom_nb=$2
9
nb_surf=$3
10
cutoff=$4
11
max_qw=$5
12

    
13

    
14
#############################################################
15
#### Creation of the working diretory and initialisation ####
16
#############################################################
17

    
18

    
19
mol_dir=${MolOnSurf_results_path}/${nom_de_la_molecule}
20
list_errors=()
21
mkdir ${mol_dir}/analyse
22

    
23

    
24
###############################################################
25
#### Extraction of structures needed from file *-pos-1.xyz ####
26
###############################################################
27

    
28

    
29
num=0
30

    
31
for dir in ${mol_dir}/* ; do
32
	if [ ! -e  $dir/*-pos-1.xyz ]
33
	then list_errors+=($dir)
34
	else
35
		name="$(echo $dir | awk -F/ '{print $NF}')"
36
		mkdir ${mol_dir}/analyse/${name}_$num
37

    
38

    
39
		#######################################
40
		#### Get last geometry of mol+surf #### 
41
		#######################################
42

    
43

    
44
		n_line="$(grep -n "i =" ${dir}/*-pos-1.xyz | awk 'END{print}' | awk 'NF=1{print}' | awk 'sub(".$", "")')"
45
		n_line_1="$(expr $n_line - 1)"
46
		awk -v ligne="$n_line_1" 'NR>=ligne{print}' ${dir}/*-pos-1.xyz > ${mol_dir}/analyse/${name}_$num/last_geo.xyz
47

    
48

    
49
		#######################################################
50
		#### Recenter the mol+surf on one atom without pbc ####
51
		#######################################################
52

    
53
		
54
		${DockOnSurf_path}/modules/recenter_only.py ${mol_dir}/analyse/${name}_$num ${atom_nb}
55
		recentered_temp_file=${mol_dir}/analyse/${name}_$num/recentered_only.xyz
56

    
57

    
58
		#####################################################
59
		#### Get the molecule alone centered on one atom ####
60
		#####################################################
61

    
62
		nb_surf2="$(expr ${nb_surf} + 2 )"
63
		nbligne="$(awk 'END{print NR}' ${recentered_temp_file})"
64
		for ((a=3; a<=nbligne; a++)) ; do
65
			z="$(awk '{print NR,$0}' ${recentered_temp_file}| awk 'NR == nligne {print}' nligne="$a" | awk '{print $1}')"
66
			if (( $(echo "$z > ${nb_surf2}" | bc -l) ))
67
			then awk 'NR == nligne {print}' nligne="$a" ${recentered_temp_file} >> ${mol_dir}/analyse/${name}_$num/temp_file.xyz
68
			fi
69
		done
70

    
71
       		temp_file=${mol_dir}/analyse/${name}_$num/temp_file.xyz
72
		awk 'END{print NR}' ${temp_file} > ${mol_dir}/analyse/${name}_$num/molecule_seule.xyz
73
       		mol_seule_file=${mol_dir}/analyse/${name}_$num/molecule_seule.xyz
74
		echo ' ' >> ${mol_seule_file}
75
		cat ${temp_file} >> ${mol_seule_file}
76
		rm ${temp_file}
77

    
78

    
79
		##############################################################
80
		### Apply pbc conditions to surface only in mol+surf file ####
81
		##############################################################
82

    
83

    
84
		${DockOnSurf_path}/modules/recenter_periodicity.py ${mol_dir}/analyse/${name}_$num ${atom_nb}
85
		recentered_periodicity_file=${mol_dir}/analyse/${name}_$num/recentered_periodicity.xyz
86

    
87
		for ((a=3; a<=nbligne; a++)) ; do
88
			z="$(awk '{print NR,$0}' ${recentered_temp_file}| awk 'NR == nligne {print}' nligne="$a" | awk '{print $1}')"
89
			if (( $(echo "$z <= ${nb_surf2}" | bc -l) ))
90
			then awk 'NR == nligne {print}' nligne="$a" ${recentered_periodicity_file} >> ${mol_dir}/analyse/${name}_$num/temp_file.xyz
91
			fi
92
		done
93

    
94
		awk 'NR <= 2{print}' ${recentered_periodicity_file} > ${mol_dir}/analyse/${name}_$num/recentered_pbc.xyz
95
		cat ${mol_dir}/analyse/${name}_$num/temp_file.xyz >> ${mol_dir}/analyse/${name}_$num/recentered_pbc.xyz
96
		awk 'NR > 2{print}' ${mol_seule_file} >> ${mol_dir}/analyse/${name}_$num/recentered_pbc.xyz
97

    
98
		rm ${mol_dir}/analyse/${name}_$num/temp_file.xyz
99
		rm ${recentered_periodicity_file}
100
		rm ${recentered_temp_file}
101

    
102
		
103
		num=$((num+1))
104

    
105
	fi
106
done
107

    
108
echo "All structures files have been created"
109

    
110

    
111
#################################################################################
112
#### RMSD calculation of molecules without surface to create the RMSD matrix ####
113
#################################################################################
114

    
115

    
116
matrix_file=${mol_dir}/analyse/matrice_RMSD_mol_seule.txt
117

    
118
touch ${matrix_file}
119

    
120
for ((i=0; i<$num; i++)) ; do
121
	echo  >> ${matrix_file}
122
done
123

    
124
for ((i=1; i<=$num; i++)) ; do
125
	for ((j=$i; j<=$num; j++)) ; do
126
		if (( $(echo "$i == $j" | bc -l) ))
127
		then 
128
			add_to_line="$(awk -v ii="$i" 'NR==ii {print}' ${matrix_file})"
129
			add_to_line+=' 0'
130
			sed -i "${i}c $add_to_line" ${matrix_file}
131
		else 
132
			num_i="$(expr $i - 1)"
133
			num_j="$(expr $j - 1)"
134
			RMSD_mol_seule_i_j="$(${DockOnSurf_path}/modules/calculate_rmsd --reorder ${mol_dir}/analyse/${nom_de_la_molecule}*_${num_i}/molecule_seule.xyz ${mol_dir}/analyse/${nom_de_la_molecule}*_${num_j}/molecule_seule.xyz)"
135
			add_to_i_line="$(awk -v ii="$i" 'NR==ii {print}' ${matrix_file})"
136
			add_to_i_line+=' -'
137
			add_to_i_line+=$RMSD_mol_seule_i_j
138
			sed -i "${i}c $add_to_i_line" ${matrix_file}
139
			add_to_j_line="$(awk -v jj="$j" 'NR==jj {print}' ${matrix_file})"
140
			add_to_j_line+=' -'
141
			add_to_j_line+=$RMSD_mol_seule_i_j
142
			sed -i "${j}c $add_to_j_line" ${matrix_file}
143
		fi
144
	done
145
done
146

    
147
echo "RMSD matrix for molecules without surface has been calculated"
148

    
149

    
150
###############################################################
151
#### Clustering of the molecule structures without surface #### 
152
###############################################################
153

    
154

    
155
cluster_molecule_file=${mol_dir}/analyse/cluster_molecule.txt
156

    
157
${DockOnSurf_path}/modules/clustering.py ${matrix_file} > ${cluster_molecule_file}
158

    
159
echo "Clustering of molecules has been done"
160

    
161

    
162
###################################################################################
163
#### Separation of the clusters found for the molecule alone and re-clustering ####
164
###################################################################################
165

    
166

    
167
nb_clusters="$(awk 'END{print NR}' ${cluster_molecule_file})"
168

    
169
sed -i 's/\[/ /g' ${cluster_molecule_file}
170
sed -i 's/\]/ /g' ${cluster_molecule_file}
171

    
172
mkdir ${mol_dir}/analyse/cluster_centers_energy
173

    
174
s=1
175

    
176
line_cluster_group="$(awk '{print NR,$0}' ${cluster_molecule_file} | grep "Cluster\ groups:" | awk '{print $1}')"
177
line_first_cluster="$(expr ${line_cluster_group} + 1 )"
178

    
179
for ((k=${line_first_cluster}; k<=${nb_clusters}; k++)) ; do
180

    
181

    
182
	################################
183
	#### Separation of clusters ####
184
	################################
185

    
186

    
187
	awk -v ligne="$k" 'NR==ligne{print}' ${cluster_molecule_file} > ${mol_dir}/analyse/array
188
	sed -i 's/^ *//' ${mol_dir}/analyse/array
189
	sed -i 's/  / /g' ${mol_dir}/analyse/array
190
	sed -i 's/  / /g' ${mol_dir}/analyse/array
191
	sed -i 's/  / /g' ${mol_dir}/analyse/array
192
	sed -i 's/  / /g' ${mol_dir}/analyse/array
193
	readarray -d " " cluster < ${mol_dir}/analyse/array
194
	
195
	mkdir ${mol_dir}/analyse/cluster_molecule_$s
196
	cluster_dir=${mol_dir}/analyse/cluster_molecule_$s
197
	
198
	for numero in ${cluster[*]} ; do
199
		mv ${mol_dir}/analyse/${nom_de_la_molecule}*_${numero} ${cluster_dir}/.
200
	done
201
	a=0
202
	for dir2 in ${cluster_dir}/${nom_de_la_molecule}* ; do
203
		mv $dir2 ${dir2}_$a
204
		a=$((a+1))
205
	done
206
	
207
	rm ${mol_dir}/analyse/array
208

    
209

    
210
	######################################################################
211
	#### Calculation of RMSD for mol+surf and creation of RMSD matrix ####
212
	######################################################################
213

    
214
	
215
 	cluster_matrix_file=${cluster_dir}/matrice_RMSD_cluster.txt
216
 	touch ${cluster_matrix_file}
217

    
218
	len=${#cluster[@]}
219
	len_cluster="$(expr $len - 1)"
220

    
221
 	for ((i=1; i<=${len_cluster}; i++)) ; do
222
 		echo >> ${cluster_matrix_file}
223
 	done
224

    
225
 	for ((i=1; i<=${len_cluster}; i++)) ; do
226
 		for ((j=$i; j<=${len_cluster}; j++)) ; do
227
 			if (( $(echo "$i == $j" | bc -l) ))
228
 			then
229
 				add_to_line="$(awk -v ii="$i" 'NR==ii {print}' ${cluster_matrix_file})"
230
 				add_to_line+=' 0'
231
 				sed -i "${i}c $add_to_line" ${cluster_matrix_file}
232
 			else
233
				num_i="$(expr $i - 1)"
234
				num_j="$(expr $j - 1)"
235
 				RMSD_mol_surf_i_j="$(${DockOnSurf_path}/modules/calculate_rmsd --reorder ${cluster_dir}/${nom_de_la_molecule}*_${num_i}/recentered_pbc.xyz ${cluster_dir}/${nom_de_la_molecule}*_${num_j}/recentered_pbc.xyz)"
236
 				add_to_i_line="$(awk -v ii="$i" 'NR==ii {print}' ${cluster_matrix_file})"
237
 				add_to_i_line+=' -'
238
 				add_to_i_line+=$RMSD_mol_surf_i_j
239
 				sed -i "${i}c $add_to_i_line" ${cluster_matrix_file}
240
 				add_to_j_line="$(awk -v jj="$j" 'NR==jj {print}' ${cluster_matrix_file})"
241
 				add_to_j_line+=' -'
242
 				add_to_j_line+=$RMSD_mol_surf_i_j
243
 				sed -i "${j}c $add_to_j_line" ${cluster_matrix_file}
244
 			fi
245
 		done
246
 	done
247

    
248

    
249
	#######################
250
	#### Re-clustering ####
251
	#######################
252

    
253
	
254
	cluster_molsurf_file=${cluster_dir}/cluster_molsurf.txt
255
	${DockOnSurf_path}/modules/clustering.py ${cluster_matrix_file} > ${cluster_molsurf_file}
256

    
257
	
258
	#################################################
259
	#### Get the cluster centers (min of energy) ####
260
	#################################################
261

    
262
	
263
	nb_cluster_molsurf="$(awk 'END{print NR}' ${cluster_molsurf_file})"
264
	line_cluster_group2="$(awk '{print NR,$0}' ${cluster_molsurf_file} | grep "Cluster\ groups:" | awk '{print $1}')"
265
	line_first_cluster2="$(expr ${line_cluster_group2} + 1 )"
266

    
267
	for ((a=${line_first_cluster2}; a<=${nb_cluster_molsurf}; a++)); do
268
		awk -v ligne="$a" 'NR==ligne{print}' ${cluster_molsurf_file} > ${cluster_dir}/array
269
		sed -i 's/\[/ /g' ${cluster_dir}/array
270
		sed -i 's/\]/ /g' ${cluster_dir}/array
271
		sed -i 's/^ *//' ${cluster_dir}/array
272
		sed -i 's/  / /g' ${cluster_dir}/array
273
		readarray -d " " cluster_struct < ${cluster_dir}/array
274
	
275
		E_min=0
276
		numero_mol_E_min=t
277

    
278
		for struct_nb in ${cluster_struct[*]} ; do
279
			energie="$(awk 'NR==2{E=$NF} END{print E}' ${cluster_dir}/*_${struct_nb}/last_geo.xyz)"
280
			if (( $(echo $energie '<' $E_min | bc -l) ))
281
			then
282
				E_min=$energie
283
				numero_mol_E_min=$struct_nb
284
			fi
285
		done
286

    
287
		cp -r ${cluster_dir}/${nom_de_la_molecule}*_${numero_mol_E_min} ${mol_dir}/analyse/cluster_centers_energy/.
288
	done
289

    
290
	s=$((s+1))
291
done
292

    
293
for dir in ${mol_dir}/analyse/cluster_centers_energy/${nom_de_la_molecule}* ; do
294
	nb_champs="$(echo $dir | awk -F  "_" '{print NF}')"
295
	nb_limit="$(expr $nb_champs - 2)"
296
	name="$(echo $dir | cut -d '_' -f-$nb_limit)"
297
	mv $dir $name
298
done
299

    
300
echo "Clustering of structures mol+surf has been done"
301

    
302

    
303
###################################################
304
#### Determination of calculations to relaunch ####
305
###################################################
306

    
307
Energy_min=0
308

    
309
for structure in ${mol_dir}/analyse/cluster_centers_energy/*; do
310
	Energy="$(awk 'NR==2{E=$NF} END{print E}' $structure/last_geo.xyz)"
311
	if (( $(echo $Energy '<' $Energy_min | bc -l) ))
312
	then Energy_min=$Energy
313
	fi
314
done
315

    
316
Limit_energy="$(echo $Energy_min + ${cutoff}/27.2 | bc -l)" #limit of energy to relaunch the calculation 0.25eV more than min
317

    
318
mkdir ${mol_dir}/relaunched_calculations
319

    
320
for structure in ${mol_dir}/analyse/cluster_centers_energy/*; do
321
	Energy="$(awk 'NR==2{E=$NF} END{print E}' $structure/last_geo.xyz)"
322
	if (( $(echo $Energy '<' $Limit_energy | bc -l) ))
323
	then cp -r $structure ${mol_dir}/relaunched_calculations/.
324
	fi
325
done
326

    
327

    
328
####################################
329
#### Launching the calculations ####
330
####################################
331

    
332
n="$(find ${mol_dir}/relaunched_calculations/${nom_de_la_molecule}* -type d | wc -l)"
333

    
334
echo $n "clusters centers with energy lower than Emin+$cutoff eV have been found. Relaunching calculation with higher precision for structure refinement."
335

    
336
cp ${mol_dir}/${nom_de_la_molecule}_1/cp2k_gamma.j ${mol_dir}/relaunched_calculations/.
337

    
338
queued_jobs=0
339
for dir in ${mol_dir}/relaunched_calculations/${nom_de_la_molecule}*; do
340
              # takes the restart file of the previous calculation as input file for re-optimization
341
	conf=$(echo $dir | rev | cut -d/ -f1 | rev )
342
              cp ${mol_dir}/$conf/surf_${nom_de_la_molecule}-1.restart ${dir}/surf_${nom_de_la_molecule}.inp
343
              topo_line=$(grep -n \&TOPOLOGY ${dir}/surf_${nom_de_la_molecule}.inp | cut -d: -f1)
344
              sed -i "$topo_line,$(($topo_line+5))d" ${dir}/surf_${nom_de_la_molecule}.inp
345
	# changes the optimization criterions for more precise re-optimization
346
	sed -i 's/MAX_FORCE/#MAX_FORCE/g' ${dir}/surf_${nom_de_la_molecule}.inp
347
	sed -i 's/MAX_DR/#MAX_DR/g' ${dir}/surf_${nom_de_la_molecule}.inp
348
	sed -i 's/RMS_DR/#RMS_DR/g' ${dir}/surf_${nom_de_la_molecule}.inp
349
	sed -i 's/RMS_FORCE/#RMS_FORCE/g' ${dir}/surf_${nom_de_la_molecule}.inp
350
	sed -i 's/EPS_SCF/EPS_SCF\ 1E-07\ #/g' ${dir}/surf_${nom_de_la_molecule}.inp
351
	sed -i 's/STEP_START_VAL/#STEP_START_VAL/g' ${dir}/surf_${nom_de_la_molecule}.inp
352
	# launches the calculation
353
	cd $dir/
354
	while [ "$queued_jobs" -ge $max_qw ]; do
355
		sleep 30
356
		queued_jobs=0
357
		for job in ${jobs_owned[@]} ; do
358
			queued_jobs=$((queued_jobs+$(qstat | grep $job | grep -c " qw ")))
359
		done
360
	done
361
	new_job_id=$(qsub ${mol_dir}/relaunched_calculations/cp2k_gamma.j | awk '{print $3}')
362
  echo "Submitted job $new_job_id"
363
  jobs_owned+=($new_job_id)
364
	queued_jobs=$((queued_jobs+1))
365
	cd -
366
done
367

    
368
### Attente jusqu'à ce que tous les calculs aient fini
369
go_on=true
370
while [ $go_on == true ]; do
371
  all_jobs=`qstat | tail -n+3 | awk '{print $1}'`
372
  for j1 in ${jobs_owned[@]} ; do
373
    for j2 in ${all_jobs[@]}; do
374
      if [ $j1 == $j2 ]; then
375
        sleep 30
376
        continue 3
377
      fi
378
    done
379
  done
380
  echo "loop finished"
381
  go_on=false
382
done
383