Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / script_grandes_molecules+diss.sh @ f43a1b4c

Historique | Voir | Annoter | Télécharger (12,6 ko)

1
#!/bin/bash
2

    
3
##########################
4
#### Arguments input  ####
5
##########################
6

    
7
nom_de_la_molecule=$1
8
atom_nb=$2
9
nb_surf=$3
10
cutoff=$4
11

    
12

    
13
#############################################################
14
#### Creation of the working diretory and initialisation ####
15
#############################################################
16

    
17

    
18
mol_dir=${MolOnSurf_results_path}/${nom_de_la_molecule}
19

    
20
list_errors=()
21

    
22
mkdir ${mol_dir}/analyse
23

    
24

    
25
###############################################################
26
#### Extraction of structures needed from file *-pos-1.xyz ####
27
###############################################################
28

    
29

    
30
num=0
31

    
32
for dir in ${mol_dir}/* ; do
33
	if [ ! -e  $dir/*-pos-1.xyz ]
34
	then list_errors+=($dir)
35
	else
36
		name="$(echo $dir | awk -F/ '{print $NF}')"
37
		mkdir ${mol_dir}/analyse/${name}_$num
38

    
39

    
40
		#######################################
41
		#### Get last geometry of mol+surf #### 
42
		#######################################
43

    
44

    
45
		n_line="$(grep -n "i =" ${dir}/*-pos-1.xyz | awk 'END{print}' | awk 'NF=1{print}' | awk 'sub(".$", "")')"
46
		n_line_1="$(expr $n_line - 1)"
47
		awk -v ligne="$n_line_1" 'NR>=ligne{print}' ${dir}/*-pos-1.xyz > ${mol_dir}/analyse/${name}_$num/last_geo.xyz
48

    
49

    
50
		#######################################################
51
		#### Recenter the mol+surf on one atom without pbc ####
52
		#######################################################
53

    
54
		
55
		${DockOnSurf_path}/modules/recenter_only.py ${mol_dir}/analyse/${name}_$num ${atom_nb}
56
		recentered_temp_file=${mol_dir}/analyse/${name}_$num/recentered_only.xyz
57

    
58

    
59
		#####################################################
60
		#### Get the molecule alone centered on one atom ####
61
		#####################################################
62

    
63
		nb_surf2="$(expr ${nb_surf} + 2 )"
64
		nbligne="$(awk 'END{print NR}' ${recentered_temp_file})"
65
		for ((a=3; a<=nbligne; a++)) ; do
66
			z="$(awk '{print NR,$0}' ${recentered_temp_file}| awk 'NR == nligne {print}' nligne="$a" | awk '{print $1}')"
67
			if (( $(echo "$z > ${nb_surf2}" | bc -l) ))
68
			then awk 'NR == nligne {print}' nligne="$a" ${recentered_temp_file} >> ${mol_dir}/analyse/${name}_$num/temp_file.xyz
69
			fi
70
		done
71

    
72
       		temp_file=${mol_dir}/analyse/${name}_$num/temp_file.xyz
73
		awk 'END{print NR}' ${temp_file} > ${mol_dir}/analyse/${name}_$num/molecule_seule.xyz
74
       		mol_seule_file=${mol_dir}/analyse/${name}_$num/molecule_seule.xyz
75
		echo ' ' >> ${mol_seule_file}
76
		cat ${temp_file} >> ${mol_seule_file}
77
		rm ${temp_file}
78

    
79

    
80
		##############################################################
81
		### Apply pbc conditions to surface only in mol+surf file ####
82
		##############################################################
83

    
84

    
85
		${DockOnSurf_path}/modules/recenter_periodicity.py ${mol_dir}/analyse/${name}_$num ${atom_nb}
86
		recentered_periodicity_file=${mol_dir}/analyse/${name}_$num/recentered_periodicity.xyz
87

    
88
		for ((a=3; a<=nbligne; a++)) ; do
89
			z="$(awk '{print NR,$0}' ${recentered_temp_file}| awk 'NR == nligne {print}' nligne="$a" | awk '{print $1}')"
90
			if (( $(echo "$z <= ${nb_surf2}" | bc -l) ))
91
			then awk 'NR == nligne {print}' nligne="$a" ${recentered_periodicity_file} >> ${mol_dir}/analyse/${name}_$num/temp_file.xyz
92
			fi
93
		done
94

    
95
		awk 'NR <= 2{print}' ${recentered_periodicity_file} > ${mol_dir}/analyse/${name}_$num/recentered_pbc.xyz
96
		cat ${mol_dir}/analyse/${name}_$num/temp_file.xyz >> ${mol_dir}/analyse/${name}_$num/recentered_pbc.xyz
97
		awk 'NR > 2{print}' ${mol_seule_file} >> ${mol_dir}/analyse/${name}_$num/recentered_pbc.xyz
98

    
99
		rm ${mol_dir}/analyse/${name}_$num/temp_file.xyz
100
		rm ${recentered_periodicity_file}
101
		rm ${recentered_temp_file}
102

    
103
		
104
		num=$((num+1))
105

    
106
	fi
107
done
108

    
109
echo "All structures files have been created"
110

    
111

    
112
#################################################################################
113
#### RMSD calculation of molecules without surface to create the RMSD matrix ####
114
#################################################################################
115

    
116

    
117
matrix_file=${mol_dir}/analyse/matrice_RMSD_mol_seule.txt
118

    
119
touch ${matrix_file}
120

    
121
for ((i=0; i<$num; i++)) ; do
122
	echo  >> ${matrix_file}
123
done
124

    
125
for ((i=1; i<=$num; i++)) ; do
126
	for ((j=$i; j<=$num; j++)) ; do
127
		if (( $(echo "$i == $j" | bc -l) ))
128
		then 
129
			add_to_line="$(awk -v ii="$i" 'NR==ii {print}' ${matrix_file})"
130
			add_to_line+=' 0'
131
			sed -i "${i}c $add_to_line" ${matrix_file}
132
		else 
133
			num_i="$(expr $i - 1)"
134
			num_j="$(expr $j - 1)"
135
			RMSD_mol_seule_i_j="$(${DockOnSurf_path}/modules/calculate_rmsd --reorder ${mol_dir}/analyse/${nom_de_la_molecule}*_${num_i}/molecule_seule.xyz ${mol_dir}/analyse/${nom_de_la_molecule}*_${num_j}/molecule_seule.xyz)"
136
			add_to_i_line="$(awk -v ii="$i" 'NR==ii {print}' ${matrix_file})"
137
			add_to_i_line+=' -'
138
			add_to_i_line+=$RMSD_mol_seule_i_j
139
			sed -i "${i}c $add_to_i_line" ${matrix_file}
140
			add_to_j_line="$(awk -v jj="$j" 'NR==jj {print}' ${matrix_file})"
141
			add_to_j_line+=' -'
142
			add_to_j_line+=$RMSD_mol_seule_i_j
143
			sed -i "${j}c $add_to_j_line" ${matrix_file}
144
		fi
145
	done
146
done
147

    
148
echo "RMSD matrix for molecules without surface has been calculated"
149

    
150

    
151
###############################################################
152
#### Clustering of the molecule structures without surface #### 
153
###############################################################
154

    
155

    
156
cluster_molecule_file=${mol_dir}/analyse/cluster_molecule.txt
157

    
158
${DockOnSurf_path}/modules/clustering.py ${matrix_file} > ${cluster_molecule_file}
159

    
160
echo "Clustering of molecules has been done"
161

    
162

    
163
###################################################################################
164
#### Separation of the clusters found for the molecule alone and re-clustering ####
165
###################################################################################
166

    
167

    
168
nb_clusters="$(awk 'END{print NR}' ${cluster_molecule_file})"
169

    
170
sed -i 's/\[/ /g' ${cluster_molecule_file}
171
sed -i 's/\]/ /g' ${cluster_molecule_file}
172

    
173
mkdir ${mol_dir}/analyse/cluster_centers_energy
174

    
175
s=1
176

    
177
line_cluster_group="$(awk '{print NR,$0}' ${cluster_molecule_file} | grep "Cluster\ groups:" | awk '{print $1}')"
178
line_first_cluster="$(expr ${line_cluster_group} + 1 )"
179

    
180
for ((k=${line_first_cluster}; k<=${nb_clusters}; k++)) ; do
181

    
182

    
183
	################################
184
	#### Separation of clusters ####
185
	################################
186

    
187

    
188
	awk -v ligne="$k" 'NR==ligne{print}' ${cluster_molecule_file} > ${mol_dir}/analyse/array
189
	sed -i 's/^ *//' ${mol_dir}/analyse/array
190
	sed -i 's/  / /g' ${mol_dir}/analyse/array
191
	sed -i 's/  / /g' ${mol_dir}/analyse/array
192
	sed -i 's/  / /g' ${mol_dir}/analyse/array
193
	sed -i 's/  / /g' ${mol_dir}/analyse/array
194
	readarray -d " " cluster < ${mol_dir}/analyse/array
195
	
196
	mkdir ${mol_dir}/analyse/cluster_molecule_$s
197
	cluster_dir=${mol_dir}/analyse/cluster_molecule_$s
198
	
199
	for numero in ${cluster[*]} ; do
200
		mv ${mol_dir}/analyse/${nom_de_la_molecule}*_${numero} ${cluster_dir}/.
201
	done
202
	a=0
203
	for dir2 in ${cluster_dir}/${nom_de_la_molecule}* ; do
204
		mv $dir2 ${dir2}_$a
205
		a=$((a+1))
206
	done
207
	
208
	rm ${mol_dir}/analyse/array
209

    
210

    
211
	######################################################################
212
	#### Calculation of RMSD for mol+surf and creation of RMSD matrix ####
213
	######################################################################
214

    
215
	
216
 	cluster_matrix_file=${cluster_dir}/matrice_RMSD_cluster.txt
217
 	touch ${cluster_matrix_file}
218

    
219
	len=${#cluster[@]}
220
	len_cluster="$(expr $len - 1)"
221

    
222
 	for ((i=1; i<=${len_cluster}; i++)) ; do
223
 		echo >> ${cluster_matrix_file}
224
 	done
225

    
226
 	for ((i=1; i<=${len_cluster}; i++)) ; do
227
 		for ((j=$i; j<=${len_cluster}; j++)) ; do
228
 			if (( $(echo "$i == $j" | bc -l) ))
229
 			then
230
 				add_to_line="$(awk -v ii="$i" 'NR==ii {print}' ${cluster_matrix_file})"
231
 				add_to_line+=' 0'
232
 				sed -i "${i}c $add_to_line" ${cluster_matrix_file}
233
 			else
234
				num_i="$(expr $i - 1)"
235
				num_j="$(expr $j - 1)"
236
 				RMSD_mol_surf_i_j="$(${DockOnSurf_path}/modules/calculate_rmsd --reorder ${cluster_dir}/${nom_de_la_molecule}*_${num_i}/recentered_pbc.xyz ${cluster_dir}/${nom_de_la_molecule}*_${num_j}/recentered_pbc.xyz)"
237
 				add_to_i_line="$(awk -v ii="$i" 'NR==ii {print}' ${cluster_matrix_file})"
238
 				add_to_i_line+=' -'
239
 				add_to_i_line+=$RMSD_mol_surf_i_j
240
 				sed -i "${i}c $add_to_i_line" ${cluster_matrix_file}
241
 				add_to_j_line="$(awk -v jj="$j" 'NR==jj {print}' ${cluster_matrix_file})"
242
 				add_to_j_line+=' -'
243
 				add_to_j_line+=$RMSD_mol_surf_i_j
244
 				sed -i "${j}c $add_to_j_line" ${cluster_matrix_file}
245
 			fi
246
 		done
247
 	done
248

    
249

    
250
	#######################
251
	#### Re-clustering ####
252
	#######################
253

    
254
	
255
	cluster_molsurf_file=${cluster_dir}/cluster_molsurf.txt
256
	${DockOnSurf_path}/modules/clustering.py ${cluster_matrix_file} > ${cluster_molsurf_file}
257

    
258
	
259
	#################################################
260
	#### Get the cluster centers (min of energy) ####
261
	#################################################
262

    
263
	
264
	nb_cluster_molsurf="$(awk 'END{print NR}' ${cluster_molsurf_file})"
265
	line_cluster_group2="$(awk '{print NR,$0}' ${cluster_molsurf_file} | grep "Cluster\ groups:" | awk '{print $1}')"
266
	line_first_cluster2="$(expr ${line_cluster_group2} + 1 )"
267

    
268
	for ((a=${line_first_cluster2}; a<=${nb_cluster_molsurf}; a++)); do
269
		awk -v ligne="$a" 'NR==ligne{print}' ${cluster_molsurf_file} > ${cluster_dir}/array
270
		sed -i 's/\[/ /g' ${cluster_dir}/array
271
		sed -i 's/\]/ /g' ${cluster_dir}/array
272
		sed -i 's/^ *//' ${cluster_dir}/array
273
		sed -i 's/  / /g' ${cluster_dir}/array
274
		readarray -d " " cluster_struct < ${cluster_dir}/array
275
	
276
		E_min=0
277
		numero_mol_E_min=t
278

    
279
		for struct_nb in ${cluster_struct[*]} ; do
280
			energie="$(awk 'NR==2{E=$NF} END{print E}' ${cluster_dir}/*_${struct_nb}/last_geo.xyz)"
281
			if (( $(echo $energie '<' $E_min | bc -l) ))
282
			then
283
				E_min=$energie
284
				numero_mol_E_min=$struct_nb
285
			fi
286
		done
287

    
288
		cp -r ${cluster_dir}/${nom_de_la_molecule}*_${numero_mol_E_min} ${mol_dir}/analyse/cluster_centers_energy/.
289
	done
290

    
291
	s=$((s+1))
292
done
293

    
294
for dir in ${mol_dir}/analyse/cluster_centers_energy/${nom_de_la_molecule}* ; do
295
	nb_champs="$(echo $dir | awk -F  "_" '{print NF}')"
296
	nb_limit="$(expr $nb_champs - 2)"
297
	name="$(echo $dir | cut -d '_' -f-$nb_limit)"
298
	mv $dir $name
299
done
300

    
301
echo "Clustering of structures mol+surf has been done"
302

    
303

    
304
###################################################
305
#### Determination of calculations to relaunch ####
306
###################################################
307

    
308
Energy_min=0
309

    
310
for structure in ${mol_dir}/analyse/cluster_centers_energy/*; do
311
	Energy="$(awk 'NR==2{E=$NF} END{print E}' $structure/last_geo.xyz)"
312
	if (( $(echo $Energy '<' $Energy_min | bc -l) ))
313
	then Energy_min=$Energy
314
	fi
315
done
316

    
317
Limit_energy="$(echo $Energy_min + ${cutoff}/27.2 | bc -l)" #limit of energy to relaunch the calculation 0.25eV more than min
318

    
319
mkdir ${mol_dir}/relaunched_calculations
320

    
321
for structure in ${mol_dir}/analyse/cluster_centers_energy/*; do
322
	Energy="$(awk 'NR==2{E=$NF} END{print E}' $structure/last_geo.xyz)"
323
	if (( $(echo $Energy '<' $Limit_energy | bc -l) ))
324
	then cp -r $structure ${mol_dir}/relaunched_calculations/.
325
	fi
326
done
327

    
328

    
329
####################################
330
#### Launching the calculations ####
331
####################################
332

    
333
n="$(find ${mol_dir}/relaunched_calculations/${nom_de_la_molecule}* -type d | wc -l)"
334

    
335
echo $n "clusters centers with energy lower than Emin+$cutoff eV have been found"
336
echo "Relaunch the calculations ?"
337
read answer
338

    
339
if [[ $answer = "yes" ]]
340
then 
341
	cp ${mol_dir}/${nom_de_la_molecule}_1/cp2k_gamma.j ${mol_dir}/relaunched_calculations/.
342

    
343
	for dir in ${mol_dir}/relaunched_calculations/${nom_de_la_molecule}*; do
344
                # takes the restart file of the previous calculation as input file for re-optimization
345
		conf=$(echo $dir | rev | cut -d/ -f1 | rev )
346
                cp ${mol_dir}/$conf/surf_${nom_de_la_molecule}-1.restart ${dir}/surf_${nom_de_la_molecule}.inp
347
                topo_line=$(grep -n \&TOPOLOGY ${dir}/surf_${nom_de_la_molecule}.inp | cut -d: -f1)
348
                sed -i "$topo_line,$(($topo_line+5))d" ${dir}/surf_${nom_de_la_molecule}.inp
349
		# changes the optimization criterions for more precise re-optimization
350
		sed -i 's/MAX_FORCE/#MAX_FORCE/g' ${dir}/surf_${nom_de_la_molecule}.inp
351
		sed -i 's/MAX_DR/#MAX_DR/g' ${dir}/surf_${nom_de_la_molecule}.inp
352
		sed -i 's/RMS_DR/#RMS_DR/g' ${dir}/surf_${nom_de_la_molecule}.inp
353
		sed -i 's/RMS_FORCE/#RMS_FORCE/g' ${dir}/surf_${nom_de_la_molecule}.inp
354
		sed -i 's/EPS_SCF/EPS_SCF\ 1E-07\ #/g' ${dir}/surf_${nom_de_la_molecule}.inp
355
		# launches the calculation
356
		cd $dir/
357
		new_job_id=$(qsub ${mol_dir}/relaunched_calculations/cp2k_gamma.j | awk '{print $3}')
358
    echo "Submitted job $new_job_id"
359
    jobs_owned+=($new_job_id)
360
		cd -
361
	done
362
fi
363

    
364
### Attente jusqu'à ce que tous les calculs aient fini
365
go_on=true
366
while [ $go_on == true ]; do
367
  all_jobs=`qstat | tail -n+3 | awk '{print $1}'`
368
  for j1 in ${jobs_owned[@]} ; do
369
    for j2 in ${all_jobs[@]}; do
370
      if [ $j1 == $j2 ]; then
371
        sleep 30
372
        continue 3
373
      fi
374
    done
375
  done
376
  echo "loop finished"
377
  go_on=false
378
done
379