dockonsurf / modules / script_grandes_molecules+diss.sh @ f43a1b4c
Historique | Voir | Annoter | Télécharger (12,6 ko)
1 |
#!/bin/bash |
---|---|
2 |
|
3 |
########################## |
4 |
#### Arguments input #### |
5 |
########################## |
6 |
|
7 |
nom_de_la_molecule=$1 |
8 |
atom_nb=$2 |
9 |
nb_surf=$3 |
10 |
cutoff=$4 |
11 |
|
12 |
|
13 |
############################################################# |
14 |
#### Creation of the working diretory and initialisation #### |
15 |
############################################################# |
16 |
|
17 |
|
18 |
mol_dir=${MolOnSurf_results_path}/${nom_de_la_molecule} |
19 |
|
20 |
list_errors=() |
21 |
|
22 |
mkdir ${mol_dir}/analyse |
23 |
|
24 |
|
25 |
############################################################### |
26 |
#### Extraction of structures needed from file *-pos-1.xyz #### |
27 |
############################################################### |
28 |
|
29 |
|
30 |
num=0 |
31 |
|
32 |
for dir in ${mol_dir}/* ; do |
33 |
if [ ! -e $dir/*-pos-1.xyz ] |
34 |
then list_errors+=($dir) |
35 |
else |
36 |
name="$(echo $dir | awk -F/ '{print $NF}')" |
37 |
mkdir ${mol_dir}/analyse/${name}_$num |
38 |
|
39 |
|
40 |
####################################### |
41 |
#### Get last geometry of mol+surf #### |
42 |
####################################### |
43 |
|
44 |
|
45 |
n_line="$(grep -n "i =" ${dir}/*-pos-1.xyz | awk 'END{print}' | awk 'NF=1{print}' | awk 'sub(".$", "")')" |
46 |
n_line_1="$(expr $n_line - 1)" |
47 |
awk -v ligne="$n_line_1" 'NR>=ligne{print}' ${dir}/*-pos-1.xyz > ${mol_dir}/analyse/${name}_$num/last_geo.xyz |
48 |
|
49 |
|
50 |
####################################################### |
51 |
#### Recenter the mol+surf on one atom without pbc #### |
52 |
####################################################### |
53 |
|
54 |
|
55 |
${DockOnSurf_path}/modules/recenter_only.py ${mol_dir}/analyse/${name}_$num ${atom_nb} |
56 |
recentered_temp_file=${mol_dir}/analyse/${name}_$num/recentered_only.xyz |
57 |
|
58 |
|
59 |
##################################################### |
60 |
#### Get the molecule alone centered on one atom #### |
61 |
##################################################### |
62 |
|
63 |
nb_surf2="$(expr ${nb_surf} + 2 )" |
64 |
nbligne="$(awk 'END{print NR}' ${recentered_temp_file})" |
65 |
for ((a=3; a<=nbligne; a++)) ; do |
66 |
z="$(awk '{print NR,$0}' ${recentered_temp_file}| awk 'NR == nligne {print}' nligne="$a" | awk '{print $1}')" |
67 |
if (( $(echo "$z > ${nb_surf2}" | bc -l) )) |
68 |
then awk 'NR == nligne {print}' nligne="$a" ${recentered_temp_file} >> ${mol_dir}/analyse/${name}_$num/temp_file.xyz |
69 |
fi |
70 |
done |
71 |
|
72 |
temp_file=${mol_dir}/analyse/${name}_$num/temp_file.xyz |
73 |
awk 'END{print NR}' ${temp_file} > ${mol_dir}/analyse/${name}_$num/molecule_seule.xyz |
74 |
mol_seule_file=${mol_dir}/analyse/${name}_$num/molecule_seule.xyz |
75 |
echo ' ' >> ${mol_seule_file} |
76 |
cat ${temp_file} >> ${mol_seule_file} |
77 |
rm ${temp_file} |
78 |
|
79 |
|
80 |
############################################################## |
81 |
### Apply pbc conditions to surface only in mol+surf file #### |
82 |
############################################################## |
83 |
|
84 |
|
85 |
${DockOnSurf_path}/modules/recenter_periodicity.py ${mol_dir}/analyse/${name}_$num ${atom_nb} |
86 |
recentered_periodicity_file=${mol_dir}/analyse/${name}_$num/recentered_periodicity.xyz |
87 |
|
88 |
for ((a=3; a<=nbligne; a++)) ; do |
89 |
z="$(awk '{print NR,$0}' ${recentered_temp_file}| awk 'NR == nligne {print}' nligne="$a" | awk '{print $1}')" |
90 |
if (( $(echo "$z <= ${nb_surf2}" | bc -l) )) |
91 |
then awk 'NR == nligne {print}' nligne="$a" ${recentered_periodicity_file} >> ${mol_dir}/analyse/${name}_$num/temp_file.xyz |
92 |
fi |
93 |
done |
94 |
|
95 |
awk 'NR <= 2{print}' ${recentered_periodicity_file} > ${mol_dir}/analyse/${name}_$num/recentered_pbc.xyz |
96 |
cat ${mol_dir}/analyse/${name}_$num/temp_file.xyz >> ${mol_dir}/analyse/${name}_$num/recentered_pbc.xyz |
97 |
awk 'NR > 2{print}' ${mol_seule_file} >> ${mol_dir}/analyse/${name}_$num/recentered_pbc.xyz |
98 |
|
99 |
rm ${mol_dir}/analyse/${name}_$num/temp_file.xyz |
100 |
rm ${recentered_periodicity_file} |
101 |
rm ${recentered_temp_file} |
102 |
|
103 |
|
104 |
num=$((num+1)) |
105 |
|
106 |
fi |
107 |
done |
108 |
|
109 |
echo "All structures files have been created" |
110 |
|
111 |
|
112 |
################################################################################# |
113 |
#### RMSD calculation of molecules without surface to create the RMSD matrix #### |
114 |
################################################################################# |
115 |
|
116 |
|
117 |
matrix_file=${mol_dir}/analyse/matrice_RMSD_mol_seule.txt |
118 |
|
119 |
touch ${matrix_file} |
120 |
|
121 |
for ((i=0; i<$num; i++)) ; do |
122 |
echo >> ${matrix_file} |
123 |
done |
124 |
|
125 |
for ((i=1; i<=$num; i++)) ; do |
126 |
for ((j=$i; j<=$num; j++)) ; do |
127 |
if (( $(echo "$i == $j" | bc -l) )) |
128 |
then |
129 |
add_to_line="$(awk -v ii="$i" 'NR==ii {print}' ${matrix_file})" |
130 |
add_to_line+=' 0' |
131 |
sed -i "${i}c $add_to_line" ${matrix_file} |
132 |
else |
133 |
num_i="$(expr $i - 1)" |
134 |
num_j="$(expr $j - 1)" |
135 |
RMSD_mol_seule_i_j="$(${DockOnSurf_path}/modules/calculate_rmsd --reorder ${mol_dir}/analyse/${nom_de_la_molecule}*_${num_i}/molecule_seule.xyz ${mol_dir}/analyse/${nom_de_la_molecule}*_${num_j}/molecule_seule.xyz)" |
136 |
add_to_i_line="$(awk -v ii="$i" 'NR==ii {print}' ${matrix_file})" |
137 |
add_to_i_line+=' -' |
138 |
add_to_i_line+=$RMSD_mol_seule_i_j |
139 |
sed -i "${i}c $add_to_i_line" ${matrix_file} |
140 |
add_to_j_line="$(awk -v jj="$j" 'NR==jj {print}' ${matrix_file})" |
141 |
add_to_j_line+=' -' |
142 |
add_to_j_line+=$RMSD_mol_seule_i_j |
143 |
sed -i "${j}c $add_to_j_line" ${matrix_file} |
144 |
fi |
145 |
done |
146 |
done |
147 |
|
148 |
echo "RMSD matrix for molecules without surface has been calculated" |
149 |
|
150 |
|
151 |
############################################################### |
152 |
#### Clustering of the molecule structures without surface #### |
153 |
############################################################### |
154 |
|
155 |
|
156 |
cluster_molecule_file=${mol_dir}/analyse/cluster_molecule.txt |
157 |
|
158 |
${DockOnSurf_path}/modules/clustering.py ${matrix_file} > ${cluster_molecule_file} |
159 |
|
160 |
echo "Clustering of molecules has been done" |
161 |
|
162 |
|
163 |
################################################################################### |
164 |
#### Separation of the clusters found for the molecule alone and re-clustering #### |
165 |
################################################################################### |
166 |
|
167 |
|
168 |
nb_clusters="$(awk 'END{print NR}' ${cluster_molecule_file})" |
169 |
|
170 |
sed -i 's/\[/ /g' ${cluster_molecule_file} |
171 |
sed -i 's/\]/ /g' ${cluster_molecule_file} |
172 |
|
173 |
mkdir ${mol_dir}/analyse/cluster_centers_energy |
174 |
|
175 |
s=1 |
176 |
|
177 |
line_cluster_group="$(awk '{print NR,$0}' ${cluster_molecule_file} | grep "Cluster\ groups:" | awk '{print $1}')" |
178 |
line_first_cluster="$(expr ${line_cluster_group} + 1 )" |
179 |
|
180 |
for ((k=${line_first_cluster}; k<=${nb_clusters}; k++)) ; do |
181 |
|
182 |
|
183 |
################################ |
184 |
#### Separation of clusters #### |
185 |
################################ |
186 |
|
187 |
|
188 |
awk -v ligne="$k" 'NR==ligne{print}' ${cluster_molecule_file} > ${mol_dir}/analyse/array |
189 |
sed -i 's/^ *//' ${mol_dir}/analyse/array |
190 |
sed -i 's/ / /g' ${mol_dir}/analyse/array |
191 |
sed -i 's/ / /g' ${mol_dir}/analyse/array |
192 |
sed -i 's/ / /g' ${mol_dir}/analyse/array |
193 |
sed -i 's/ / /g' ${mol_dir}/analyse/array |
194 |
readarray -d " " cluster < ${mol_dir}/analyse/array |
195 |
|
196 |
mkdir ${mol_dir}/analyse/cluster_molecule_$s |
197 |
cluster_dir=${mol_dir}/analyse/cluster_molecule_$s |
198 |
|
199 |
for numero in ${cluster[*]} ; do |
200 |
mv ${mol_dir}/analyse/${nom_de_la_molecule}*_${numero} ${cluster_dir}/. |
201 |
done |
202 |
a=0 |
203 |
for dir2 in ${cluster_dir}/${nom_de_la_molecule}* ; do |
204 |
mv $dir2 ${dir2}_$a |
205 |
a=$((a+1)) |
206 |
done |
207 |
|
208 |
rm ${mol_dir}/analyse/array |
209 |
|
210 |
|
211 |
###################################################################### |
212 |
#### Calculation of RMSD for mol+surf and creation of RMSD matrix #### |
213 |
###################################################################### |
214 |
|
215 |
|
216 |
cluster_matrix_file=${cluster_dir}/matrice_RMSD_cluster.txt |
217 |
touch ${cluster_matrix_file} |
218 |
|
219 |
len=${#cluster[@]} |
220 |
len_cluster="$(expr $len - 1)" |
221 |
|
222 |
for ((i=1; i<=${len_cluster}; i++)) ; do |
223 |
echo >> ${cluster_matrix_file} |
224 |
done |
225 |
|
226 |
for ((i=1; i<=${len_cluster}; i++)) ; do |
227 |
for ((j=$i; j<=${len_cluster}; j++)) ; do |
228 |
if (( $(echo "$i == $j" | bc -l) )) |
229 |
then |
230 |
add_to_line="$(awk -v ii="$i" 'NR==ii {print}' ${cluster_matrix_file})" |
231 |
add_to_line+=' 0' |
232 |
sed -i "${i}c $add_to_line" ${cluster_matrix_file} |
233 |
else |
234 |
num_i="$(expr $i - 1)" |
235 |
num_j="$(expr $j - 1)" |
236 |
RMSD_mol_surf_i_j="$(${DockOnSurf_path}/modules/calculate_rmsd --reorder ${cluster_dir}/${nom_de_la_molecule}*_${num_i}/recentered_pbc.xyz ${cluster_dir}/${nom_de_la_molecule}*_${num_j}/recentered_pbc.xyz)" |
237 |
add_to_i_line="$(awk -v ii="$i" 'NR==ii {print}' ${cluster_matrix_file})" |
238 |
add_to_i_line+=' -' |
239 |
add_to_i_line+=$RMSD_mol_surf_i_j |
240 |
sed -i "${i}c $add_to_i_line" ${cluster_matrix_file} |
241 |
add_to_j_line="$(awk -v jj="$j" 'NR==jj {print}' ${cluster_matrix_file})" |
242 |
add_to_j_line+=' -' |
243 |
add_to_j_line+=$RMSD_mol_surf_i_j |
244 |
sed -i "${j}c $add_to_j_line" ${cluster_matrix_file} |
245 |
fi |
246 |
done |
247 |
done |
248 |
|
249 |
|
250 |
####################### |
251 |
#### Re-clustering #### |
252 |
####################### |
253 |
|
254 |
|
255 |
cluster_molsurf_file=${cluster_dir}/cluster_molsurf.txt |
256 |
${DockOnSurf_path}/modules/clustering.py ${cluster_matrix_file} > ${cluster_molsurf_file} |
257 |
|
258 |
|
259 |
################################################# |
260 |
#### Get the cluster centers (min of energy) #### |
261 |
################################################# |
262 |
|
263 |
|
264 |
nb_cluster_molsurf="$(awk 'END{print NR}' ${cluster_molsurf_file})" |
265 |
line_cluster_group2="$(awk '{print NR,$0}' ${cluster_molsurf_file} | grep "Cluster\ groups:" | awk '{print $1}')" |
266 |
line_first_cluster2="$(expr ${line_cluster_group2} + 1 )" |
267 |
|
268 |
for ((a=${line_first_cluster2}; a<=${nb_cluster_molsurf}; a++)); do |
269 |
awk -v ligne="$a" 'NR==ligne{print}' ${cluster_molsurf_file} > ${cluster_dir}/array |
270 |
sed -i 's/\[/ /g' ${cluster_dir}/array |
271 |
sed -i 's/\]/ /g' ${cluster_dir}/array |
272 |
sed -i 's/^ *//' ${cluster_dir}/array |
273 |
sed -i 's/ / /g' ${cluster_dir}/array |
274 |
readarray -d " " cluster_struct < ${cluster_dir}/array |
275 |
|
276 |
E_min=0 |
277 |
numero_mol_E_min=t |
278 |
|
279 |
for struct_nb in ${cluster_struct[*]} ; do |
280 |
energie="$(awk 'NR==2{E=$NF} END{print E}' ${cluster_dir}/*_${struct_nb}/last_geo.xyz)" |
281 |
if (( $(echo $energie '<' $E_min | bc -l) )) |
282 |
then |
283 |
E_min=$energie |
284 |
numero_mol_E_min=$struct_nb |
285 |
fi |
286 |
done |
287 |
|
288 |
cp -r ${cluster_dir}/${nom_de_la_molecule}*_${numero_mol_E_min} ${mol_dir}/analyse/cluster_centers_energy/. |
289 |
done |
290 |
|
291 |
s=$((s+1)) |
292 |
done |
293 |
|
294 |
for dir in ${mol_dir}/analyse/cluster_centers_energy/${nom_de_la_molecule}* ; do |
295 |
nb_champs="$(echo $dir | awk -F "_" '{print NF}')" |
296 |
nb_limit="$(expr $nb_champs - 2)" |
297 |
name="$(echo $dir | cut -d '_' -f-$nb_limit)" |
298 |
mv $dir $name |
299 |
done |
300 |
|
301 |
echo "Clustering of structures mol+surf has been done" |
302 |
|
303 |
|
304 |
################################################### |
305 |
#### Determination of calculations to relaunch #### |
306 |
################################################### |
307 |
|
308 |
Energy_min=0 |
309 |
|
310 |
for structure in ${mol_dir}/analyse/cluster_centers_energy/*; do |
311 |
Energy="$(awk 'NR==2{E=$NF} END{print E}' $structure/last_geo.xyz)" |
312 |
if (( $(echo $Energy '<' $Energy_min | bc -l) )) |
313 |
then Energy_min=$Energy |
314 |
fi |
315 |
done |
316 |
|
317 |
Limit_energy="$(echo $Energy_min + ${cutoff}/27.2 | bc -l)" #limit of energy to relaunch the calculation 0.25eV more than min |
318 |
|
319 |
mkdir ${mol_dir}/relaunched_calculations |
320 |
|
321 |
for structure in ${mol_dir}/analyse/cluster_centers_energy/*; do |
322 |
Energy="$(awk 'NR==2{E=$NF} END{print E}' $structure/last_geo.xyz)" |
323 |
if (( $(echo $Energy '<' $Limit_energy | bc -l) )) |
324 |
then cp -r $structure ${mol_dir}/relaunched_calculations/. |
325 |
fi |
326 |
done |
327 |
|
328 |
|
329 |
#################################### |
330 |
#### Launching the calculations #### |
331 |
#################################### |
332 |
|
333 |
n="$(find ${mol_dir}/relaunched_calculations/${nom_de_la_molecule}* -type d | wc -l)" |
334 |
|
335 |
echo $n "clusters centers with energy lower than Emin+$cutoff eV have been found" |
336 |
echo "Relaunch the calculations ?" |
337 |
read answer |
338 |
|
339 |
if [[ $answer = "yes" ]] |
340 |
then |
341 |
cp ${mol_dir}/${nom_de_la_molecule}_1/cp2k_gamma.j ${mol_dir}/relaunched_calculations/. |
342 |
|
343 |
for dir in ${mol_dir}/relaunched_calculations/${nom_de_la_molecule}*; do |
344 |
# takes the restart file of the previous calculation as input file for re-optimization |
345 |
conf=$(echo $dir | rev | cut -d/ -f1 | rev ) |
346 |
cp ${mol_dir}/$conf/surf_${nom_de_la_molecule}-1.restart ${dir}/surf_${nom_de_la_molecule}.inp |
347 |
topo_line=$(grep -n \&TOPOLOGY ${dir}/surf_${nom_de_la_molecule}.inp | cut -d: -f1) |
348 |
sed -i "$topo_line,$(($topo_line+5))d" ${dir}/surf_${nom_de_la_molecule}.inp |
349 |
# changes the optimization criterions for more precise re-optimization |
350 |
sed -i 's/MAX_FORCE/#MAX_FORCE/g' ${dir}/surf_${nom_de_la_molecule}.inp |
351 |
sed -i 's/MAX_DR/#MAX_DR/g' ${dir}/surf_${nom_de_la_molecule}.inp |
352 |
sed -i 's/RMS_DR/#RMS_DR/g' ${dir}/surf_${nom_de_la_molecule}.inp |
353 |
sed -i 's/RMS_FORCE/#RMS_FORCE/g' ${dir}/surf_${nom_de_la_molecule}.inp |
354 |
sed -i 's/EPS_SCF/EPS_SCF\ 1E-07\ #/g' ${dir}/surf_${nom_de_la_molecule}.inp |
355 |
# launches the calculation |
356 |
cd $dir/ |
357 |
new_job_id=$(qsub ${mol_dir}/relaunched_calculations/cp2k_gamma.j | awk '{print $3}') |
358 |
echo "Submitted job $new_job_id" |
359 |
jobs_owned+=($new_job_id) |
360 |
cd - |
361 |
done |
362 |
fi |
363 |
|
364 |
### Attente jusqu'à ce que tous les calculs aient fini |
365 |
go_on=true |
366 |
while [ $go_on == true ]; do |
367 |
all_jobs=`qstat | tail -n+3 | awk '{print $1}'` |
368 |
for j1 in ${jobs_owned[@]} ; do |
369 |
for j2 in ${all_jobs[@]}; do |
370 |
if [ $j1 == $j2 ]; then |
371 |
sleep 30 |
372 |
continue 3 |
373 |
fi |
374 |
done |
375 |
done |
376 |
echo "loop finished" |
377 |
go_on=false |
378 |
done |
379 |
|