Révision 86112fec
b/DockOnSurf_path | ||
---|---|---|
1 |
##################################################### |
|
2 |
# Please change here the paths to your directories: # |
|
3 |
##################################################### |
|
4 |
|
|
5 |
|
|
6 |
# Directory of the DockOnSurf script: |
|
7 |
export DockOnSurf_path="/home/sblanck/DockOnSurf_hematite" |
|
8 |
|
|
9 |
# Directory where you want to get the results for the molecule alone: |
|
10 |
# The script will automatically create a subdirectory with the name of the studied molecule |
|
11 |
export Molecule_results_path="/home/sblanck/cp2k/hematite_molecules" |
|
12 |
|
|
13 |
# Directory where you want to get the results for the asorption structures: |
|
14 |
# The script will automatically create a subdirectory with the name of the studied molecule |
|
15 |
export MolOnSurf_results_path="/home/sblanck/cp2k/hematite" |
|
16 |
|
|
17 |
# Directory where your surface xyz files are stored: |
|
18 |
export Surface_path="/home/sblanck/DockOnSurf_hematite/surfaces" |
|
19 |
|
|
20 |
# Directory where your molecule mol files are stored: |
|
21 |
export Molecule_path="/home/sblanck/DockOnSurf_hematite/molecules" |
|
22 |
|
|
23 |
# Input file for CP2K calculation for molecule alone: |
|
24 |
# Please check that the PROJECT_NAME is "molecule" |
|
25 |
export CP2K_input_molecule="/home/sblanck/DockOnSurf_hematite/CP2K_input/molecule.inp" |
|
26 |
|
|
27 |
# Input file for CP2K calculation for molecule on the surface: |
|
28 |
# Please check that the PROJECT_NAME is "surf_molecule" |
|
29 |
export CP2K_input_MolOnSurf="/home/sblanck/DockOnSurf_hematite/CP2K_input/adsorption.inp" |
|
30 |
|
|
31 |
# Submission script file for CP2K calculations: |
|
32 |
# Please check that the name of the Job is "XXXX" |
|
33 |
export CP2K_sub="/home/sblanck/DockOnSurf_hematite/CP2K_input/gammakpoint.j" |
b/global_script.sh | ||
---|---|---|
1 |
#!/bin/bash |
|
2 |
|
|
3 |
user="$(pwd | awk -F"/" '{print $3}')" |
|
4 |
|
|
5 |
file_path="$(find /home/${user} -name "DockOnSurf_path")" |
|
6 |
|
|
7 |
### Mise en place de l'arborescence |
|
8 |
|
|
9 |
source ${file_path} |
|
10 |
|
|
11 |
### Récupération des vecteurs de cellule |
|
12 |
|
|
13 |
a1="$(grep "A\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^A/ {print}' | awk '{print $2}')" |
|
14 |
if [[ $a1 =~ "E+00" ]]; then a1="$(echo $a1 | cut -d "E" -f1)"; fi ; export a1 |
|
15 |
a2="$(grep "A\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^A/ {print}' | awk '{print $3}')" |
|
16 |
if [[ $a2 =~ "E+00" ]]; then a2="$(echo $a2 | cut -d "E" -f1)"; fi ; export a2 |
|
17 |
a3="$(grep "A\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^A/ {print}' | awk '{print $4}')" |
|
18 |
if [[ $a3 =~ "E+00" ]]; then a3="$(echo $a3 | cut -d "E" -f1)"; fi ; export a3 |
|
19 |
b1="$(grep "B\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^B/ {print}' | awk '{print $2}')" |
|
20 |
if [[ $b1 =~ "E+00" ]]; then b1="$(echo $b1 | cut -d "E" -f1)"; fi ; export b1 |
|
21 |
b2="$(grep "B\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^B/ {print}' | awk '{print $3}')" |
|
22 |
if [[ $b2 =~ "E+00" ]]; then b2="$(echo $b2 | cut -d "E" -f1)"; fi ; export b2 |
|
23 |
b3="$(grep "B\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^B/ {print}' | awk '{print $4}')" |
|
24 |
if [[ $b3 =~ "E+00" ]]; then b3="$(echo $b3 | cut -d "E" -f1)"; fi ; export b3 |
|
25 |
c1="$(grep "C\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^C/ {print}' | awk '{print $2}')" |
|
26 |
if [[ $c1 =~ "E+00" ]]; then c1="$(echo $c1 | cut -d "E" -f1)"; fi ; export c1 |
|
27 |
c2="$(grep "C\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^C/ {print}' | awk '{print $3}')" |
|
28 |
if [[ $c2 =~ "E+00" ]]; then c2="$(echo $c2 | cut -d "E" -f1)"; fi ; export c2 |
|
29 |
c3="$(grep "C\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^C/ {print}' | awk '{print $4}')" |
|
30 |
if [[ $c3 =~ "E+00" ]]; then c3="$(echo $c3 | cut -d "E" -f1)"; fi ; export c3 |
|
31 |
|
|
32 |
### Paramètres initiaux |
|
33 |
|
|
34 |
echo 'Name of the molecule (without .mol) ' |
|
35 |
read molecule |
|
36 |
|
|
37 |
echo '' |
|
38 |
|
|
39 |
module load rdkit |
|
40 |
|
|
41 |
### Generation des conformeres |
|
42 |
|
|
43 |
${DockOnSurf_path}/modules/generation_conformeres.py $molecule 5000 |
|
44 |
echo ' -- CONFORMERS HAVE BEEN GENERATED --' |
|
45 |
|
|
46 |
### Conversion des fichiers .mol en .xyz |
|
47 |
|
|
48 |
${DockOnSurf_path}/modules/mol_to_xyz.sh $molecule |
|
49 |
|
|
50 |
### Lancement des calculs pour les molecules seules |
|
51 |
|
|
52 |
${DockOnSurf_path}/modules/launch_cp2k_molecule_seule.sh $molecule |
|
53 |
|
|
54 |
### Attente jusqu'à ce que tous les calculs aient fini |
|
55 |
|
|
56 |
mol="$(echo $molecule | cut -c1-10)" |
|
57 |
while [ "$(qstat | grep $mol | wc -l)" != 0 ]; |
|
58 |
do |
|
59 |
sleep 300 |
|
60 |
done |
|
61 |
|
|
62 |
echo ' -- ALL CALCULATIONS FOR MOLECULE ALONE HAVE BEEN DONE --' |
|
63 |
|
|
64 |
### Suppression des fichiers inutiles |
|
65 |
|
|
66 |
rm -r ${Molecule_results_path}/${molecule}/*/*RESTART* |
|
67 |
rm -r ${Molecule_results_path}/${molecule}/*/*restart* |
|
68 |
rm -r ${Molecule_results_path}/${molecule}/*/*BFGS* |
|
69 |
rm -r ${Molecule_results_path}/${molecule}/*/*.p* |
|
70 |
|
|
71 |
echo ' ' |
|
72 |
echo ' -- RESULTS --' |
|
73 |
|
|
74 |
min_energy_molecule=0 |
|
75 |
for struct in ${Molecule_results_path}/${molecule}/${molecule}*; do |
|
76 |
energy_molecule="$(awk '/ENERGY/{E=$NF} END{print E}' ${struct}/*${molecule}.out)" |
|
77 |
if [[ "${energy_molecule}" > "${min_energy_molecule}" ]] |
|
78 |
then |
|
79 |
min_energy_molecule=${energy_molecule} |
|
80 |
numero_mol_stable="$(echo $struct | awk -F'/' '{print $NF}')" |
|
81 |
fi |
|
82 |
done |
|
83 |
echo " Most stable structure for molecule alone: ${numero_mol_stable} with energy ${min_energy_molecule} Ha" |
|
84 |
|
|
85 |
echo ' ' |
|
86 |
echo ' -- ARGUMENTS FOR ADSORPTION -- ' |
|
87 |
echo ' ' |
|
88 |
echo 'Name of the file to use for the surface (without .xyz) ' |
|
89 |
read surface |
|
90 |
echo 'Number of the central atom of the molecule' |
|
91 |
read center |
|
92 |
read -a list_atom_1_mol -p 'List of atoms of the molecule to adsorb ' |
|
93 |
read -a list_atom_2_mol -p 'List of neighboring atoms of the molecule (linked to the atoms to adsorb) ' |
|
94 |
nombre_atom_mol=${#list_atom_1_mol[@]} |
|
95 |
if [ ${#list_atom_2_mol[@]} != ${nombre_atom_mol} ] |
|
96 |
then |
|
97 |
echo " Problem: number of atoms different in the two lists" |
|
98 |
exit |
|
99 |
fi |
|
100 |
read -a list_atom_surf -p 'List of the atoms of the surface that can be adsorption sites ' |
|
101 |
echo 'Value of cutoff energy for reoptimisation (eV)' |
|
102 |
read cutoff |
|
103 |
echo '' |
|
104 |
|
|
105 |
### Adsorption de la molecule sur la surface |
|
106 |
|
|
107 |
${DockOnSurf_path}/modules/script_add_adsorbate+diss.sh $molecule $surface "${list_atom_1_mol[*]}" "${list_atom_2_mol[*]}" "${list_atom_surf[*]}" |
|
108 |
|
|
109 |
echo ' -- ADSORPTION SUTRUCTURES HAVE BEEN CALCULATED --' |
|
110 |
|
|
111 |
# Changes labels of elements to its original definition, thus allowing different kinds of same element (Fe1 Fe2) with different properties (e.g. spin state in compounds with magnetic coupling) |
|
112 |
|
|
113 |
${DockOnSurf_path}/modules/fe_change.sh $molecule $surface ${MolOnSurf_results_path}/${molecule} |
|
114 |
|
|
115 |
### Lancement des calculs pour les molecules adsorbees |
|
116 |
|
|
117 |
${DockOnSurf_path}/modules/launch_script+diss.sh $molecule |
|
118 |
|
|
119 |
### Attente jusqu'à ce que tous les calculs aient fini |
|
120 |
|
|
121 |
mol2="$(echo $molecule | cut -c1-5)" |
|
122 |
while [ "$(qstat | grep surf_${mol2} | wc -l)" != 0 ]; |
|
123 |
do |
|
124 |
sleep 300 |
|
125 |
done |
|
126 |
|
|
127 |
echo ' -- ALL CALCULATIONS FOR ADSORPTION STRUCTURES HAVE BEEN DONE --' |
|
128 |
|
|
129 |
### Suppression des fichiers inutiles |
|
130 |
|
|
131 |
rm -r ${MolOnSurf_results_path}/${molecule}/*/*RESTART* |
|
132 |
rm -r ${MolOnSurf_results_path}/${molecule}/*/*BFGS* |
|
133 |
rm -r ${MolOnSurf_results_path}/${molecule}/*/*.p* |
|
134 |
|
|
135 |
### Clustering et lancement des structures centres des clusters |
|
136 |
|
|
137 |
nb_at_surf="$(awk 'NR==1{print}' ${Surface_path}/${surface}.xyz)" |
|
138 |
num_center="$(expr ${nb_at_surf} + $center )" |
|
139 |
|
|
140 |
${DockOnSurf_path}/modules/script_grandes_molecules+diss.sh $molecule $num_center ${nb_at_surf} ${cutoff} |
|
141 |
|
|
142 |
echo ' -- CLUSTERING HAS BEEN DONE AND CLUSTER CENTERS CALCULATIONS HAVE BEEN LAUNCHED --' |
|
143 |
|
|
144 |
### Attente jusqu'à ce que tous les calculs aient fini |
|
145 |
|
|
146 |
while [ "$(qstat | grep surf_${mol2} | wc -l)" != 0 ]; |
|
147 |
do |
|
148 |
sleep 300 |
|
149 |
done |
|
150 |
|
|
151 |
echo ' -- ALL CLUSTER CENTERS CALCULATIONS HAVE BEEN DONE --' |
|
152 |
|
|
153 |
|
|
154 |
echo " " |
|
155 |
echo ' -- RESULTS --' |
|
156 |
|
|
157 |
echo ' ' |
|
158 |
echo "Most stable structure for molecule alone: ${numero_mol_stable} with energy ${min_energy_molecule} Ha" |
|
159 |
|
|
160 |
min_energy_global=0 |
|
161 |
for global_struct in ${MolOnSurf_results_path}/${molecule}/relaunched_calculations/* ; do |
|
162 |
if [[ "$global_struct" != *.inp ]] |
|
163 |
then |
|
164 |
if [[ "$global_struct" != *.j ]] |
|
165 |
then |
|
166 |
energy_global="$(awk '/ENERGY/{E=$NF} END{print E}' ${global_struct}/*${molecule}.out)" |
|
167 |
if [[ "${energy_global}" > "${min_energy_global}" ]] |
|
168 |
then |
|
169 |
min_energy_global=${energy_global} |
|
170 |
numero_global_stable="$(echo ${global_struct} | awk -F'/' '{print $NF}')" |
|
171 |
fi |
|
172 |
fi |
|
173 |
fi |
|
174 |
done |
|
175 |
|
|
176 |
echo "Most stable structure for adsorption: ${numero_global_stable} with energy ${min_energy_global} Ha" |
|
177 |
|
|
178 |
|
b/modules/add_adsorbate_euler+dissociation.py | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
# -*- coding: utf-8 -*- |
|
3 |
|
|
4 |
import sys |
|
5 |
import numpy as np |
|
6 |
import ase |
|
7 |
from ase import io |
|
8 |
from ase.io import read |
|
9 |
from ase.io import write |
|
10 |
from ase import Atom |
|
11 |
from ase import build |
|
12 |
from ase.build import add_adsorbate |
|
13 |
from ase import constraints |
|
14 |
from ase.constraints import FixAtoms |
|
15 |
from ase import neighborlist |
|
16 |
from ase.neighborlist import NeighborList |
|
17 |
from ase import utils |
|
18 |
import ase_Fe1_Fe2 |
|
19 |
|
|
20 |
if len(sys.argv) < 7 : |
|
21 |
print "This script has to be used with at least 6 arguments." |
|
22 |
print "1. Name of the file containing the surface" |
|
23 |
print "2. Atom number of the atom of the surface where the adsorbate will be adsorbed" |
|
24 |
print "3. Name of the file containing the molecule to be adsorbed" |
|
25 |
print "4. Atom number of the atom of the molecule that will be adsorbed of the surface" |
|
26 |
print "5. Distance between the molecule and the surface" |
|
27 |
print "6. Name of the output file" |
|
28 |
print "Other arguments can be added before the name of the output file :" |
|
29 |
print " Second atom of the molecule" |
|
30 |
print " phi angle (rotation around z)" |
|
31 |
print " theta angle (rotation around new x)" |
|
32 |
print " psi angle (rotation around new z) " |
|
33 |
sys.exit(1) |
|
34 |
|
|
35 |
|
|
36 |
############################################################# |
|
37 |
# Définition de la fonction permettant de calculer les angles |
|
38 |
############################################################# |
|
39 |
|
|
40 |
def get_proper_angle(v1, v2, degrees=True): |
|
41 |
norm_dot = np.dot(v1/np.linalg.norm(v1), v2/np.linalg.norm(v2)) |
|
42 |
angle = np.arccos(np.sign(norm_dot) if np.abs(norm_dot) >= 1 else norm_dot) |
|
43 |
return(angle*180/np.pi if degrees else angle) |
|
44 |
|
|
45 |
|
|
46 |
########################################## |
|
47 |
# Lecture des différents arguments de base |
|
48 |
########################################## |
|
49 |
|
|
50 |
surface_file = sys.argv[1] |
|
51 |
atom_surface = int(sys.argv[2]) |
|
52 |
molecule_file = sys.argv[3] |
|
53 |
atom_molecule = int(sys.argv[4]) |
|
54 |
distance = int(sys.argv[5]) |
|
55 |
if len(sys.argv) < 8 : |
|
56 |
output = sys.argv[6] |
|
57 |
|
|
58 |
surface = read(surface_file) |
|
59 |
molecule = read(molecule_file) |
|
60 |
|
|
61 |
################################# |
|
62 |
# Lecture des positions de atomes |
|
63 |
################################# |
|
64 |
|
|
65 |
atom_surf_1 = surface[atom_surface].position |
|
66 |
x_atom_surf_1 = atom_surf_1[0] |
|
67 |
y_atom_surf_1 = atom_surf_1[1] |
|
68 |
z_atom_surf_1 = atom_surf_1[2] |
|
69 |
|
|
70 |
atom_mol_1 = molecule[atom_molecule].position |
|
71 |
x_atom_mol_1 = atom_mol_1[0] |
|
72 |
y_atom_mol_1 = atom_mol_1[1] |
|
73 |
z_atom_mol_1 = atom_mol_1[2] |
|
74 |
|
|
75 |
nb_atom_mol = int(len(molecule)) |
|
76 |
|
|
77 |
|
|
78 |
####################################################### |
|
79 |
# Cas avec un plus grand nombre d'arguments (rotations) |
|
80 |
####################################################### |
|
81 |
|
|
82 |
|
|
83 |
if len(sys.argv) > 7 : |
|
84 |
|
|
85 |
####################### |
|
86 |
# Lecture des arguments |
|
87 |
####################### |
|
88 |
|
|
89 |
atom_molecule_2 = int(sys.argv[6]) |
|
90 |
phi = int(sys.argv[7]) |
|
91 |
theta = int(sys.argv[8]) |
|
92 |
psi = int(sys.argv[9]) |
|
93 |
output = sys.argv[10] |
|
94 |
|
|
95 |
theta_min = theta - 0.01 |
|
96 |
theta_max = theta + 0.01 |
|
97 |
phi_min = phi - 0.01 |
|
98 |
phi_max = phi + 0.01 |
|
99 |
psi_min = psi - 0.01 |
|
100 |
psi_max = psi + 0.01 |
|
101 |
|
|
102 |
atom_mol_2 = molecule[atom_molecule_2].position |
|
103 |
x_atom_mol_2 = atom_mol_2[0] |
|
104 |
y_atom_mol_2 = atom_mol_2[1] |
|
105 |
z_atom_mol_2 = atom_mol_2[2] |
|
106 |
|
|
107 |
################################## |
|
108 |
# Rotation avec les angles d'Euler |
|
109 |
################################## |
|
110 |
|
|
111 |
#Initialisation : placement du second atome de la molecule sur Oy |
|
112 |
vect_y = [0, 1, 0] |
|
113 |
vect_atom_mol_2 = molecule[atom_molecule_2].position - molecule[atom_molecule].position |
|
114 |
angle_1 = get_proper_angle(vect_y, vect_atom_mol_2) |
|
115 |
if (angle_1 != 0) : |
|
116 |
vect_normal = np.cross(vect_y, vect_atom_mol_2) |
|
117 |
molecule.rotate(-angle_1, vect_normal, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1)) |
|
118 |
vect_atom_mol_2_verif = molecule[atom_molecule_2].position - molecule[atom_molecule].position |
|
119 |
angle_1_verif = get_proper_angle(vect_atom_mol_2_verif, vect_y) |
|
120 |
angle_max = 0.01 |
|
121 |
angle_min = -0.01 |
|
122 |
if (angle_1_verif < angle_min or angle_1_verif > angle_max) : |
|
123 |
print 'Error in initialisation' |
|
124 |
|
|
125 |
#Rotation Euler |
|
126 |
molecule.euler_rotate(phi, theta, psi, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1)) |
|
127 |
|
|
128 |
#Correction des collisions entre la molécule et la surface |
|
129 |
z_atom_surf_max = z_atom_surf_1 + 1 |
|
130 |
|
|
131 |
collision = 0.5 |
|
132 |
rotation_tot = 0 |
|
133 |
nb_atom_collision_min = 0 |
|
134 |
rotation_opt = 0 |
|
135 |
rotation_modif = 5 |
|
136 |
|
|
137 |
for i in range(0,nb_atom_mol) : |
|
138 |
atom_test = molecule[i].position |
|
139 |
z_atom_test = atom_test[2] |
|
140 |
z_atom_test_final = z_atom_test - z_atom_mol_1 + z_atom_surf_1 + distance |
|
141 |
if (z_atom_test_final < z_atom_surf_max) : |
|
142 |
collision = 1 |
|
143 |
nb_atom_collision_min += 1 |
|
144 |
if collision == 1 : |
|
145 |
print 'Collision between the molecule and the surface - modification of theta angle' |
|
146 |
|
|
147 |
vect_z = [0, 0, 1] |
|
148 |
|
|
149 |
while (collision == 1 and rotation_tot < 354) : |
|
150 |
vect_atom_mol_2_post_euler = molecule[atom_molecule_2].position - molecule[atom_molecule].position |
|
151 |
|
|
152 |
if (vect_atom_mol_2_post_euler[0] != 0 and vect_atom_mol_2_post_euler[1] != 0) : |
|
153 |
vect_rotation_theta = np.cross(vect_atom_mol_2_post_euler, vect_z) |
|
154 |
else : |
|
155 |
vect_rotation_theta = [0.0001, 0.0001 , 0] |
|
156 |
molecule.rotate(rotation_modif, vect_rotation_theta, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1)) |
|
157 |
rotation_tot += 5 |
|
158 |
|
|
159 |
collision = 0 |
|
160 |
nb_atom_collision = 0 |
|
161 |
|
|
162 |
for i in range(0,nb_atom_mol) : |
|
163 |
atom_test = molecule[i].position |
|
164 |
z_atom_test = atom_test[2] |
|
165 |
z_atom_test_final = z_atom_test - z_atom_mol_1 + z_atom_surf_1 + distance |
|
166 |
if (z_atom_test_final < z_atom_surf_max) : |
|
167 |
nb_atom_collision += 1 |
|
168 |
collision = 1 |
|
169 |
if (nb_atom_collision < nb_atom_collision_min) : |
|
170 |
nb_atom_collision_min = nb_atom_collision |
|
171 |
rotation_opt = rotation_tot |
|
172 |
|
|
173 |
if nb_atom_collision_min != 0 : |
|
174 |
rotation_tot_2 = 0 |
|
175 |
rotation_opt_2 = 0 |
|
176 |
|
|
177 |
while (collision == 1 and rotation_tot_2 < 354) : |
|
178 |
vect_atom_mol_2_post_euler = molecule[atom_molecule_2].position - molecule[atom_molecule].position |
|
179 |
molecule.rotate(rotation_modif, vect_atom_mol_2_post_euler, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1)) |
|
180 |
rotation_tot_2 += 5 |
|
181 |
|
|
182 |
collision = 0 |
|
183 |
nb_atom_collision = 0 |
|
184 |
|
|
185 |
for i in range(0,nb_atom_mol) : |
|
186 |
atom_test = molecule[i].position |
|
187 |
z_atom_test = atom_test[2] |
|
188 |
z_atom_test_final = z_atom_test - z_atom_mol_1 + z_atom_surf_1 + distance |
|
189 |
if (z_atom_test_final < z_atom_surf_max) : |
|
190 |
nb_atom_collision += 1 |
|
191 |
collision = 1 |
|
192 |
if (nb_atom_collision < nb_atom_collision_min) : |
|
193 |
nb_atom_collision_min = nb_atom_collision |
|
194 |
rotation_opt_2 = rotation_tot_2 |
|
195 |
|
|
196 |
if collision == 0 : |
|
197 |
print 'Collision corrected' |
|
198 |
elif collision == 1 : |
|
199 |
print 'Error: the collision could not be corrected' |
|
200 |
pos1 = output.rfind('/') |
|
201 |
pos = pos1 + 1 |
|
202 |
num_coll=output[pos:] |
|
203 |
j = output[:pos1] |
|
204 |
pos2 = j.rfind('/') |
|
205 |
molecule_directory = j[:pos2] |
|
206 |
fichier = open("%s/errors" % molecule_directory , "a") |
|
207 |
fichier.write("Adsorption %s : ERROR the collision could not be corrected\n" % num_coll ) |
|
208 |
fichier.close() |
|
209 |
|
|
210 |
|
|
211 |
########################################## |
|
212 |
# Adsorption de la molecule sur la surface |
|
213 |
########################################## |
|
214 |
|
|
215 |
add_adsorbate(surface, molecule, distance, (x_atom_surf_1,y_atom_surf_1), mol_index=atom_molecule) |
|
216 |
|
|
217 |
out=output+".xyz" |
|
218 |
write(out, surface) |
|
219 |
|
|
220 |
|
|
221 |
############################ |
|
222 |
# Dissociation si necessaire |
|
223 |
############################ |
|
224 |
|
|
225 |
if collision == 0 : |
|
226 |
surface = read(surface_file) |
|
227 |
molecule_reminder = molecule |
|
228 |
|
|
229 |
list_cutoffs = utils.natural_cutoffs(molecule) |
|
230 |
nl=NeighborList(list_cutoffs, self_interaction=False, bothways=True) |
|
231 |
|
|
232 |
nl.update(molecule) |
|
233 |
neighbor_indices, offsets = nl.get_neighbors(atom_molecule) |
|
234 |
|
|
235 |
symbols = molecule.get_chemical_symbols() |
|
236 |
|
|
237 |
neighbors_symbols=[] |
|
238 |
for i in neighbor_indices: |
|
239 |
neighbors_symbols.append(symbols[i]) |
|
240 |
|
|
241 |
nb_neighbors = len(neighbor_indices) |
|
242 |
|
|
243 |
diss=0 |
|
244 |
for i in range(0,nb_neighbors): |
|
245 |
if neighbors_symbols[i] == "H": |
|
246 |
diss=1 |
|
247 |
H_atom=neighbor_indices[i] #nb of the H atom |
|
248 |
print "Dissociation possible" |
|
249 |
|
|
250 |
if diss == 1: |
|
251 |
list_cutoffs_surface = utils.natural_cutoffs(surface) |
|
252 |
nl2=NeighborList(list_cutoffs_surface, self_interaction=False, bothways=True) |
|
253 |
nl2.update(surface) |
|
254 |
neighbor_indices_surf, offsets_surf = nl2.get_neighbors(atom_surface) |
|
255 |
symbols_surf = surface.get_chemical_symbols() |
|
256 |
for i in neighbor_indices_surf: |
|
257 |
surface = read(surface_file) |
|
258 |
molecule = molecule_reminder |
|
259 |
neighbor_surf_coord = surface[i].position |
|
260 |
coord_H_atom=molecule[H_atom].position |
|
261 |
if (neighbor_surf_coord[2] > z_atom_surf_1-1 and symbols_surf[i] == "O"): |
|
262 |
vector_H_diss=[neighbor_surf_coord[0]-coord_H_atom[0], neighbor_surf_coord[1]-coord_H_atom[1], neighbor_surf_coord[2]+1-coord_H_atom[2]] |
|
263 |
translation_matrix_dissociation=[] |
|
264 |
for k in range (0, nb_atom_mol): |
|
265 |
if k != H_atom: |
|
266 |
translation_matrix_dissociation.append([0,0,0]) |
|
267 |
else: |
|
268 |
translation_matrix_dissociation.append(vector_H_diss) |
|
269 |
molecule.translate(translation_matrix_dissociation) |
|
270 |
add_adsorbate(surface, molecule, distance, (x_atom_surf_1,y_atom_surf_1), mol_index=atom_molecule) |
|
271 |
output_diss = output + "_diss_" + str(i) + ".xyz" |
|
272 |
write(output_diss, surface) |
|
273 |
|
|
274 |
|
|
275 |
|
|
276 |
|
|
277 |
|
|
278 |
|
b/modules/ase_Fe1_Fe2.py | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
# -*- coding: utf-8 -*- |
|
3 |
|
|
4 |
|
|
5 |
import ase |
|
6 |
from ase import Atom |
|
7 |
from ase import data |
|
8 |
import numpy as np |
|
9 |
|
|
10 |
# Chemical symbols of new types of atoms |
|
11 |
data.chemical_symbols += ['Fe1', 'Fe2'] |
|
12 |
data.chemical_symbols += ['O1'] |
|
13 |
|
|
14 |
# Atomic numbers |
|
15 |
Z_Fe = data.atomic_numbers['Fe'] |
|
16 |
Z_O = data.atomic_numbers['O'] |
|
17 |
data.atomic_numbers['Fe1'] = Z_Fe |
|
18 |
data.atomic_numbers['Fe2'] = Z_Fe |
|
19 |
data.atomic_numbers['O1'] = Z_O |
|
20 |
|
|
21 |
# Atomic names |
|
22 |
data.atomic_names += ['Iron', 'Iron'] |
|
23 |
data.atomic_names += ['Oxygen'] |
|
24 |
|
|
25 |
# Atomic masses |
|
26 |
Fe_mass = data.atomic_masses_iupac2016[Z_Fe] |
|
27 |
O_mass = data.atomic_masses_iupac2016[Z_O] |
|
28 |
data.atomic_masses_iupac2016 = np.concatenate((data.atomic_masses_iupac2016, np.array([Fe_mass,Fe_mass]))) |
|
29 |
data.atomic_masses_iupac2016 = np.concatenate((data.atomic_masses_iupac2016, np.array([O_mass]))) |
|
30 |
|
|
31 |
data.atomic_masses = data.atomic_masses_iupac2016 |
|
32 |
|
|
33 |
Fe_mass = data.atomic_masses_legacy[Z_Fe] |
|
34 |
O_mass = data.atomic_masses_legacy[Z_O] |
|
35 |
data.atomic_masses_legacy = np.concatenate((data.atomic_masses_legacy, np.array([Fe_mass,Fe_mass]))) |
|
36 |
data.atomic_masses_legacy = np.concatenate((data.atomic_masses_legacy, np.array([O_mass]))) |
|
37 |
|
|
38 |
# Atomic radius |
|
39 |
Fe_radius = data.covalent_radii[Z_Fe] |
|
40 |
O_radius = data.covalent_radii[Z_O] |
|
41 |
data.covalent_radii = np.concatenate((data.covalent_radii, np.array([Fe_radius,Fe_radius]))) |
|
42 |
data.covalent_radii = np.concatenate((data.covalent_radii, np.array([O_radius]))) |
|
43 |
|
|
44 |
# Reference and ground state datas |
|
45 |
data.reference_states += [data.reference_states[Z_Fe], data.reference_states[Z_Fe]] |
|
46 |
data.reference_states += [data.reference_states[Z_O]] |
|
47 |
|
|
48 |
Fe_gsmm = data.ground_state_magnetic_moments[Z_Fe] |
|
49 |
O_gsmm = data.ground_state_magnetic_moments[Z_O] |
|
50 |
|
|
51 |
data.ground_state_magnetic_moments = np.concatenate((data.ground_state_magnetic_moments, np.array([Fe_gsmm,Fe_gsmm]))) |
|
52 |
data.ground_state_magnetic_moments = np.concatenate((data.ground_state_magnetic_moments, np.array([O_gsmm]))) |
|
53 |
|
|
54 |
# VdW raduis |
|
55 |
Fe_vdw_r = data.vdw_radii[Z_Fe] |
|
56 |
O_vdw_r = data.vdw_radii[Z_O] |
|
57 |
data.vdw_radii = np.concatenate((data.vdw_radii, np.array([Fe_vdw_r,Fe_vdw_r]))) |
|
58 |
data.vdw_radii = np.concatenate((data.vdw_radii, np.array([O_vdw_r]))) |
b/modules/calculate_rmsd | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
__doc__ = \ |
|
3 |
""" |
|
4 |
Calculate Root-mean-square deviation (RMSD) between structure A and B, in XYZ |
|
5 |
or PDB format, using transformation and rotation. |
|
6 |
|
|
7 |
For more information, usage, example and citation read more at |
|
8 |
https://github.com/charnley/rmsd |
|
9 |
""" |
|
10 |
|
|
11 |
__version__ = '1.3.2' |
|
12 |
|
|
13 |
import copy |
|
14 |
import re |
|
15 |
|
|
16 |
import numpy as np |
|
17 |
from scipy.optimize import linear_sum_assignment |
|
18 |
from scipy.spatial.distance import cdist |
|
19 |
|
|
20 |
|
|
21 |
AXIS_SWAPS = np.array([ |
|
22 |
[0, 1, 2], |
|
23 |
[0, 2, 1], |
|
24 |
[1, 0, 2], |
|
25 |
[1, 2, 0], |
|
26 |
[2, 1, 0], |
|
27 |
[2, 0, 1]]) |
|
28 |
|
|
29 |
AXIS_REFLECTIONS = np.array([ |
|
30 |
[1, 1, 1], |
|
31 |
[-1, 1, 1], |
|
32 |
[1, -1, 1], |
|
33 |
[1, 1, -1], |
|
34 |
[-1, -1, 1], |
|
35 |
[-1, 1, -1], |
|
36 |
[1, -1, -1], |
|
37 |
[-1, -1, -1]]) |
|
38 |
|
|
39 |
|
|
40 |
def rmsd(V, W): |
|
41 |
""" |
|
42 |
Calculate Root-mean-square deviation from two sets of vectors V and W. |
|
43 |
|
|
44 |
Parameters |
|
45 |
---------- |
|
46 |
V : array |
|
47 |
(N,D) matrix, where N is points and D is dimension. |
|
48 |
W : array |
|
49 |
(N,D) matrix, where N is points and D is dimension. |
|
50 |
|
|
51 |
Returns |
|
52 |
------- |
|
53 |
rmsd : float |
|
54 |
Root-mean-square deviation between the two vectors |
|
55 |
""" |
|
56 |
D = len(V[0]) |
|
57 |
N = len(V) |
|
58 |
result = 0.0 |
|
59 |
for v, w in zip(V, W): |
|
60 |
result += sum([(v[i] - w[i])**2.0 for i in range(D)]) |
|
61 |
return np.sqrt(result/N) |
|
62 |
|
|
63 |
|
|
64 |
def kabsch_rmsd(P, Q, translate=False): |
|
65 |
""" |
|
66 |
Rotate matrix P unto Q using Kabsch algorithm and calculate the RMSD. |
|
67 |
|
|
68 |
Parameters |
|
69 |
---------- |
|
70 |
P : array |
|
71 |
(N,D) matrix, where N is points and D is dimension. |
|
72 |
Q : array |
|
73 |
(N,D) matrix, where N is points and D is dimension. |
|
74 |
translate : bool |
|
75 |
Use centroids to translate vector P and Q unto each other. |
|
76 |
|
|
77 |
Returns |
|
78 |
------- |
|
79 |
rmsd : float |
|
80 |
root-mean squared deviation |
|
81 |
""" |
|
82 |
if translate: |
|
83 |
Q = Q - centroid(Q) |
|
84 |
P = P - centroid(P) |
|
85 |
|
|
86 |
P = kabsch_rotate(P, Q) |
|
87 |
return rmsd(P, Q) |
|
88 |
|
|
89 |
|
|
90 |
def kabsch_rotate(P, Q): |
|
91 |
""" |
|
92 |
Rotate matrix P unto matrix Q using Kabsch algorithm. |
|
93 |
|
|
94 |
Parameters |
|
95 |
---------- |
|
96 |
P : array |
|
97 |
(N,D) matrix, where N is points and D is dimension. |
|
98 |
Q : array |
|
99 |
(N,D) matrix, where N is points and D is dimension. |
|
100 |
|
|
101 |
Returns |
|
102 |
------- |
|
103 |
P : array |
|
104 |
(N,D) matrix, where N is points and D is dimension, |
|
105 |
rotated |
|
106 |
|
|
107 |
""" |
|
108 |
U = kabsch(P, Q) |
|
109 |
|
|
110 |
# Rotate P |
|
111 |
P = np.dot(P, U) |
|
112 |
return P |
|
113 |
|
|
114 |
|
|
115 |
def kabsch(P, Q): |
|
116 |
""" |
|
117 |
Using the Kabsch algorithm with two sets of paired point P and Q, centered |
|
118 |
around the centroid. Each vector set is represented as an NxD |
|
119 |
matrix, where D is the the dimension of the space. |
|
120 |
|
|
121 |
The algorithm works in three steps: |
|
122 |
- a centroid translation of P and Q (assumed done before this function |
|
123 |
call) |
|
124 |
- the computation of a covariance matrix C |
|
125 |
- computation of the optimal rotation matrix U |
|
126 |
|
|
127 |
For more info see http://en.wikipedia.org/wiki/Kabsch_algorithm |
|
128 |
|
|
129 |
Parameters |
|
130 |
---------- |
|
131 |
P : array |
|
132 |
(N,D) matrix, where N is points and D is dimension. |
|
133 |
Q : array |
|
134 |
(N,D) matrix, where N is points and D is dimension. |
|
135 |
|
|
136 |
Returns |
|
137 |
------- |
|
138 |
U : matrix |
|
139 |
Rotation matrix (D,D) |
|
140 |
""" |
|
141 |
|
|
142 |
# Computation of the covariance matrix |
|
143 |
C = np.dot(np.transpose(P), Q) |
|
144 |
|
|
145 |
# Computation of the optimal rotation matrix |
|
146 |
# This can be done using singular value decomposition (SVD) |
|
147 |
# Getting the sign of the det(V)*(W) to decide |
|
148 |
# whether we need to correct our rotation matrix to ensure a |
|
149 |
# right-handed coordinate system. |
|
150 |
# And finally calculating the optimal rotation matrix U |
|
151 |
# see http://en.wikipedia.org/wiki/Kabsch_algorithm |
|
152 |
V, S, W = np.linalg.svd(C) |
|
153 |
d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 |
|
154 |
|
|
155 |
if d: |
|
156 |
S[-1] = -S[-1] |
|
157 |
V[:, -1] = -V[:, -1] |
|
158 |
|
|
159 |
# Create Rotation matrix U |
|
160 |
U = np.dot(V, W) |
|
161 |
|
|
162 |
return U |
|
163 |
|
|
164 |
|
|
165 |
def quaternion_rmsd(P, Q): |
|
166 |
""" |
|
167 |
Rotate matrix P unto Q and calculate the RMSD |
|
168 |
based on doi:10.1016/1049-9660(91)90036-O |
|
169 |
|
|
170 |
Parameters |
|
171 |
---------- |
|
172 |
P : array |
|
173 |
(N,D) matrix, where N is points and D is dimension. |
|
174 |
Q : array |
|
175 |
(N,D) matrix, where N is points and D is dimension. |
|
176 |
|
|
177 |
Returns |
|
178 |
------- |
|
179 |
rmsd : float |
|
180 |
""" |
|
181 |
rot = quaternion_rotate(P, Q) |
|
182 |
P = np.dot(P, rot) |
|
183 |
return rmsd(P, Q) |
|
184 |
|
|
185 |
|
|
186 |
def quaternion_transform(r): |
|
187 |
""" |
|
188 |
Get optimal rotation |
|
189 |
note: translation will be zero when the centroids of each molecule are the |
|
190 |
same |
|
191 |
""" |
|
192 |
Wt_r = makeW(*r).T |
|
193 |
Q_r = makeQ(*r) |
|
194 |
rot = Wt_r.dot(Q_r)[:3, :3] |
|
195 |
return rot |
|
196 |
|
|
197 |
|
|
198 |
def makeW(r1, r2, r3, r4=0): |
|
199 |
""" |
|
200 |
matrix involved in quaternion rotation |
|
201 |
""" |
|
202 |
W = np.asarray([ |
|
203 |
[r4, r3, -r2, r1], |
|
204 |
[-r3, r4, r1, r2], |
|
205 |
[r2, -r1, r4, r3], |
|
206 |
[-r1, -r2, -r3, r4]]) |
|
207 |
return W |
|
208 |
|
|
209 |
|
|
210 |
def makeQ(r1, r2, r3, r4=0): |
|
211 |
""" |
|
212 |
matrix involved in quaternion rotation |
|
213 |
""" |
|
214 |
Q = np.asarray([ |
|
215 |
[r4, -r3, r2, r1], |
|
216 |
[r3, r4, -r1, r2], |
|
217 |
[-r2, r1, r4, r3], |
|
218 |
[-r1, -r2, -r3, r4]]) |
|
219 |
return Q |
|
220 |
|
|
221 |
|
|
222 |
def quaternion_rotate(X, Y): |
|
223 |
""" |
|
224 |
Calculate the rotation |
|
225 |
|
|
226 |
Parameters |
|
227 |
---------- |
|
228 |
X : array |
|
229 |
(N,D) matrix, where N is points and D is dimension. |
|
230 |
Y: array |
|
231 |
(N,D) matrix, where N is points and D is dimension. |
|
232 |
|
|
233 |
Returns |
|
234 |
------- |
|
235 |
rot : matrix |
|
236 |
Rotation matrix (D,D) |
|
237 |
""" |
|
238 |
N = X.shape[0] |
|
239 |
W = np.asarray([makeW(*Y[k]) for k in range(N)]) |
|
240 |
Q = np.asarray([makeQ(*X[k]) for k in range(N)]) |
|
241 |
Qt_dot_W = np.asarray([np.dot(Q[k].T, W[k]) for k in range(N)]) |
|
242 |
W_minus_Q = np.asarray([W[k] - Q[k] for k in range(N)]) |
|
243 |
A = np.sum(Qt_dot_W, axis=0) |
|
244 |
eigen = np.linalg.eigh(A) |
|
245 |
r = eigen[1][:, eigen[0].argmax()] |
|
246 |
rot = quaternion_transform(r) |
|
247 |
return rot |
|
248 |
|
|
249 |
|
|
250 |
def centroid(X): |
|
251 |
""" |
|
252 |
Centroid is the mean position of all the points in all of the coordinate |
|
253 |
directions, from a vectorset X. |
|
254 |
|
|
255 |
https://en.wikipedia.org/wiki/Centroid |
|
256 |
|
|
257 |
C = sum(X)/len(X) |
|
258 |
|
|
259 |
Parameters |
|
260 |
---------- |
|
261 |
X : array |
|
262 |
(N,D) matrix, where N is points and D is dimension. |
|
263 |
|
|
264 |
Returns |
|
265 |
------- |
|
266 |
C : float |
|
267 |
centroid |
|
268 |
""" |
|
269 |
C = X.mean(axis=0) |
|
270 |
return C |
|
271 |
|
|
272 |
|
|
273 |
def reorder_distance(p_atoms, q_atoms, p_coord, q_coord): |
|
274 |
""" |
|
275 |
Re-orders the input atom list and xyz coordinates by atom type and then by |
|
276 |
distance of each atom from the centroid. |
|
277 |
|
|
278 |
Parameters |
|
279 |
---------- |
|
280 |
atoms : array |
|
281 |
(N,1) matrix, where N is points holding the atoms' names |
|
282 |
coord : array |
|
283 |
(N,D) matrix, where N is points and D is dimension |
|
284 |
|
|
285 |
Returns |
|
286 |
------- |
|
287 |
atoms_reordered : array |
|
288 |
(N,1) matrix, where N is points holding the ordered atoms' names |
|
289 |
coords_reordered : array |
|
290 |
(N,D) matrix, where N is points and D is dimension (rows re-ordered) |
|
291 |
""" |
|
292 |
|
|
293 |
# Find unique atoms |
|
294 |
unique_atoms = np.unique(p_atoms) |
|
295 |
|
|
296 |
# generate full view from q shape to fill in atom view on the fly |
|
297 |
view_reorder = np.zeros(q_atoms.shape, dtype=int) |
|
298 |
|
|
299 |
for atom in unique_atoms: |
|
300 |
|
|
301 |
p_atom_idx, = np.where(p_atoms == atom) |
|
302 |
q_atom_idx, = np.where(q_atoms == atom) |
|
303 |
|
|
304 |
A_coord = p_coord[p_atom_idx] |
|
305 |
B_coord = q_coord[q_atom_idx] |
|
306 |
|
|
307 |
# Calculate distance from each atom to centroid |
|
308 |
A_norms = np.linalg.norm(A_coord, axis=1) |
|
309 |
B_norms = np.linalg.norm(B_coord, axis=1) |
|
310 |
|
|
311 |
reorder_indices_A = np.argsort(A_norms) |
|
312 |
reorder_indices_B = np.argsort(B_norms) |
|
313 |
|
|
314 |
# Project the order of P onto Q |
|
315 |
translator = np.argsort(reorder_indices_A) |
|
316 |
view = reorder_indices_B[translator] |
|
317 |
view_reorder[p_atom_idx] = q_atom_idx[view] |
|
318 |
|
|
319 |
return view_reorder |
|
320 |
|
|
321 |
|
|
322 |
def hungarian(A, B): |
|
323 |
""" |
|
324 |
Hungarian reordering. |
|
325 |
|
|
326 |
Assume A and B are coordinates for atoms of SAME type only |
|
327 |
""" |
|
328 |
|
|
329 |
# should be kabasch here i think |
|
330 |
distances = cdist(A, B, 'euclidean') |
|
331 |
|
|
332 |
# Perform Hungarian analysis on distance matrix between atoms of 1st |
|
333 |
# structure and trial structure |
|
334 |
indices_a, indices_b = linear_sum_assignment(distances) |
|
335 |
|
|
336 |
return indices_b |
|
337 |
|
|
338 |
|
|
339 |
def reorder_hungarian(p_atoms, q_atoms, p_coord, q_coord): |
|
340 |
""" |
|
341 |
Re-orders the input atom list and xyz coordinates using the Hungarian |
|
342 |
method (using optimized column results) |
|
343 |
|
|
344 |
Parameters |
|
345 |
---------- |
|
346 |
p_atoms : array |
|
347 |
(N,1) matrix, where N is points holding the atoms' names |
|
348 |
p_atoms : array |
|
349 |
(N,1) matrix, where N is points holding the atoms' names |
|
350 |
p_coord : array |
|
351 |
(N,D) matrix, where N is points and D is dimension |
|
352 |
q_coord : array |
|
353 |
(N,D) matrix, where N is points and D is dimension |
|
354 |
|
|
355 |
Returns |
|
356 |
------- |
|
357 |
view_reorder : array |
|
358 |
(N,1) matrix, reordered indexes of atom alignment based on the |
|
359 |
coordinates of the atoms |
|
360 |
|
|
361 |
""" |
|
362 |
|
|
363 |
# Find unique atoms |
|
364 |
unique_atoms = np.unique(p_atoms) |
|
365 |
|
|
366 |
# generate full view from q shape to fill in atom view on the fly |
|
367 |
view_reorder = np.zeros(q_atoms.shape, dtype=int) |
|
368 |
view_reorder -= 1 |
|
369 |
|
|
370 |
for atom in unique_atoms: |
|
371 |
p_atom_idx, = np.where(p_atoms == atom) |
|
372 |
q_atom_idx, = np.where(q_atoms == atom) |
|
373 |
|
|
374 |
A_coord = p_coord[p_atom_idx] |
|
375 |
B_coord = q_coord[q_atom_idx] |
|
376 |
|
|
377 |
view = hungarian(A_coord, B_coord) |
|
378 |
view_reorder[p_atom_idx] = q_atom_idx[view] |
|
379 |
|
|
380 |
return view_reorder |
|
381 |
|
|
382 |
|
|
383 |
def generate_permutations(elements, n): |
|
384 |
""" |
|
385 |
Heap's algorithm for generating all n! permutations in a list |
|
386 |
https://en.wikipedia.org/wiki/Heap%27s_algorithm |
|
387 |
|
|
388 |
""" |
|
389 |
c = [0] * n |
|
390 |
yield elements |
|
391 |
i = 0 |
|
392 |
while i < n: |
|
393 |
if c[i] < i: |
|
394 |
if i % 2 == 0: |
|
395 |
elements[0], elements[i] = elements[i], elements[0] |
|
396 |
else: |
|
397 |
elements[c[i]], elements[i] = elements[i], elements[c[i]] |
|
398 |
yield elements |
|
399 |
c[i] += 1 |
|
400 |
i = 0 |
|
401 |
else: |
|
402 |
c[i] = 0 |
|
403 |
i += 1 |
|
404 |
|
|
405 |
|
|
406 |
def brute_permutation(A, B): |
|
407 |
""" |
|
408 |
Re-orders the input atom list and xyz coordinates using the brute force |
|
409 |
method of permuting all rows of the input coordinates |
|
410 |
|
|
411 |
Parameters |
|
412 |
---------- |
|
413 |
A : array |
|
414 |
(N,D) matrix, where N is points and D is dimension |
|
415 |
B : array |
|
416 |
(N,D) matrix, where N is points and D is dimension |
|
417 |
|
|
418 |
Returns |
|
419 |
------- |
|
420 |
view : array |
|
421 |
(N,1) matrix, reordered view of B projected to A |
|
422 |
""" |
|
423 |
|
|
424 |
rmsd_min = np.inf |
|
425 |
view_min = None |
|
426 |
|
|
427 |
# Sets initial ordering for row indices to [0, 1, 2, ..., len(A)], used in |
|
428 |
# brute-force method |
|
429 |
|
|
430 |
num_atoms = A.shape[0] |
|
431 |
initial_order = list(range(num_atoms)) |
|
432 |
|
|
433 |
for reorder_indices in generate_permutations(initial_order, num_atoms): |
|
434 |
|
|
435 |
# Re-order the atom array and coordinate matrix |
|
436 |
coords_ordered = B[reorder_indices] |
|
437 |
|
|
438 |
# Calculate the RMSD between structure 1 and the Hungarian re-ordered |
|
439 |
# structure 2 |
|
440 |
rmsd_temp = kabsch_rmsd(A, coords_ordered) |
|
441 |
|
|
442 |
# Replaces the atoms and coordinates with the current structure if the |
|
443 |
# RMSD is lower |
|
444 |
if rmsd_temp < rmsd_min: |
|
445 |
rmsd_min = rmsd_temp |
|
446 |
view_min = copy.deepcopy(reorder_indices) |
|
447 |
|
|
448 |
return view_min |
|
449 |
|
|
450 |
|
|
451 |
def reorder_brute(p_atoms, q_atoms, p_coord, q_coord): |
|
452 |
""" |
|
453 |
Re-orders the input atom list and xyz coordinates using all permutation of |
|
454 |
rows (using optimized column results) |
|
455 |
|
|
456 |
Parameters |
|
457 |
---------- |
|
458 |
p_atoms : array |
|
459 |
(N,1) matrix, where N is points holding the atoms' names |
|
460 |
q_atoms : array |
|
461 |
(N,1) matrix, where N is points holding the atoms' names |
|
462 |
p_coord : array |
|
463 |
(N,D) matrix, where N is points and D is dimension |
|
464 |
q_coord : array |
|
465 |
(N,D) matrix, where N is points and D is dimension |
|
466 |
|
|
467 |
Returns |
|
468 |
------- |
|
469 |
view_reorder : array |
|
470 |
(N,1) matrix, reordered indexes of atom alignment based on the |
|
471 |
coordinates of the atoms |
|
472 |
|
|
473 |
""" |
|
474 |
|
|
475 |
# Find unique atoms |
|
476 |
unique_atoms = np.unique(p_atoms) |
|
477 |
|
|
478 |
# generate full view from q shape to fill in atom view on the fly |
|
479 |
view_reorder = np.zeros(q_atoms.shape, dtype=int) |
|
480 |
view_reorder -= 1 |
|
481 |
|
|
482 |
for atom in unique_atoms: |
|
483 |
p_atom_idx, = np.where(p_atoms == atom) |
|
484 |
q_atom_idx, = np.where(q_atoms == atom) |
|
485 |
|
|
486 |
A_coord = p_coord[p_atom_idx] |
|
487 |
B_coord = q_coord[q_atom_idx] |
|
488 |
|
|
489 |
view = brute_permutation(A_coord, B_coord) |
|
490 |
view_reorder[p_atom_idx] = q_atom_idx[view] |
|
491 |
|
|
492 |
return view_reorder |
|
493 |
|
|
494 |
|
|
495 |
def check_reflections(p_atoms, q_atoms, p_coord, q_coord, |
|
496 |
reorder_method=reorder_hungarian, |
|
497 |
rotation_method=kabsch_rmsd, |
|
498 |
keep_stereo=False): |
|
499 |
""" |
|
500 |
Minimize RMSD using reflection planes for molecule P and Q |
|
501 |
|
|
502 |
Warning: This will affect stereo-chemistry |
|
503 |
|
|
504 |
Parameters |
|
505 |
---------- |
|
506 |
p_atoms : array |
|
507 |
(N,1) matrix, where N is points holding the atoms' names |
|
508 |
q_atoms : array |
|
509 |
(N,1) matrix, where N is points holding the atoms' names |
|
510 |
p_coord : array |
|
511 |
(N,D) matrix, where N is points and D is dimension |
|
512 |
q_coord : array |
|
513 |
(N,D) matrix, where N is points and D is dimension |
|
514 |
|
|
515 |
Returns |
|
516 |
------- |
|
517 |
min_rmsd |
|
518 |
min_swap |
|
519 |
min_reflection |
|
520 |
min_review |
|
521 |
|
|
522 |
""" |
|
523 |
|
|
524 |
min_rmsd = np.inf |
|
525 |
min_swap = None |
|
526 |
min_reflection = None |
|
527 |
min_review = None |
|
528 |
tmp_review = None |
|
529 |
swap_mask = [1,-1,-1,1,-1,1] |
|
530 |
reflection_mask = [1,-1,-1,-1,1,1,1,-1] |
|
531 |
|
|
532 |
for swap, i in zip(AXIS_SWAPS, swap_mask): |
|
533 |
for reflection, j in zip(AXIS_REFLECTIONS, reflection_mask): |
|
534 |
if keep_stereo and i * j == -1: continue # skip enantiomers |
|
535 |
|
|
536 |
tmp_atoms = copy.copy(q_atoms) |
|
537 |
tmp_coord = copy.deepcopy(q_coord) |
|
538 |
tmp_coord = tmp_coord[:, swap] |
|
539 |
tmp_coord = np.dot(tmp_coord, np.diag(reflection)) |
|
540 |
tmp_coord -= centroid(tmp_coord) |
|
541 |
|
|
542 |
# Reorder |
|
543 |
if reorder_method is not None: |
|
544 |
tmp_review = reorder_method(p_atoms, tmp_atoms, p_coord, tmp_coord) |
|
545 |
tmp_coord = tmp_coord[tmp_review] |
|
546 |
tmp_atoms = tmp_atoms[tmp_review] |
|
547 |
|
|
548 |
# Rotation |
|
549 |
if rotation_method is None: |
|
550 |
this_rmsd = rmsd(p_coord, tmp_coord) |
|
551 |
else: |
|
552 |
this_rmsd = rotation_method(p_coord, tmp_coord) |
|
553 |
|
|
554 |
if this_rmsd < min_rmsd: |
|
555 |
min_rmsd = this_rmsd |
|
556 |
min_swap = swap |
|
557 |
min_reflection = reflection |
|
558 |
min_review = tmp_review |
|
559 |
|
|
560 |
if not (p_atoms == q_atoms[min_review]).all(): |
|
561 |
print("error: Not aligned") |
|
562 |
quit() |
|
563 |
|
|
564 |
return min_rmsd, min_swap, min_reflection, min_review |
|
565 |
|
|
566 |
|
|
567 |
def set_coordinates(atoms, V, title="", decimals=8): |
|
568 |
""" |
|
569 |
Print coordinates V with corresponding atoms to stdout in XYZ format. |
|
570 |
Parameters |
|
571 |
---------- |
|
572 |
atoms : list |
|
573 |
List of atomic types |
|
574 |
V : array |
|
575 |
(N,3) matrix of atomic coordinates |
|
576 |
title : string (optional) |
|
577 |
Title of molecule |
|
578 |
decimals : int (optional) |
|
579 |
number of decimals for the coordinates |
|
580 |
|
|
581 |
Return |
|
582 |
------ |
|
583 |
output : str |
|
584 |
Molecule in XYZ format |
|
585 |
|
|
586 |
""" |
|
587 |
N, D = V.shape |
|
588 |
|
|
589 |
fmt = "{:2s}" + (" {:15."+str(decimals)+"f}")*3 |
|
590 |
|
|
591 |
out = list() |
|
592 |
out += [str(N)] |
|
593 |
out += [title] |
|
594 |
|
|
595 |
for i in range(N): |
|
596 |
atom = atoms[i] |
|
597 |
atom = atom[0].upper() + atom[1:] |
|
598 |
out += [fmt.format(atom, V[i, 0], V[i, 1], V[i, 2])] |
|
599 |
|
|
600 |
return "\n".join(out) |
|
601 |
|
|
602 |
|
|
603 |
def print_coordinates(atoms, V, title=""): |
|
604 |
""" |
|
605 |
Print coordinates V with corresponding atoms to stdout in XYZ format. |
|
606 |
|
|
607 |
Parameters |
|
608 |
---------- |
|
609 |
atoms : list |
|
610 |
List of element types |
|
611 |
V : array |
|
612 |
(N,3) matrix of atomic coordinates |
|
613 |
title : string (optional) |
|
614 |
Title of molecule |
|
615 |
|
|
616 |
""" |
|
617 |
|
|
618 |
print(set_coordinates(atoms, V, title=title)) |
|
619 |
|
|
620 |
return |
|
621 |
|
|
622 |
|
|
623 |
def get_coordinates(filename, fmt): |
|
624 |
""" |
|
625 |
Get coordinates from filename in format fmt. Supports XYZ and PDB. |
|
626 |
Parameters |
|
627 |
---------- |
|
628 |
filename : string |
|
629 |
Filename to read |
|
630 |
fmt : string |
|
631 |
Format of filename. Either xyz or pdb. |
|
632 |
Returns |
|
633 |
------- |
|
634 |
atoms : list |
|
635 |
List of atomic types |
|
636 |
V : array |
|
637 |
(N,3) where N is number of atoms |
|
638 |
""" |
|
639 |
if fmt == "xyz": |
|
640 |
get_func = get_coordinates_xyz |
|
641 |
elif fmt == "pdb": |
|
642 |
get_func = get_coordinates_pdb |
|
643 |
else: |
|
644 |
exit("Could not recognize file format: {:s}".format(fmt)) |
|
645 |
|
|
646 |
return get_func(filename) |
|
647 |
|
|
648 |
|
|
649 |
def get_coordinates_pdb(filename): |
|
650 |
""" |
|
651 |
Get coordinates from the first chain in a pdb file |
|
652 |
and return a vectorset with all the coordinates. |
|
653 |
|
|
654 |
Parameters |
|
655 |
---------- |
|
656 |
filename : string |
|
657 |
Filename to read |
|
658 |
|
|
659 |
Returns |
|
660 |
------- |
|
661 |
atoms : list |
|
662 |
List of atomic types |
|
663 |
V : array |
|
664 |
(N,3) where N is number of atoms |
|
665 |
""" |
|
666 |
|
|
667 |
# PDB files tend to be a bit of a mess. The x, y and z coordinates |
|
668 |
# are supposed to be in column 31-38, 39-46 and 47-54, but this is |
|
669 |
# not always the case. |
|
670 |
# Because of this the three first columns containing a decimal is used. |
|
671 |
# Since the format doesn't require a space between columns, we use the |
|
672 |
# above column indices as a fallback. |
|
673 |
|
|
674 |
x_column = None |
|
675 |
V = list() |
|
676 |
|
|
677 |
# Same with atoms and atom naming. |
|
678 |
# The most robust way to do this is probably |
|
679 |
# to assume that the atomtype is given in column 3. |
|
680 |
|
|
681 |
atoms = list() |
|
682 |
|
|
683 |
with open(filename, 'r') as f: |
|
684 |
lines = f.readlines() |
|
685 |
for line in lines: |
|
686 |
if line.startswith("TER") or line.startswith("END"): |
|
687 |
break |
|
688 |
if line.startswith("ATOM"): |
|
689 |
tokens = line.split() |
|
690 |
# Try to get the atomtype |
|
691 |
try: |
|
692 |
atom = tokens[2][0] |
|
693 |
if atom in ("H", "C", "N", "O", "S", "P"): |
|
694 |
atoms.append(atom) |
|
695 |
else: |
|
696 |
# e.g. 1HD1 |
|
697 |
atom = tokens[2][1] |
|
698 |
if atom == "H": |
|
699 |
atoms.append(atom) |
|
700 |
else: |
|
701 |
raise Exception |
|
702 |
except: |
|
703 |
exit("error: Parsing atomtype for the following line: \n{0:s}".format(line)) |
|
704 |
|
|
705 |
if x_column == None: |
|
706 |
try: |
|
707 |
# look for x column |
|
708 |
for i, x in enumerate(tokens): |
|
709 |
if "." in x and "." in tokens[i + 1] and "." in tokens[i + 2]: |
|
710 |
x_column = i |
|
711 |
break |
|
712 |
except IndexError: |
|
713 |
exit("error: Parsing coordinates for the following line: \n{0:s}".format(line)) |
|
714 |
# Try to read the coordinates |
|
715 |
try: |
|
716 |
V.append(np.asarray(tokens[x_column:x_column + 3], dtype=float)) |
|
717 |
except: |
|
718 |
# If that doesn't work, use hardcoded indices |
|
719 |
try: |
|
720 |
x = line[30:38] |
|
721 |
y = line[38:46] |
|
722 |
z = line[46:54] |
|
723 |
V.append(np.asarray([x, y ,z], dtype=float)) |
|
724 |
except: |
|
725 |
exit("error: Parsing input for the following line: \n{0:s}".format(line)) |
|
726 |
|
|
727 |
|
|
728 |
V = np.asarray(V) |
|
729 |
atoms = np.asarray(atoms) |
|
730 |
|
|
731 |
assert V.shape[0] == atoms.size |
|
732 |
|
|
733 |
return atoms, V |
|
734 |
|
|
735 |
|
|
736 |
def get_coordinates_xyz(filename): |
|
737 |
""" |
|
738 |
Get coordinates from filename and return a vectorset with all the |
|
739 |
coordinates, in XYZ format. |
|
740 |
|
|
741 |
Parameters |
|
742 |
---------- |
|
743 |
filename : string |
|
744 |
Filename to read |
|
745 |
|
|
746 |
Returns |
|
747 |
------- |
|
748 |
atoms : list |
|
749 |
List of atomic types |
|
750 |
V : array |
|
751 |
(N,3) where N is number of atoms |
|
752 |
""" |
|
753 |
|
|
754 |
f = open(filename, 'r') |
|
755 |
V = list() |
|
756 |
atoms = list() |
|
757 |
n_atoms = 0 |
|
758 |
|
|
759 |
# Read the first line to obtain the number of atoms to read |
|
760 |
try: |
|
761 |
n_atoms = int(f.readline()) |
|
762 |
except ValueError: |
|
763 |
exit("error: Could not obtain the number of atoms in the .xyz file.") |
|
764 |
|
|
765 |
# Skip the title line |
|
766 |
f.readline() |
|
767 |
|
|
768 |
# Use the number of atoms to not read beyond the end of a file |
|
769 |
for lines_read, line in enumerate(f): |
|
770 |
|
|
771 |
if lines_read == n_atoms: |
|
772 |
break |
|
773 |
|
|
774 |
atom = re.findall(r'[a-zA-Z]+', line)[0] |
|
775 |
atom = atom.upper() |
|
776 |
|
|
777 |
numbers = re.findall(r'[-]?\d+\.\d*(?:[Ee][-\+]\d+)?', line) |
|
778 |
numbers = [float(number) for number in numbers] |
|
779 |
|
|
780 |
# The numbers are not valid unless we obtain exacly three |
|
781 |
if len(numbers) >= 3: |
|
782 |
V.append(np.array(numbers)[:3]) |
|
783 |
atoms.append(atom) |
|
784 |
else: |
|
785 |
exit("Reading the .xyz file failed in line {0}. Please check the format.".format(lines_read + 2)) |
|
786 |
|
|
787 |
f.close() |
|
788 |
atoms = np.array(atoms) |
|
789 |
V = np.array(V) |
|
790 |
return atoms, V |
|
791 |
|
|
792 |
|
|
793 |
def main(): |
|
794 |
|
|
795 |
import argparse |
|
796 |
import sys |
|
797 |
|
|
798 |
description = __doc__ |
|
799 |
|
|
800 |
version_msg = """ |
|
801 |
rmsd {} |
|
802 |
|
|
803 |
See https://github.com/charnley/rmsd for citation information |
|
804 |
|
|
805 |
""" |
|
806 |
version_msg = version_msg.format(__version__) |
|
807 |
|
|
808 |
epilog = """ |
|
809 |
""" |
|
810 |
|
|
811 |
parser = argparse.ArgumentParser( |
|
812 |
usage='calculate_rmsd [options] FILE_A FILE_B', |
|
813 |
description=description, |
|
814 |
formatter_class=argparse.RawDescriptionHelpFormatter, |
|
815 |
epilog=epilog) |
|
816 |
|
|
817 |
|
|
818 |
# Input structures |
|
819 |
parser.add_argument('structure_a', metavar='FILE_A', type=str, help='structures in .xyz or .pdb format') |
|
820 |
parser.add_argument('structure_b', metavar='FILE_B', type=str) |
|
821 |
|
|
822 |
# Admin |
|
823 |
parser.add_argument('-v', '--version', action='version', version=version_msg) |
|
824 |
|
|
825 |
# Rotation |
|
826 |
parser.add_argument('-r', '--rotation', action='store', default="kabsch", help='select rotation method. "kabsch" (default), "quaternion" or "none"', metavar="METHOD") |
|
827 |
|
|
828 |
# Reorder arguments |
|
829 |
parser.add_argument('-e', '--reorder', action='store_true', help='align the atoms of molecules (default: Hungarian)') |
|
830 |
parser.add_argument('--reorder-method', action='store', default="hungarian", metavar="METHOD", help='select which reorder method to use; hungarian (default), brute, distance') |
|
831 |
parser.add_argument('--use-reflections', action='store_true', help='scan through reflections in planes (eg Y transformed to -Y -> X, -Y, Z) and axis changes, (eg X and Z coords exchanged -> Z, Y, X). This will affect stereo-chemistry.') |
|
832 |
parser.add_argument('--use-reflections-keep-stereo', action='store_true', help='scan through reflections in planes (eg Y transformed to -Y -> X, -Y, Z) and axis changes, (eg X and Z coords exchanged -> Z, Y, X). Stereo-chemistry will be kept.') |
|
833 |
|
|
834 |
# Filter |
|
835 |
index_group = parser.add_mutually_exclusive_group() |
|
836 |
index_group.add_argument('-nh', '--no-hydrogen', action='store_true', help='ignore hydrogens when calculating RMSD') |
|
837 |
index_group.add_argument('--remove-idx', nargs='+', type=int, help='index list of atoms NOT to consider', metavar='IDX') |
|
838 |
index_group.add_argument('--add-idx', nargs='+', type=int, help='index list of atoms to consider', metavar='IDX') |
|
839 |
|
|
840 |
# format and print |
|
841 |
parser.add_argument('--format', action='store', help='format of input files. valid format are xyz and pdb', metavar='FMT') |
|
842 |
parser.add_argument('-p', '--output', '--print', action='store_true', help='print out structure B, centered and rotated unto structure A\'s coordinates in XYZ format') |
|
843 |
|
|
844 |
if len(sys.argv) == 1: |
|
845 |
parser.print_help() |
|
846 |
sys.exit(1) |
|
847 |
|
|
848 |
args = parser.parse_args() |
|
849 |
|
|
850 |
# As default, load the extension as format |
|
851 |
if args.format is None: |
|
852 |
args.format = args.structure_a.split('.')[-1] |
|
853 |
|
|
854 |
p_all_atoms, p_all = get_coordinates(args.structure_a, args.format) |
|
855 |
q_all_atoms, q_all = get_coordinates(args.structure_b, args.format) |
|
856 |
|
|
857 |
p_size = p_all.shape[0] |
|
858 |
q_size = q_all.shape[0] |
|
859 |
|
|
860 |
if not p_size == q_size: |
|
861 |
print("error: Structures not same size") |
|
862 |
quit() |
|
863 |
|
|
864 |
if np.count_nonzero(p_all_atoms != q_all_atoms) and not args.reorder: |
|
865 |
msg = """ |
|
866 |
error: Atoms are not in the same order. |
|
867 |
|
|
868 |
Use --reorder to align the atoms (can be expensive for large structures). |
|
869 |
|
|
870 |
Please see --help or documentation for more information or |
|
871 |
https://github.com/charnley/rmsd for further examples. |
|
872 |
""" |
|
873 |
print(msg) |
|
874 |
exit() |
|
875 |
|
|
876 |
|
|
877 |
# Set local view |
|
878 |
p_view = None |
|
879 |
q_view = None |
|
880 |
|
|
881 |
|
|
882 |
if args.no_hydrogen: |
|
883 |
p_view = np.where(p_all_atoms != 'H') |
|
884 |
q_view = np.where(q_all_atoms != 'H') |
|
885 |
|
|
886 |
elif args.remove_idx: |
|
887 |
index = range(p_size) |
|
888 |
index = set(index) - set(args.remove_idx) |
|
889 |
index = list(index) |
|
890 |
p_view = index |
|
891 |
q_view = index |
|
892 |
|
|
893 |
elif args.add_idx: |
|
894 |
p_view = args.add_idx |
|
895 |
q_view = args.add_idx |
|
896 |
|
|
897 |
|
|
898 |
# Set local view |
|
899 |
if p_view is None: |
|
900 |
p_coord = copy.deepcopy(p_all) |
|
901 |
q_coord = copy.deepcopy(q_all) |
|
902 |
p_atoms = copy.deepcopy(p_all_atoms) |
|
903 |
q_atoms = copy.deepcopy(q_all_atoms) |
|
904 |
|
|
905 |
else: |
|
906 |
|
|
907 |
if args.reorder and args.output: |
|
908 |
print("error: Cannot reorder atoms and print structure, when excluding atoms (such as --no-hydrogen)") |
|
909 |
quit() |
|
910 |
|
|
911 |
if args.use_reflections and args.output: |
|
912 |
print("error: Cannot use reflections on atoms and print, when excluding atoms (such as --no-hydrogen)") |
|
913 |
quit() |
|
914 |
|
|
915 |
p_coord = copy.deepcopy(p_all[p_view]) |
|
916 |
q_coord = copy.deepcopy(q_all[q_view]) |
|
917 |
p_atoms = copy.deepcopy(p_all_atoms[p_view]) |
|
918 |
q_atoms = copy.deepcopy(q_all_atoms[q_view]) |
Formats disponibles : Unified diff