Révision 86112fec

b/DockOnSurf_path
1
#####################################################
2
# Please change here the paths to your directories: #
3
#####################################################
4

  
5

  
6
# Directory of the DockOnSurf script:
7
export DockOnSurf_path="/home/sblanck/DockOnSurf_hematite"
8

  
9
# Directory where you want to get the results for the molecule alone:
10
# The script will automatically create a subdirectory with the name of the studied molecule
11
export Molecule_results_path="/home/sblanck/cp2k/hematite_molecules"
12

  
13
# Directory where you want to get the results for the asorption structures:
14
# The script will automatically create a subdirectory with the name of the studied molecule
15
export MolOnSurf_results_path="/home/sblanck/cp2k/hematite"
16

  
17
# Directory where your surface xyz files are stored:
18
export Surface_path="/home/sblanck/DockOnSurf_hematite/surfaces"
19

  
20
# Directory where your molecule mol files are stored:
21
export Molecule_path="/home/sblanck/DockOnSurf_hematite/molecules"
22

  
23
# Input file for CP2K calculation for molecule alone:
24
# Please check that the PROJECT_NAME is "molecule"
25
export CP2K_input_molecule="/home/sblanck/DockOnSurf_hematite/CP2K_input/molecule.inp"
26

  
27
# Input file for CP2K calculation for molecule on the surface:
28
# Please check that the PROJECT_NAME is "surf_molecule"
29
export CP2K_input_MolOnSurf="/home/sblanck/DockOnSurf_hematite/CP2K_input/adsorption.inp"
30

  
31
# Submission script file for CP2K calculations:
32
# Please check that the name of the Job is "XXXX"
33
export CP2K_sub="/home/sblanck/DockOnSurf_hematite/CP2K_input/gammakpoint.j"
b/global_script.sh
1
#!/bin/bash
2

  
3
user="$(pwd | awk -F"/" '{print $3}')"
4

  
5
file_path="$(find /home/${user} -name "DockOnSurf_path")"
6

  
7
### Mise en place de l'arborescence
8

  
9
source ${file_path}
10

  
11
### Récupération des vecteurs de cellule
12

  
13
a1="$(grep "A\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^A/ {print}' | awk '{print $2}')"
14
if [[ $a1 =~ "E+00" ]]; then a1="$(echo $a1 | cut -d "E" -f1)"; fi ; export a1
15
a2="$(grep "A\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^A/ {print}' | awk '{print $3}')"
16
if [[ $a2 =~ "E+00" ]]; then a2="$(echo $a2 | cut -d "E" -f1)"; fi ; export a2
17
a3="$(grep "A\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^A/ {print}' | awk '{print $4}')"
18
if [[ $a3 =~ "E+00" ]]; then a3="$(echo $a3 | cut -d "E" -f1)"; fi ; export a3
19
b1="$(grep "B\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^B/ {print}' | awk '{print $2}')"
20
if [[ $b1 =~ "E+00" ]]; then b1="$(echo $b1 | cut -d "E" -f1)"; fi ; export b1
21
b2="$(grep "B\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^B/ {print}' | awk '{print $3}')"
22
if [[ $b2 =~ "E+00" ]]; then b2="$(echo $b2 | cut -d "E" -f1)"; fi ; export b2
23
b3="$(grep "B\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^B/ {print}' | awk '{print $4}')"
24
if [[ $b3 =~ "E+00" ]]; then b3="$(echo $b3 | cut -d "E" -f1)"; fi ; export b3
25
c1="$(grep "C\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^C/ {print}' | awk '{print $2}')"
26
if [[ $c1 =~ "E+00" ]]; then c1="$(echo $c1 | cut -d "E" -f1)"; fi ; export c1
27
c2="$(grep "C\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^C/ {print}' | awk '{print $3}')"
28
if [[ $c2 =~ "E+00" ]]; then c2="$(echo $c2 | cut -d "E" -f1)"; fi ; export c2
29
c3="$(grep "C\ " ${CP2K_input_MolOnSurf} | awk '$1 ~/^C/ {print}' | awk '{print $4}')"
30
if [[ $c3 =~ "E+00" ]]; then c3="$(echo $c3 | cut -d "E" -f1)"; fi ; export c3
31

  
32
### Paramètres initiaux
33

  
34
echo 'Name of the molecule (without .mol) '
35
read molecule
36

  
37
echo '' 
38

  
39
module load rdkit
40

  
41
### Generation des conformeres
42

  
43
${DockOnSurf_path}/modules/generation_conformeres.py $molecule 5000
44
echo ' -- CONFORMERS HAVE BEEN GENERATED --'
45

  
46
### Conversion des fichiers .mol en .xyz
47

  
48
${DockOnSurf_path}/modules/mol_to_xyz.sh $molecule
49

  
50
### Lancement des calculs pour les molecules seules
51

  
52
${DockOnSurf_path}/modules/launch_cp2k_molecule_seule.sh $molecule
53

  
54
### Attente jusqu'à ce que tous les calculs aient fini
55

  
56
mol="$(echo $molecule | cut -c1-10)"
57
while [ "$(qstat | grep $mol | wc -l)" != 0 ];
58
do 
59
	sleep 300
60
done
61

  
62
echo ' -- ALL CALCULATIONS FOR MOLECULE ALONE HAVE BEEN DONE --'
63

  
64
### Suppression des fichiers inutiles
65

  
66
rm -r ${Molecule_results_path}/${molecule}/*/*RESTART*
67
rm -r ${Molecule_results_path}/${molecule}/*/*restart*
68
rm -r ${Molecule_results_path}/${molecule}/*/*BFGS*
69
rm -r ${Molecule_results_path}/${molecule}/*/*.p*
70

  
71
echo ' '
72
echo ' -- RESULTS --'
73

  
74
min_energy_molecule=0
75
for struct in ${Molecule_results_path}/${molecule}/${molecule}*; do
76
	energy_molecule="$(awk '/ENERGY/{E=$NF} END{print E}' ${struct}/*${molecule}.out)"
77
	if [[ "${energy_molecule}" > "${min_energy_molecule}" ]]
78
	then 
79
		min_energy_molecule=${energy_molecule}
80
		numero_mol_stable="$(echo $struct | awk -F'/' '{print $NF}')"
81
	fi
82
done
83
echo "   Most stable structure for molecule alone: ${numero_mol_stable} with energy ${min_energy_molecule} Ha"
84

  
85
echo ' ' 
86
echo ' -- ARGUMENTS FOR ADSORPTION -- '
87
echo ' '
88
echo 'Name of the file to use for the surface (without .xyz) '
89
read surface
90
echo 'Number of the central atom of the molecule'
91
read center
92
read -a list_atom_1_mol -p 'List of atoms of the molecule to adsorb '
93
read -a list_atom_2_mol -p 'List of neighboring atoms of the molecule (linked to the atoms to adsorb) '
94
nombre_atom_mol=${#list_atom_1_mol[@]}
95
if [ ${#list_atom_2_mol[@]} != ${nombre_atom_mol} ]
96
then
97
	echo "  Problem: number of atoms different in the two lists"
98
	exit
99
fi
100
read -a list_atom_surf -p 'List of the atoms of the surface that can be adsorption sites '
101
echo 'Value of cutoff energy for reoptimisation (eV)'
102
read cutoff
103
echo '' 
104

  
105
### Adsorption de la molecule sur la surface
106

  
107
${DockOnSurf_path}/modules/script_add_adsorbate+diss.sh $molecule $surface "${list_atom_1_mol[*]}" "${list_atom_2_mol[*]}" "${list_atom_surf[*]}"
108

  
109
echo ' -- ADSORPTION SUTRUCTURES HAVE BEEN CALCULATED --'
110

  
111
# Changes labels of elements to its original definition, thus allowing different kinds of same element (Fe1 Fe2) with different properties (e.g. spin state in compounds with magnetic coupling) 
112

  
113
${DockOnSurf_path}/modules/fe_change.sh $molecule $surface ${MolOnSurf_results_path}/${molecule}
114

  
115
### Lancement des calculs pour les molecules adsorbees
116

  
117
${DockOnSurf_path}/modules/launch_script+diss.sh $molecule
118

  
119
### Attente jusqu'à ce que tous les calculs aient fini
120

  
121
mol2="$(echo $molecule | cut -c1-5)"
122
while [ "$(qstat | grep surf_${mol2} | wc -l)" != 0 ];
123
do 
124
	sleep 300
125
done
126

  
127
echo ' -- ALL CALCULATIONS FOR ADSORPTION STRUCTURES HAVE BEEN DONE --'
128

  
129
### Suppression des fichiers inutiles
130

  
131
rm -r ${MolOnSurf_results_path}/${molecule}/*/*RESTART*
132
rm -r ${MolOnSurf_results_path}/${molecule}/*/*BFGS*
133
rm -r ${MolOnSurf_results_path}/${molecule}/*/*.p*
134

  
135
### Clustering et lancement des structures centres des clusters
136

  
137
nb_at_surf="$(awk 'NR==1{print}' ${Surface_path}/${surface}.xyz)"
138
num_center="$(expr ${nb_at_surf} + $center )"
139

  
140
${DockOnSurf_path}/modules/script_grandes_molecules+diss.sh $molecule $num_center ${nb_at_surf} ${cutoff}
141

  
142
echo ' -- CLUSTERING HAS BEEN DONE AND CLUSTER CENTERS CALCULATIONS HAVE BEEN LAUNCHED --'
143

  
144
### Attente jusqu'à ce que tous les calculs aient fini
145

  
146
while [ "$(qstat | grep surf_${mol2} | wc -l)" != 0 ];
147
do
148
        sleep 300
149
done
150

  
151
echo ' -- ALL CLUSTER CENTERS CALCULATIONS HAVE BEEN DONE --'
152

  
153

  
154
echo " "
155
echo ' -- RESULTS --'
156

  
157
echo ' '
158
echo "Most stable structure for molecule alone: ${numero_mol_stable} with energy ${min_energy_molecule} Ha"
159

  
160
min_energy_global=0
161
for global_struct in ${MolOnSurf_results_path}/${molecule}/relaunched_calculations/* ; do
162
	if [[ "$global_struct" != *.inp ]]
163
	then
164
		if [[ "$global_struct" != *.j ]]
165
		then 
166
			energy_global="$(awk '/ENERGY/{E=$NF} END{print E}' ${global_struct}/*${molecule}.out)"
167
			if [[ "${energy_global}" > "${min_energy_global}" ]]
168
			then
169
				min_energy_global=${energy_global}
170
				numero_global_stable="$(echo ${global_struct} | awk -F'/' '{print $NF}')"
171
			fi
172
		fi
173
	fi
174
done
175

  
176
echo "Most stable structure for adsorption: ${numero_global_stable} with energy ${min_energy_global} Ha"
177

  
178

  
b/modules/add_adsorbate_euler+dissociation.py
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3

  
4
import sys
5
import numpy as np
6
import ase
7
from ase import io
8
from ase.io import read
9
from ase.io import write
10
from ase import Atom
11
from ase import build
12
from ase.build import add_adsorbate
13
from ase import constraints
14
from ase.constraints import FixAtoms
15
from ase import neighborlist
16
from ase.neighborlist import NeighborList
17
from ase import utils
18
import ase_Fe1_Fe2
19

  
20
if len(sys.argv) < 7 :
21
    print "This script has to be used with at least 6 arguments."
22
    print "1. Name of the file containing the surface"
23
    print "2. Atom number of the atom of the surface where the adsorbate will be adsorbed"
24
    print "3. Name of the file containing the molecule to be adsorbed"
25
    print "4. Atom number of the atom of the molecule that will be adsorbed of the surface"
26
    print "5. Distance between the molecule and the surface"
27
    print "6. Name of the output file"
28
    print "Other arguments can be added before the name of the output file :"
29
    print "  Second atom of the molecule"
30
    print "  phi angle (rotation around z)"
31
    print "  theta angle (rotation around new x)"
32
    print "  psi angle (rotation around new z) "
33
    sys.exit(1)
34

  
35

  
36
#############################################################
37
# Définition de la fonction permettant de calculer les angles
38
#############################################################
39

  
40
def get_proper_angle(v1, v2, degrees=True):
41
    norm_dot = np.dot(v1/np.linalg.norm(v1), v2/np.linalg.norm(v2))
42
    angle = np.arccos(np.sign(norm_dot) if np.abs(norm_dot) >= 1 else norm_dot)
43
    return(angle*180/np.pi if degrees else angle)
44

  
45

  
46
##########################################
47
# Lecture des différents arguments de base
48
##########################################
49

  
50
surface_file = sys.argv[1]
51
atom_surface = int(sys.argv[2])
52
molecule_file = sys.argv[3]
53
atom_molecule = int(sys.argv[4])
54
distance = int(sys.argv[5])
55
if len(sys.argv) < 8 :
56
    output = sys.argv[6]
57

  
58
surface = read(surface_file)
59
molecule = read(molecule_file)
60

  
61
#################################
62
# Lecture des positions de atomes
63
#################################
64

  
65
atom_surf_1 = surface[atom_surface].position
66
x_atom_surf_1 = atom_surf_1[0]
67
y_atom_surf_1 = atom_surf_1[1]
68
z_atom_surf_1 = atom_surf_1[2]
69

  
70
atom_mol_1 = molecule[atom_molecule].position
71
x_atom_mol_1 = atom_mol_1[0]
72
y_atom_mol_1 = atom_mol_1[1]
73
z_atom_mol_1 = atom_mol_1[2]
74

  
75
nb_atom_mol = int(len(molecule))
76

  
77

  
78
#######################################################
79
# Cas avec un plus grand nombre d'arguments (rotations)
80
#######################################################
81

  
82

  
83
if len(sys.argv) > 7 :
84

  
85
    #######################
86
    # Lecture des arguments
87
    #######################
88

  
89
    atom_molecule_2 = int(sys.argv[6])
90
    phi = int(sys.argv[7])
91
    theta = int(sys.argv[8])
92
    psi = int(sys.argv[9])
93
    output = sys.argv[10]
94

  
95
    theta_min = theta - 0.01
96
    theta_max = theta + 0.01
97
    phi_min = phi - 0.01
98
    phi_max = phi + 0.01
99
    psi_min = psi - 0.01
100
    psi_max = psi + 0.01
101

  
102
    atom_mol_2 = molecule[atom_molecule_2].position
103
    x_atom_mol_2 = atom_mol_2[0]
104
    y_atom_mol_2 = atom_mol_2[1]
105
    z_atom_mol_2 = atom_mol_2[2]
106

  
107
    ##################################
108
    # Rotation avec les angles d'Euler
109
    ##################################
110

  
111
    #Initialisation : placement du second atome de la molecule sur Oy
112
    vect_y = [0, 1, 0]
113
    vect_atom_mol_2 = molecule[atom_molecule_2].position - molecule[atom_molecule].position
114
    angle_1 = get_proper_angle(vect_y, vect_atom_mol_2)
115
    if (angle_1 != 0) :
116
        vect_normal = np.cross(vect_y, vect_atom_mol_2)
117
        molecule.rotate(-angle_1, vect_normal, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1))
118
        vect_atom_mol_2_verif = molecule[atom_molecule_2].position - molecule[atom_molecule].position
119
        angle_1_verif = get_proper_angle(vect_atom_mol_2_verif, vect_y)
120
        angle_max = 0.01
121
        angle_min = -0.01
122
        if (angle_1_verif < angle_min or angle_1_verif > angle_max) :
123
            print 'Error in initialisation'
124

  
125
    #Rotation Euler
126
    molecule.euler_rotate(phi, theta, psi, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1))
127

  
128
    #Correction des collisions entre la molécule et la surface
129
    z_atom_surf_max = z_atom_surf_1 + 1
130

  
131
    collision = 0.5
132
    rotation_tot = 0
133
    nb_atom_collision_min = 0
134
    rotation_opt = 0
135
    rotation_modif = 5
136
    
137
    for i in range(0,nb_atom_mol) :
138
        atom_test = molecule[i].position
139
        z_atom_test = atom_test[2]
140
        z_atom_test_final = z_atom_test - z_atom_mol_1 + z_atom_surf_1 + distance
141
        if (z_atom_test_final < z_atom_surf_max) :
142
            collision = 1
143
            nb_atom_collision_min += 1
144
    if collision == 1 :
145
        print 'Collision between the molecule and the surface - modification of theta angle'
146

  
147
    vect_z = [0, 0, 1]
148

  
149
    while (collision == 1 and rotation_tot < 354) :
150
        vect_atom_mol_2_post_euler = molecule[atom_molecule_2].position - molecule[atom_molecule].position
151
        
152
        if (vect_atom_mol_2_post_euler[0] != 0 and vect_atom_mol_2_post_euler[1] != 0) :
153
            vect_rotation_theta = np.cross(vect_atom_mol_2_post_euler, vect_z)
154
        else :
155
            vect_rotation_theta = [0.0001, 0.0001 , 0]
156
        molecule.rotate(rotation_modif, vect_rotation_theta, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1))
157
        rotation_tot += 5
158
        
159
        collision = 0
160
        nb_atom_collision = 0
161

  
162
        for i in range(0,nb_atom_mol) :
163
            atom_test = molecule[i].position
164
            z_atom_test = atom_test[2]
165
            z_atom_test_final = z_atom_test - z_atom_mol_1 + z_atom_surf_1 + distance
166
            if (z_atom_test_final < z_atom_surf_max) :
167
                nb_atom_collision += 1
168
                collision = 1
169
        if (nb_atom_collision < nb_atom_collision_min) :
170
            nb_atom_collision_min = nb_atom_collision
171
            rotation_opt = rotation_tot
172

  
173
        if nb_atom_collision_min != 0 :
174
            rotation_tot_2 = 0
175
            rotation_opt_2 = 0
176

  
177
            while (collision == 1 and rotation_tot_2 < 354) :
178
                vect_atom_mol_2_post_euler = molecule[atom_molecule_2].position - molecule[atom_molecule].position
179
                molecule.rotate(rotation_modif, vect_atom_mol_2_post_euler, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1))
180
                rotation_tot_2 += 5
181
                
182
                collision = 0
183
                nb_atom_collision = 0
184

  
185
                for i in range(0,nb_atom_mol) :
186
                    atom_test = molecule[i].position
187
                    z_atom_test = atom_test[2]
188
                    z_atom_test_final = z_atom_test - z_atom_mol_1 + z_atom_surf_1 + distance
189
                    if (z_atom_test_final < z_atom_surf_max) :
190
                        nb_atom_collision += 1
191
                        collision = 1
192
                if (nb_atom_collision < nb_atom_collision_min) :
193
                    nb_atom_collision_min = nb_atom_collision
194
                    rotation_opt_2 = rotation_tot_2
195

  
196
    if collision == 0 :
197
        print 'Collision corrected'
198
    elif collision == 1 :
199
        print 'Error: the collision could not be corrected'
200
        pos1 = output.rfind('/')
201
        pos = pos1 + 1
202
        num_coll=output[pos:]
203
        j = output[:pos1]
204
        pos2 = j.rfind('/')
205
        molecule_directory = j[:pos2]
206
        fichier = open("%s/errors" % molecule_directory , "a")
207
        fichier.write("Adsorption %s : ERROR the collision could not be corrected\n" % num_coll )
208
        fichier.close()
209

  
210

  
211
##########################################
212
# Adsorption de la molecule sur la surface
213
##########################################
214

  
215
add_adsorbate(surface, molecule, distance, (x_atom_surf_1,y_atom_surf_1), mol_index=atom_molecule)
216

  
217
out=output+".xyz"
218
write(out, surface)
219

  
220

  
221
############################
222
# Dissociation si necessaire
223
############################
224

  
225
if collision == 0 :
226
    surface = read(surface_file)
227
    molecule_reminder = molecule
228
    
229
    list_cutoffs = utils.natural_cutoffs(molecule)
230
    nl=NeighborList(list_cutoffs, self_interaction=False, bothways=True)
231

  
232
    nl.update(molecule)
233
    neighbor_indices, offsets = nl.get_neighbors(atom_molecule)
234

  
235
    symbols = molecule.get_chemical_symbols()
236

  
237
    neighbors_symbols=[]
238
    for i in neighbor_indices:
239
            neighbors_symbols.append(symbols[i])
240

  
241
    nb_neighbors = len(neighbor_indices)
242

  
243
    diss=0
244
    for i in range(0,nb_neighbors):
245
        if neighbors_symbols[i] == "H":
246
            diss=1
247
            H_atom=neighbor_indices[i] #nb of the H atom
248
            print "Dissociation possible"
249

  
250
    if diss == 1:
251
        list_cutoffs_surface = utils.natural_cutoffs(surface)
252
        nl2=NeighborList(list_cutoffs_surface, self_interaction=False, bothways=True)
253
        nl2.update(surface)
254
        neighbor_indices_surf, offsets_surf = nl2.get_neighbors(atom_surface)
255
        symbols_surf = surface.get_chemical_symbols()
256
        for i in neighbor_indices_surf:
257
            surface = read(surface_file)
258
            molecule = molecule_reminder
259
            neighbor_surf_coord = surface[i].position
260
            coord_H_atom=molecule[H_atom].position
261
            if (neighbor_surf_coord[2] > z_atom_surf_1-1 and symbols_surf[i] == "O"):
262
                vector_H_diss=[neighbor_surf_coord[0]-coord_H_atom[0], neighbor_surf_coord[1]-coord_H_atom[1], neighbor_surf_coord[2]+1-coord_H_atom[2]]
263
                translation_matrix_dissociation=[]
264
                for k in range (0, nb_atom_mol):
265
                    if k != H_atom:
266
                        translation_matrix_dissociation.append([0,0,0])
267
                    else:
268
                        translation_matrix_dissociation.append(vector_H_diss)
269
                molecule.translate(translation_matrix_dissociation)
270
                add_adsorbate(surface, molecule, distance, (x_atom_surf_1,y_atom_surf_1), mol_index=atom_molecule)
271
                output_diss = output + "_diss_" + str(i) + ".xyz"
272
                write(output_diss, surface)
273
    
274

  
275

  
276

  
277

  
278

  
b/modules/ase_Fe1_Fe2.py
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3

  
4

  
5
import ase
6
from ase import Atom
7
from ase import data
8
import numpy as np
9

  
10
# Chemical symbols of new types of atoms
11
data.chemical_symbols += ['Fe1', 'Fe2']
12
data.chemical_symbols += ['O1']
13

  
14
# Atomic numbers
15
Z_Fe = data.atomic_numbers['Fe']
16
Z_O = data.atomic_numbers['O']
17
data.atomic_numbers['Fe1'] = Z_Fe
18
data.atomic_numbers['Fe2'] = Z_Fe
19
data.atomic_numbers['O1'] = Z_O
20

  
21
# Atomic names
22
data.atomic_names += ['Iron', 'Iron']
23
data.atomic_names += ['Oxygen'] 
24

  
25
# Atomic masses
26
Fe_mass = data.atomic_masses_iupac2016[Z_Fe]
27
O_mass = data.atomic_masses_iupac2016[Z_O] 
28
data.atomic_masses_iupac2016 = np.concatenate((data.atomic_masses_iupac2016, np.array([Fe_mass,Fe_mass])))
29
data.atomic_masses_iupac2016 = np.concatenate((data.atomic_masses_iupac2016, np.array([O_mass])))
30

  
31
data.atomic_masses = data.atomic_masses_iupac2016
32

  
33
Fe_mass = data.atomic_masses_legacy[Z_Fe]
34
O_mass = data.atomic_masses_legacy[Z_O]
35
data.atomic_masses_legacy = np.concatenate((data.atomic_masses_legacy, np.array([Fe_mass,Fe_mass])))
36
data.atomic_masses_legacy = np.concatenate((data.atomic_masses_legacy, np.array([O_mass])))
37

  
38
# Atomic radius
39
Fe_radius = data.covalent_radii[Z_Fe]
40
O_radius = data.covalent_radii[Z_O]
41
data.covalent_radii = np.concatenate((data.covalent_radii, np.array([Fe_radius,Fe_radius])))
42
data.covalent_radii = np.concatenate((data.covalent_radii, np.array([O_radius])))
43

  
44
# Reference and ground state datas
45
data.reference_states += [data.reference_states[Z_Fe], data.reference_states[Z_Fe]]
46
data.reference_states += [data.reference_states[Z_O]]
47

  
48
Fe_gsmm = data.ground_state_magnetic_moments[Z_Fe]
49
O_gsmm = data.ground_state_magnetic_moments[Z_O]
50

  
51
data.ground_state_magnetic_moments = np.concatenate((data.ground_state_magnetic_moments, np.array([Fe_gsmm,Fe_gsmm])))
52
data.ground_state_magnetic_moments = np.concatenate((data.ground_state_magnetic_moments, np.array([O_gsmm])))
53

  
54
# VdW raduis
55
Fe_vdw_r = data.vdw_radii[Z_Fe]
56
O_vdw_r = data.vdw_radii[Z_O]
57
data.vdw_radii = np.concatenate((data.vdw_radii, np.array([Fe_vdw_r,Fe_vdw_r])))
58
data.vdw_radii = np.concatenate((data.vdw_radii, np.array([O_vdw_r])))
b/modules/calculate_rmsd
1
#!/usr/bin/env python
2
__doc__ = \
3
"""
4
Calculate Root-mean-square deviation (RMSD) between structure A and B, in XYZ
5
or PDB format, using transformation and rotation.
6

  
7
For more information, usage, example and citation read more at
8
https://github.com/charnley/rmsd
9
"""
10

  
11
__version__ = '1.3.2'
12

  
13
import copy
14
import re
15

  
16
import numpy as np
17
from scipy.optimize import linear_sum_assignment
18
from scipy.spatial.distance import cdist
19

  
20

  
21
AXIS_SWAPS = np.array([
22
    [0, 1, 2],
23
    [0, 2, 1],
24
    [1, 0, 2],
25
    [1, 2, 0],
26
    [2, 1, 0],
27
    [2, 0, 1]])
28

  
29
AXIS_REFLECTIONS = np.array([
30
    [1, 1, 1],
31
    [-1, 1, 1],
32
    [1, -1, 1],
33
    [1, 1, -1],
34
    [-1, -1, 1],
35
    [-1, 1, -1],
36
    [1, -1, -1],
37
    [-1, -1, -1]])
38

  
39

  
40
def rmsd(V, W):
41
    """
42
    Calculate Root-mean-square deviation from two sets of vectors V and W.
43

  
44
    Parameters
45
    ----------
46
    V : array
47
        (N,D) matrix, where N is points and D is dimension.
48
    W : array
49
        (N,D) matrix, where N is points and D is dimension.
50

  
51
    Returns
52
    -------
53
    rmsd : float
54
        Root-mean-square deviation between the two vectors
55
    """
56
    D = len(V[0])
57
    N = len(V)
58
    result = 0.0
59
    for v, w in zip(V, W):
60
        result += sum([(v[i] - w[i])**2.0 for i in range(D)])
61
    return np.sqrt(result/N)
62

  
63

  
64
def kabsch_rmsd(P, Q, translate=False):
65
    """
66
    Rotate matrix P unto Q using Kabsch algorithm and calculate the RMSD.
67

  
68
    Parameters
69
    ----------
70
    P : array
71
        (N,D) matrix, where N is points and D is dimension.
72
    Q : array
73
        (N,D) matrix, where N is points and D is dimension.
74
    translate : bool
75
        Use centroids to translate vector P and Q unto each other.
76

  
77
    Returns
78
    -------
79
    rmsd : float
80
        root-mean squared deviation
81
    """
82
    if translate:
83
        Q = Q - centroid(Q)
84
        P = P - centroid(P)
85

  
86
    P = kabsch_rotate(P, Q)
87
    return rmsd(P, Q)
88

  
89

  
90
def kabsch_rotate(P, Q):
91
    """
92
    Rotate matrix P unto matrix Q using Kabsch algorithm.
93

  
94
    Parameters
95
    ----------
96
    P : array
97
        (N,D) matrix, where N is points and D is dimension.
98
    Q : array
99
        (N,D) matrix, where N is points and D is dimension.
100

  
101
    Returns
102
    -------
103
    P : array
104
        (N,D) matrix, where N is points and D is dimension,
105
        rotated
106

  
107
    """
108
    U = kabsch(P, Q)
109

  
110
    # Rotate P
111
    P = np.dot(P, U)
112
    return P
113

  
114

  
115
def kabsch(P, Q):
116
    """
117
    Using the Kabsch algorithm with two sets of paired point P and Q, centered
118
    around the centroid. Each vector set is represented as an NxD
119
    matrix, where D is the the dimension of the space.
120

  
121
    The algorithm works in three steps:
122
    - a centroid translation of P and Q (assumed done before this function
123
      call)
124
    - the computation of a covariance matrix C
125
    - computation of the optimal rotation matrix U
126

  
127
    For more info see http://en.wikipedia.org/wiki/Kabsch_algorithm
128

  
129
    Parameters
130
    ----------
131
    P : array
132
        (N,D) matrix, where N is points and D is dimension.
133
    Q : array
134
        (N,D) matrix, where N is points and D is dimension.
135

  
136
    Returns
137
    -------
138
    U : matrix
139
        Rotation matrix (D,D)
140
    """
141

  
142
    # Computation of the covariance matrix
143
    C = np.dot(np.transpose(P), Q)
144

  
145
    # Computation of the optimal rotation matrix
146
    # This can be done using singular value decomposition (SVD)
147
    # Getting the sign of the det(V)*(W) to decide
148
    # whether we need to correct our rotation matrix to ensure a
149
    # right-handed coordinate system.
150
    # And finally calculating the optimal rotation matrix U
151
    # see http://en.wikipedia.org/wiki/Kabsch_algorithm
152
    V, S, W = np.linalg.svd(C)
153
    d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0
154

  
155
    if d:
156
        S[-1] = -S[-1]
157
        V[:, -1] = -V[:, -1]
158

  
159
    # Create Rotation matrix U
160
    U = np.dot(V, W)
161

  
162
    return U
163

  
164

  
165
def quaternion_rmsd(P, Q):
166
    """
167
    Rotate matrix P unto Q and calculate the RMSD
168
    based on doi:10.1016/1049-9660(91)90036-O
169

  
170
    Parameters
171
    ----------
172
    P : array
173
        (N,D) matrix, where N is points and D is dimension.
174
    Q : array
175
        (N,D) matrix, where N is points and D is dimension.
176

  
177
    Returns
178
    -------
179
    rmsd : float
180
    """
181
    rot = quaternion_rotate(P, Q)
182
    P = np.dot(P, rot)
183
    return rmsd(P, Q)
184

  
185

  
186
def quaternion_transform(r):
187
    """
188
    Get optimal rotation
189
    note: translation will be zero when the centroids of each molecule are the
190
    same
191
    """
192
    Wt_r = makeW(*r).T
193
    Q_r = makeQ(*r)
194
    rot = Wt_r.dot(Q_r)[:3, :3]
195
    return rot
196

  
197

  
198
def makeW(r1, r2, r3, r4=0):
199
    """
200
    matrix involved in quaternion rotation
201
    """
202
    W = np.asarray([
203
        [r4, r3, -r2, r1],
204
        [-r3, r4, r1, r2],
205
        [r2, -r1, r4, r3],
206
        [-r1, -r2, -r3, r4]])
207
    return W
208

  
209

  
210
def makeQ(r1, r2, r3, r4=0):
211
    """
212
    matrix involved in quaternion rotation
213
    """
214
    Q = np.asarray([
215
        [r4, -r3, r2, r1],
216
        [r3, r4, -r1, r2],
217
        [-r2, r1, r4, r3],
218
        [-r1, -r2, -r3, r4]])
219
    return Q
220

  
221

  
222
def quaternion_rotate(X, Y):
223
    """
224
    Calculate the rotation
225

  
226
    Parameters
227
    ----------
228
    X : array
229
        (N,D) matrix, where N is points and D is dimension.
230
    Y: array
231
        (N,D) matrix, where N is points and D is dimension.
232

  
233
    Returns
234
    -------
235
    rot : matrix
236
        Rotation matrix (D,D)
237
    """
238
    N = X.shape[0]
239
    W = np.asarray([makeW(*Y[k]) for k in range(N)])
240
    Q = np.asarray([makeQ(*X[k]) for k in range(N)])
241
    Qt_dot_W = np.asarray([np.dot(Q[k].T, W[k]) for k in range(N)])
242
    W_minus_Q = np.asarray([W[k] - Q[k] for k in range(N)])
243
    A = np.sum(Qt_dot_W, axis=0)
244
    eigen = np.linalg.eigh(A)
245
    r = eigen[1][:, eigen[0].argmax()]
246
    rot = quaternion_transform(r)
247
    return rot
248

  
249

  
250
def centroid(X):
251
    """
252
    Centroid is the mean position of all the points in all of the coordinate
253
    directions, from a vectorset X.
254

  
255
    https://en.wikipedia.org/wiki/Centroid
256

  
257
    C = sum(X)/len(X)
258

  
259
    Parameters
260
    ----------
261
    X : array
262
        (N,D) matrix, where N is points and D is dimension.
263

  
264
    Returns
265
    -------
266
    C : float
267
        centroid
268
    """
269
    C = X.mean(axis=0)
270
    return C
271

  
272

  
273
def reorder_distance(p_atoms, q_atoms, p_coord, q_coord):
274
    """
275
    Re-orders the input atom list and xyz coordinates by atom type and then by
276
    distance of each atom from the centroid.
277

  
278
    Parameters
279
    ----------
280
    atoms : array
281
        (N,1) matrix, where N is points holding the atoms' names
282
    coord : array
283
        (N,D) matrix, where N is points and D is dimension
284

  
285
    Returns
286
    -------
287
    atoms_reordered : array
288
        (N,1) matrix, where N is points holding the ordered atoms' names
289
    coords_reordered : array
290
        (N,D) matrix, where N is points and D is dimension (rows re-ordered)
291
    """
292

  
293
    # Find unique atoms
294
    unique_atoms = np.unique(p_atoms)
295

  
296
    # generate full view from q shape to fill in atom view on the fly
297
    view_reorder = np.zeros(q_atoms.shape, dtype=int)
298

  
299
    for atom in unique_atoms:
300

  
301
        p_atom_idx, = np.where(p_atoms == atom)
302
        q_atom_idx, = np.where(q_atoms == atom)
303

  
304
        A_coord = p_coord[p_atom_idx]
305
        B_coord = q_coord[q_atom_idx]
306

  
307
        # Calculate distance from each atom to centroid
308
        A_norms = np.linalg.norm(A_coord, axis=1)
309
        B_norms = np.linalg.norm(B_coord, axis=1)
310

  
311
        reorder_indices_A = np.argsort(A_norms)
312
        reorder_indices_B = np.argsort(B_norms)
313

  
314
        # Project the order of P onto Q
315
        translator = np.argsort(reorder_indices_A)
316
        view = reorder_indices_B[translator]
317
        view_reorder[p_atom_idx] = q_atom_idx[view]
318

  
319
    return view_reorder
320

  
321

  
322
def hungarian(A, B):
323
    """
324
    Hungarian reordering.
325

  
326
    Assume A and B are coordinates for atoms of SAME type only
327
    """
328

  
329
    # should be kabasch here i think
330
    distances = cdist(A, B, 'euclidean')
331

  
332
    # Perform Hungarian analysis on distance matrix between atoms of 1st
333
    # structure and trial structure
334
    indices_a, indices_b = linear_sum_assignment(distances)
335

  
336
    return indices_b
337

  
338

  
339
def reorder_hungarian(p_atoms, q_atoms, p_coord, q_coord):
340
    """
341
    Re-orders the input atom list and xyz coordinates using the Hungarian
342
    method (using optimized column results)
343

  
344
    Parameters
345
    ----------
346
    p_atoms : array
347
        (N,1) matrix, where N is points holding the atoms' names
348
    p_atoms : array
349
        (N,1) matrix, where N is points holding the atoms' names
350
    p_coord : array
351
        (N,D) matrix, where N is points and D is dimension
352
    q_coord : array
353
        (N,D) matrix, where N is points and D is dimension
354

  
355
    Returns
356
    -------
357
    view_reorder : array
358
             (N,1) matrix, reordered indexes of atom alignment based on the
359
             coordinates of the atoms
360

  
361
    """
362

  
363
    # Find unique atoms
364
    unique_atoms = np.unique(p_atoms)
365

  
366
    # generate full view from q shape to fill in atom view on the fly
367
    view_reorder = np.zeros(q_atoms.shape, dtype=int)
368
    view_reorder -= 1
369

  
370
    for atom in unique_atoms:
371
        p_atom_idx, = np.where(p_atoms == atom)
372
        q_atom_idx, = np.where(q_atoms == atom)
373

  
374
        A_coord = p_coord[p_atom_idx]
375
        B_coord = q_coord[q_atom_idx]
376

  
377
        view = hungarian(A_coord, B_coord)
378
        view_reorder[p_atom_idx] = q_atom_idx[view]
379

  
380
    return view_reorder
381

  
382

  
383
def generate_permutations(elements, n):
384
    """
385
    Heap's algorithm for generating all n! permutations in a list
386
    https://en.wikipedia.org/wiki/Heap%27s_algorithm
387

  
388
    """
389
    c = [0] * n
390
    yield elements
391
    i = 0
392
    while i < n:
393
        if c[i] < i:
394
            if i % 2 == 0:
395
                elements[0], elements[i] = elements[i], elements[0]
396
            else:
397
                elements[c[i]], elements[i] = elements[i], elements[c[i]]
398
            yield elements
399
            c[i] += 1
400
            i = 0
401
        else:
402
            c[i] = 0
403
            i += 1
404

  
405

  
406
def brute_permutation(A, B):
407
    """
408
    Re-orders the input atom list and xyz coordinates using the brute force
409
    method of permuting all rows of the input coordinates
410

  
411
    Parameters
412
    ----------
413
    A : array
414
        (N,D) matrix, where N is points and D is dimension
415
    B : array
416
        (N,D) matrix, where N is points and D is dimension
417

  
418
    Returns
419
    -------
420
    view : array
421
        (N,1) matrix, reordered view of B projected to A
422
    """
423

  
424
    rmsd_min = np.inf
425
    view_min = None
426

  
427
    # Sets initial ordering for row indices to [0, 1, 2, ..., len(A)], used in
428
    # brute-force method
429

  
430
    num_atoms = A.shape[0]
431
    initial_order = list(range(num_atoms))
432

  
433
    for reorder_indices in generate_permutations(initial_order, num_atoms):
434

  
435
        # Re-order the atom array and coordinate matrix
436
        coords_ordered = B[reorder_indices]
437

  
438
        # Calculate the RMSD between structure 1 and the Hungarian re-ordered
439
        # structure 2
440
        rmsd_temp = kabsch_rmsd(A, coords_ordered)
441

  
442
        # Replaces the atoms and coordinates with the current structure if the
443
        # RMSD is lower
444
        if rmsd_temp < rmsd_min:
445
            rmsd_min = rmsd_temp
446
            view_min = copy.deepcopy(reorder_indices)
447

  
448
    return view_min
449

  
450

  
451
def reorder_brute(p_atoms, q_atoms, p_coord, q_coord):
452
    """
453
    Re-orders the input atom list and xyz coordinates using all permutation of
454
    rows (using optimized column results)
455

  
456
    Parameters
457
    ----------
458
    p_atoms : array
459
        (N,1) matrix, where N is points holding the atoms' names
460
    q_atoms : array
461
        (N,1) matrix, where N is points holding the atoms' names
462
    p_coord : array
463
        (N,D) matrix, where N is points and D is dimension
464
    q_coord : array
465
        (N,D) matrix, where N is points and D is dimension
466

  
467
    Returns
468
    -------
469
    view_reorder : array
470
        (N,1) matrix, reordered indexes of atom alignment based on the
471
        coordinates of the atoms
472

  
473
    """
474

  
475
    # Find unique atoms
476
    unique_atoms = np.unique(p_atoms)
477

  
478
    # generate full view from q shape to fill in atom view on the fly
479
    view_reorder = np.zeros(q_atoms.shape, dtype=int)
480
    view_reorder -= 1
481

  
482
    for atom in unique_atoms:
483
        p_atom_idx, = np.where(p_atoms == atom)
484
        q_atom_idx, = np.where(q_atoms == atom)
485

  
486
        A_coord = p_coord[p_atom_idx]
487
        B_coord = q_coord[q_atom_idx]
488

  
489
        view = brute_permutation(A_coord, B_coord)
490
        view_reorder[p_atom_idx] = q_atom_idx[view]
491

  
492
    return view_reorder
493

  
494

  
495
def check_reflections(p_atoms, q_atoms, p_coord, q_coord,
496
                      reorder_method=reorder_hungarian,
497
                      rotation_method=kabsch_rmsd,
498
                      keep_stereo=False):
499
    """
500
    Minimize RMSD using reflection planes for molecule P and Q
501

  
502
    Warning: This will affect stereo-chemistry
503

  
504
    Parameters
505
    ----------
506
    p_atoms : array
507
        (N,1) matrix, where N is points holding the atoms' names
508
    q_atoms : array
509
        (N,1) matrix, where N is points holding the atoms' names
510
    p_coord : array
511
        (N,D) matrix, where N is points and D is dimension
512
    q_coord : array
513
        (N,D) matrix, where N is points and D is dimension
514

  
515
    Returns
516
    -------
517
    min_rmsd
518
    min_swap
519
    min_reflection
520
    min_review
521

  
522
    """
523

  
524
    min_rmsd = np.inf
525
    min_swap = None
526
    min_reflection = None
527
    min_review = None
528
    tmp_review = None
529
    swap_mask = [1,-1,-1,1,-1,1]
530
    reflection_mask = [1,-1,-1,-1,1,1,1,-1]
531

  
532
    for swap, i in zip(AXIS_SWAPS, swap_mask):
533
        for reflection, j in zip(AXIS_REFLECTIONS, reflection_mask):
534
            if keep_stereo and  i * j == -1: continue # skip enantiomers
535

  
536
            tmp_atoms = copy.copy(q_atoms)
537
            tmp_coord = copy.deepcopy(q_coord)
538
            tmp_coord = tmp_coord[:, swap]
539
            tmp_coord = np.dot(tmp_coord, np.diag(reflection))
540
            tmp_coord -= centroid(tmp_coord)
541

  
542
            # Reorder
543
            if reorder_method is not None:
544
                tmp_review = reorder_method(p_atoms, tmp_atoms, p_coord, tmp_coord)
545
                tmp_coord = tmp_coord[tmp_review]
546
                tmp_atoms = tmp_atoms[tmp_review]
547

  
548
            # Rotation
549
            if rotation_method is None:
550
                this_rmsd = rmsd(p_coord, tmp_coord)
551
            else:
552
                this_rmsd = rotation_method(p_coord, tmp_coord)
553

  
554
            if this_rmsd < min_rmsd:
555
                min_rmsd = this_rmsd
556
                min_swap = swap
557
                min_reflection = reflection
558
                min_review = tmp_review
559

  
560
    if not (p_atoms == q_atoms[min_review]).all():
561
        print("error: Not aligned")
562
        quit()
563

  
564
    return min_rmsd, min_swap, min_reflection, min_review
565

  
566

  
567
def set_coordinates(atoms, V, title="", decimals=8):
568
    """
569
    Print coordinates V with corresponding atoms to stdout in XYZ format.
570
    Parameters
571
    ----------
572
    atoms : list
573
        List of atomic types
574
    V : array
575
        (N,3) matrix of atomic coordinates
576
    title : string (optional)
577
        Title of molecule
578
    decimals : int (optional)
579
        number of decimals for the coordinates
580

  
581
    Return
582
    ------
583
    output : str
584
        Molecule in XYZ format
585

  
586
    """
587
    N, D = V.shape
588

  
589
    fmt = "{:2s}" + (" {:15."+str(decimals)+"f}")*3
590

  
591
    out = list()
592
    out += [str(N)]
593
    out += [title]
594

  
595
    for i in range(N):
596
        atom = atoms[i]
597
        atom = atom[0].upper() + atom[1:]
598
        out += [fmt.format(atom, V[i, 0], V[i, 1], V[i, 2])]
599

  
600
    return "\n".join(out)
601

  
602

  
603
def print_coordinates(atoms, V, title=""):
604
    """
605
    Print coordinates V with corresponding atoms to stdout in XYZ format.
606

  
607
    Parameters
608
    ----------
609
    atoms : list
610
        List of element types
611
    V : array
612
        (N,3) matrix of atomic coordinates
613
    title : string (optional)
614
        Title of molecule
615

  
616
    """
617

  
618
    print(set_coordinates(atoms, V, title=title))
619

  
620
    return
621

  
622

  
623
def get_coordinates(filename, fmt):
624
    """
625
    Get coordinates from filename in format fmt. Supports XYZ and PDB.
626
    Parameters
627
    ----------
628
    filename : string
629
        Filename to read
630
    fmt : string
631
        Format of filename. Either xyz or pdb.
632
    Returns
633
    -------
634
    atoms : list
635
        List of atomic types
636
    V : array
637
        (N,3) where N is number of atoms
638
    """
639
    if fmt == "xyz":
640
        get_func = get_coordinates_xyz
641
    elif fmt == "pdb":
642
        get_func = get_coordinates_pdb
643
    else:
644
        exit("Could not recognize file format: {:s}".format(fmt))
645

  
646
    return get_func(filename)
647

  
648

  
649
def get_coordinates_pdb(filename):
650
    """
651
    Get coordinates from the first chain in a pdb file
652
    and return a vectorset with all the coordinates.
653

  
654
    Parameters
655
    ----------
656
    filename : string
657
        Filename to read
658

  
659
    Returns
660
    -------
661
    atoms : list
662
        List of atomic types
663
    V : array
664
        (N,3) where N is number of atoms
665
    """
666

  
667
    # PDB files tend to be a bit of a mess. The x, y and z coordinates
668
    # are supposed to be in column 31-38, 39-46 and 47-54, but this is
669
    # not always the case.
670
    # Because of this the three first columns containing a decimal is used.
671
    # Since the format doesn't require a space between columns, we use the
672
    # above column indices as a fallback.
673

  
674
    x_column = None
675
    V = list()
676

  
677
    # Same with atoms and atom naming.
678
    # The most robust way to do this is probably
679
    # to assume that the atomtype is given in column 3.
680

  
681
    atoms = list()
682

  
683
    with open(filename, 'r') as f:
684
        lines = f.readlines()
685
        for line in lines:
686
            if line.startswith("TER") or line.startswith("END"):
687
                break
688
            if line.startswith("ATOM"):
689
                tokens = line.split()
690
                # Try to get the atomtype
691
                try:
692
                    atom = tokens[2][0]
693
                    if atom in ("H", "C", "N", "O", "S", "P"):
694
                        atoms.append(atom)
695
                    else:
696
                        # e.g. 1HD1
697
                        atom = tokens[2][1]
698
                        if atom == "H":
699
                            atoms.append(atom)
700
                        else:
701
                            raise Exception
702
                except:
703
                    exit("error: Parsing atomtype for the following line: \n{0:s}".format(line))
704

  
705
                if x_column == None:
706
                    try:
707
                        # look for x column
708
                        for i, x in enumerate(tokens):
709
                            if "." in x and "." in tokens[i + 1] and "." in tokens[i + 2]:
710
                                x_column = i
711
                                break
712
                    except IndexError:
713
                        exit("error: Parsing coordinates for the following line: \n{0:s}".format(line))
714
                # Try to read the coordinates
715
                try:
716
                    V.append(np.asarray(tokens[x_column:x_column + 3], dtype=float))
717
                except:
718
                    # If that doesn't work, use hardcoded indices
719
                    try:
720
                        x = line[30:38]
721
                        y = line[38:46]
722
                        z = line[46:54]
723
                        V.append(np.asarray([x, y ,z], dtype=float))
724
                    except:
725
                        exit("error: Parsing input for the following line: \n{0:s}".format(line))
726

  
727

  
728
    V = np.asarray(V)
729
    atoms = np.asarray(atoms)
730

  
731
    assert V.shape[0] == atoms.size
732

  
733
    return atoms, V
734

  
735

  
736
def get_coordinates_xyz(filename):
737
    """
738
    Get coordinates from filename and return a vectorset with all the
739
    coordinates, in XYZ format.
740

  
741
    Parameters
742
    ----------
743
    filename : string
744
        Filename to read
745

  
746
    Returns
747
    -------
748
    atoms : list
749
        List of atomic types
750
    V : array
751
        (N,3) where N is number of atoms
752
    """
753

  
754
    f = open(filename, 'r')
755
    V = list()
756
    atoms = list()
757
    n_atoms = 0
758

  
759
    # Read the first line to obtain the number of atoms to read
760
    try:
761
        n_atoms = int(f.readline())
762
    except ValueError:
763
        exit("error: Could not obtain the number of atoms in the .xyz file.")
764

  
765
    # Skip the title line
766
    f.readline()
767

  
768
    # Use the number of atoms to not read beyond the end of a file
769
    for lines_read, line in enumerate(f):
770

  
771
        if lines_read == n_atoms:
772
            break
773

  
774
        atom = re.findall(r'[a-zA-Z]+', line)[0]
775
        atom = atom.upper()
776

  
777
        numbers = re.findall(r'[-]?\d+\.\d*(?:[Ee][-\+]\d+)?', line)
778
        numbers = [float(number) for number in numbers]
779

  
780
        # The numbers are not valid unless we obtain exacly three
781
        if len(numbers) >= 3:
782
            V.append(np.array(numbers)[:3])
783
            atoms.append(atom)
784
        else:
785
            exit("Reading the .xyz file failed in line {0}. Please check the format.".format(lines_read + 2))
786

  
787
    f.close()
788
    atoms = np.array(atoms)
789
    V = np.array(V)
790
    return atoms, V
791

  
792

  
793
def main():
794

  
795
    import argparse
796
    import sys
797

  
798
    description = __doc__
799

  
800
    version_msg = """
801
rmsd {}
802

  
803
See https://github.com/charnley/rmsd for citation information
804

  
805
"""
806
    version_msg = version_msg.format(__version__)
807

  
808
    epilog = """
809
"""
810

  
811
    parser = argparse.ArgumentParser(
812
        usage='calculate_rmsd [options] FILE_A FILE_B',
813
        description=description,
814
        formatter_class=argparse.RawDescriptionHelpFormatter,
815
        epilog=epilog)
816

  
817

  
818
    # Input structures
819
    parser.add_argument('structure_a', metavar='FILE_A', type=str, help='structures in .xyz or .pdb format')
820
    parser.add_argument('structure_b', metavar='FILE_B', type=str)
821

  
822
    # Admin
823
    parser.add_argument('-v', '--version', action='version', version=version_msg)
824

  
825
    # Rotation
826
    parser.add_argument('-r', '--rotation', action='store', default="kabsch", help='select rotation method. "kabsch" (default), "quaternion" or "none"', metavar="METHOD")
827

  
828
    # Reorder arguments
829
    parser.add_argument('-e', '--reorder', action='store_true', help='align the atoms of molecules (default: Hungarian)')
830
    parser.add_argument('--reorder-method', action='store', default="hungarian", metavar="METHOD", help='select which reorder method to use; hungarian (default), brute, distance')
831
    parser.add_argument('--use-reflections', action='store_true', help='scan through reflections in planes (eg Y transformed to -Y -> X, -Y, Z) and axis changes, (eg X and Z coords exchanged -> Z, Y, X). This will affect stereo-chemistry.')
832
    parser.add_argument('--use-reflections-keep-stereo', action='store_true', help='scan through reflections in planes (eg Y transformed to -Y -> X, -Y, Z) and axis changes, (eg X and Z coords exchanged -> Z, Y, X). Stereo-chemistry will be kept.')
833

  
834
    # Filter
835
    index_group = parser.add_mutually_exclusive_group()
836
    index_group.add_argument('-nh', '--no-hydrogen', action='store_true', help='ignore hydrogens when calculating RMSD')
837
    index_group.add_argument('--remove-idx', nargs='+', type=int, help='index list of atoms NOT to consider', metavar='IDX')
838
    index_group.add_argument('--add-idx', nargs='+', type=int, help='index list of atoms to consider', metavar='IDX')
839

  
840
    # format and print
841
    parser.add_argument('--format', action='store', help='format of input files. valid format are xyz and pdb', metavar='FMT')
842
    parser.add_argument('-p', '--output', '--print', action='store_true', help='print out structure B, centered and rotated unto structure A\'s coordinates in XYZ format')
843

  
844
    if len(sys.argv) == 1:
845
        parser.print_help()
846
        sys.exit(1)
847

  
848
    args = parser.parse_args()
849

  
850
    # As default, load the extension as format
851
    if args.format is None:
852
        args.format = args.structure_a.split('.')[-1]
853

  
854
    p_all_atoms, p_all = get_coordinates(args.structure_a, args.format)
855
    q_all_atoms, q_all = get_coordinates(args.structure_b, args.format)
856

  
857
    p_size = p_all.shape[0]
858
    q_size = q_all.shape[0]
859

  
860
    if not p_size == q_size:
861
        print("error: Structures not same size")
862
        quit()
863

  
864
    if np.count_nonzero(p_all_atoms != q_all_atoms) and not args.reorder:
865
        msg = """
866
error: Atoms are not in the same order.
867

  
868
Use --reorder to align the atoms (can be expensive for large structures).
869

  
870
Please see --help or documentation for more information or
871
https://github.com/charnley/rmsd for further examples.
872
"""
873
        print(msg)
874
        exit()
875

  
876

  
877
    # Set local view
878
    p_view = None
879
    q_view = None
880

  
881

  
882
    if args.no_hydrogen:
883
        p_view = np.where(p_all_atoms != 'H')
884
        q_view = np.where(q_all_atoms != 'H')
885

  
886
    elif args.remove_idx:
887
        index = range(p_size)
888
        index = set(index) - set(args.remove_idx)
889
        index = list(index)
890
        p_view = index
891
        q_view = index
892

  
893
    elif args.add_idx:
894
        p_view = args.add_idx
895
        q_view = args.add_idx
896

  
897

  
898
    # Set local view
899
    if p_view is None:
900
        p_coord = copy.deepcopy(p_all)
901
        q_coord = copy.deepcopy(q_all)
902
        p_atoms = copy.deepcopy(p_all_atoms)
903
        q_atoms = copy.deepcopy(q_all_atoms)
904

  
905
    else:
906

  
907
        if args.reorder and args.output:
908
            print("error: Cannot reorder atoms and print structure, when excluding atoms (such as --no-hydrogen)")
909
            quit()
910

  
911
        if args.use_reflections and args.output:
912
            print("error: Cannot use reflections on atoms and print, when excluding atoms (such as --no-hydrogen)")
913
            quit()
914

  
915
        p_coord = copy.deepcopy(p_all[p_view])
916
        q_coord = copy.deepcopy(q_all[q_view])
917
        p_atoms = copy.deepcopy(p_all_atoms[p_view])
918
        q_atoms = copy.deepcopy(q_all_atoms[q_view])
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff