dockonsurf / modules / formats.py @ c6e71e46
Historique | Voir | Annoter | Télécharger (8,4 ko)
1 |
"""Module for the conversion between atomic coordinates files and objects
|
---|---|
2 |
|
3 |
functions:
|
4 |
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
|
5 |
of separate mol objects.
|
6 |
rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object.
|
7 |
adapt_format: Converts the coordinate files into a required library object type.
|
8 |
read_coords: Reads the atomic coordinates resulting from finished calculations.
|
9 |
"""
|
10 |
|
11 |
import logging |
12 |
|
13 |
import rdkit.Chem.AllChem as Chem |
14 |
|
15 |
logger = logging.getLogger('DockOnSurf')
|
16 |
|
17 |
|
18 |
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None): |
19 |
"""Converts the conformers inside a rdkit mol object to a list of
|
20 |
separate mol objects.
|
21 |
|
22 |
@param mol: rdkit mol object containing at least one conformer.
|
23 |
@param idx_lst: list of conformer indices to be considered. If not passed,
|
24 |
all conformers are considered.
|
25 |
@return: list of separate mol objects.
|
26 |
"""
|
27 |
if idx_lst is None: |
28 |
idx_lst = list(range(mol.GetNumConformers())) |
29 |
return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=int(idx)), |
30 |
removeHs=False) for idx in idx_lst] |
31 |
|
32 |
|
33 |
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol): |
34 |
"""Converts a rdkit mol object into ase Atoms object.
|
35 |
@param mol: rdkit mol object containing only one conformer.
|
36 |
@return ase.Atoms: ase Atoms object with the same coordinates.
|
37 |
"""
|
38 |
from ase import Atoms |
39 |
if mol.GetNumConformers() > 1: |
40 |
logger.warning('A mol object with multiple conformers is parsed, '
|
41 |
'converting to Atoms only the first conformer.')
|
42 |
symbols = [atm.GetSymbol() for atm in mol.GetAtoms()] |
43 |
positions = mol.GetConformer(0).GetPositions()
|
44 |
return Atoms(symbols=symbols, positions=positions)
|
45 |
|
46 |
|
47 |
def add_special_atoms(symbol_pairs): |
48 |
"""Allows to use custom elements with symbols not in the periodic table.
|
49 |
|
50 |
This function adds new chemical elements to be used by ase. Every new custom
|
51 |
element must have a traditional (present in the periodic table) partner
|
52 |
from which to obtain all its properties.
|
53 |
@param symbol_pairs: List of tuples containing the pairs of chemical symbols.
|
54 |
Every tuple contains a pair of chemical symbols, the first label must be
|
55 |
the label of the custom element and the second one the symbol of the
|
56 |
reference one (traditional present on the periodic table).
|
57 |
@return:
|
58 |
"""
|
59 |
import numpy as np |
60 |
from ase import data |
61 |
for i, pair in enumerate(symbol_pairs): |
62 |
data.chemical_symbols += [pair[0]]
|
63 |
z_orig = data.atomic_numbers[pair[1]]
|
64 |
orig_iupac_mass = data.atomic_masses_iupac2016[z_orig] |
65 |
orig_com_mass = data.atomic_masses_common[z_orig] |
66 |
data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1 |
67 |
data.atomic_names += [pair[0]]
|
68 |
data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016, |
69 |
orig_iupac_mass) |
70 |
data.atomic_masses = data.atomic_masses_iupac2016 |
71 |
data.atomic_masses_common = np.append(data.atomic_masses_common, |
72 |
orig_com_mass) |
73 |
data.covalent_radii = np.append(data.covalent_radii, |
74 |
data.covalent_radii[z_orig]) |
75 |
data.reference_states += [data.reference_states[z_orig]] |
76 |
# TODO Add vdw_radii, gsmm and aml (smaller length)
|
77 |
|
78 |
|
79 |
def adapt_format(requirement, coord_file, spec_atms=tuple()): |
80 |
"""Converts the coordinate files into a required library object type.
|
81 |
|
82 |
Depending on the library required to use and the file type, it converts the
|
83 |
coordinate file into a library-workable object.
|
84 |
@param requirement: str, the library for which the conversion should be
|
85 |
made. Accepted values: 'ase', 'rdkit'.
|
86 |
@param coord_file: str, path to the coordinates file aiming to convert.
|
87 |
Accepted file tyoes: 'xyz', 'mol'.
|
88 |
@param spec_atms: List of tuples containing pairs of new/traditional
|
89 |
chemical symbols.
|
90 |
@return: an object the required library can work with.
|
91 |
"""
|
92 |
import ase.io |
93 |
from ase.io.formats import filetype |
94 |
|
95 |
req_vals = ['rdkit', 'ase'] |
96 |
file_type_vals = ['xyz', 'mol'] |
97 |
lib_err = f"The conversion to the '{requirement}' library object type" \
|
98 |
f" has not yet been implemented"
|
99 |
conv_info = f"Converted {coord_file} to {requirement} object type"
|
100 |
|
101 |
fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
|
102 |
|
103 |
if requirement not in req_vals: |
104 |
logger.error(lib_err) |
105 |
raise NotImplementedError(lib_err) |
106 |
|
107 |
if filetype(coord_file) not in file_type_vals: |
108 |
logger.error(fil_type_err) |
109 |
raise NotImplementedError(fil_type_err) |
110 |
|
111 |
if requirement == 'rdkit': |
112 |
if filetype(coord_file) == 'xyz': |
113 |
from modules.xyz2mol import xyz2mol |
114 |
ase_atms = ase.io.read(coord_file) |
115 |
atomic_nums = ase_atms.get_atomic_numbers().tolist() |
116 |
xyz_coordinates = ase_atms.positions.tolist() |
117 |
rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
|
118 |
logger.debug(conv_info) |
119 |
return Chem.AddHs(rd_mol_obj)
|
120 |
elif filetype(coord_file) == 'mol': |
121 |
logger.debug(conv_info) |
122 |
return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False)) |
123 |
|
124 |
if requirement == 'ase': |
125 |
add_special_atoms(spec_atms) |
126 |
if filetype(coord_file) == 'xyz': |
127 |
logger.debug(conv_info) |
128 |
return ase.io.read(coord_file)
|
129 |
elif filetype(coord_file) == 'mol': |
130 |
logger.debug(conv_info) |
131 |
rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
|
132 |
return rdkit_mol_to_ase_atoms(rd_mol)
|
133 |
|
134 |
|
135 |
def read_coords(code, run_type, req, spec_atms=tuple()): |
136 |
"""Reads the atomic coordinates resulting from finished calculations.
|
137 |
|
138 |
Given a run_type ('isolated', 'screening' or 'refinement') directory
|
139 |
containing different subdirectories with finished calculations in every
|
140 |
subdirectory, it reads, from each subirectory, the final coordinates
|
141 |
resulting from the calculation and returns a list of objects adequate to the
|
142 |
required library.
|
143 |
|
144 |
@param code: the code that produced the calculation results files.
|
145 |
@param run_type: the type of calculation (and also the name of the folder)
|
146 |
containing the calculation subdirectories.
|
147 |
@param req: The required library object type to make the list of (eg. rdkit,
|
148 |
ase)
|
149 |
@param spec_atms: List of tuples containing pairs of new/traditional
|
150 |
chemical symbols.
|
151 |
@return: list of collection-of-atoms objects. (rdkit.Mol, ase.Atoms, etc.)
|
152 |
"""
|
153 |
import os |
154 |
# Relate file-name patterns to codes
|
155 |
if code == 'cp2k': |
156 |
pattern = '-pos-1.xyz'
|
157 |
else:
|
158 |
pattern = ''
|
159 |
|
160 |
# Read appropriate files and transform them to adequate object
|
161 |
atoms_list = [] |
162 |
for conf in os.listdir(run_type): |
163 |
if not os.path.isdir(f'{run_type}/{conf}') or 'conf_' not in conf: |
164 |
continue
|
165 |
for fil in os.listdir(f"{run_type}/{conf}"): |
166 |
if pattern not in fil: |
167 |
continue
|
168 |
atoms_list.append(adapt_format(req, f'{run_type}/{conf}/{fil}',
|
169 |
spec_atms)) |
170 |
return atoms_list
|
171 |
|
172 |
|
173 |
def read_energies(code, run_type): |
174 |
"""Reads the energies resulting from finished calculations.
|
175 |
|
176 |
Given a run_type ('isolated', 'screening' or 'refinement') directory
|
177 |
containing different subdirectories with finished calculations in every
|
178 |
subdirectory, it reads the final energies of calculations inside each
|
179 |
subdirectory.
|
180 |
|
181 |
@param code: the code that produced the calculation results files.
|
182 |
@param run_type: the type of calculation (and also the name of the folder)
|
183 |
containing the calculation subdirectories.
|
184 |
@return: list of energies
|
185 |
"""
|
186 |
import os |
187 |
import numpy as np |
188 |
from modules.utilities import tail |
189 |
|
190 |
energies = [] |
191 |
if code == 'cp2k': |
192 |
pattern = '-pos-1.xyz'
|
193 |
for conf in os.listdir(run_type): |
194 |
for fil in os.listdir(f"{run_type}/{conf}"): |
195 |
if pattern in fil: |
196 |
traj_fh = open(f"{run_type}/{conf}/{fil}", 'rb') |
197 |
num_atoms = int(traj_fh.readline().strip())
|
198 |
last_geo = tail(traj_fh, num_atoms + 2).splitlines()
|
199 |
for line in last_geo: |
200 |
if 'E =' in line: |
201 |
energies.append(float(line.split('E =')[1])) |
202 |
|
203 |
return np.array(energies)
|